mafft-7.123-without-extensions/0000750000076500007650000000000012227117457015534 5ustar katohkatohmafft-7.123-without-extensions/binaries/0000750000076500007650000000000012227117127017322 5ustar katohkatohmafft-7.123-without-extensions/core/0000750000076500007650000000000012227117322016453 5ustar katohkatohmafft-7.123-without-extensions/core/nj.c0000640000076500007650000001155110510056752017234 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 void topolcpy( int s1[], int s2[], int *mpt1, int *mpt2 ) { int i; *mpt1 = *mpt2; for( i=0; i<*mpt2; i++ ) { s1[i] = s2[i]; } } void topolcat( int s1[], int s2[], int *mpt1, int *mpt2 ) { int i; for( i=*mpt1; i<*mpt1+*mpt2; i++ ) { s1[i] = s2[i-*mpt1]; } *mpt1 += *mpt2; } void topolsort( int m, int s[] ) { int i, j, im; int sm; for( j=0; j2; n--, m=nseq-n ) { t = 0.0; for( i=0; i 0 ) { topol[m][0][count] = l; count++; } mem[m][0] = count; for( l=0, count=0; l 0 ) { topol[m][1][count] = l; count++; } mem[m][1] = count; for( l=0; l 0 ); if( n > 3 ) reduc( mtx, nseq, im, jm ); } for( i=0; i 0 ) { topol[m][0][count] = l; count++; } mem[m][0] = count; /* printf( " total length == %f\n", totallen ); */ topolcpy( topol[nseq-2][1], topol[nseq-3][0], mem[nseq-2]+1, mem[nseq-3] ); topolcat( topol[nseq-2][1], topol[nseq-3][1], mem[nseq-2]+1, mem[nseq-3]+1 ); topolsort( mem[nseq-2][1], topol[nseq-2][1] ); if( topol[nseq-2][0][0] > topol[nseq-2][1][0] ) topolswap( topol[nseq-2][0], topol[nseq-2][1], mem[nseq-2], mem[nseq-2]+1 ); } mafft-7.123-without-extensions/core/dvtditr.c0000640000076500007650000005414012216774264020320 0ustar katohkatoh /* Tree-dependent-iteration */ /* Devide to segments */ #include "mltaln.h" extern char **seq_g; extern char **res_g; static int subalignment; static int subalignmentoffset; static int intop; static int intree; void arguments( int argc, char *argv[] ) { int c; char *argkey; outnumber = 0; nthread = 1; randomseed = 0; scoreout = 0; parallelizationstrategy = BAATARI1; intop = 0; intree = 0; inputfile 
= NULL; rnakozo = 0; rnaprediction = 'm'; nevermemsave = 0; score_check = 1; fftkeika = 1; constraint = 0; fmodel = 0; kobetsubunkatsu = 1; bunkatsu = 1; nblosum = 62; niter = 100; calledByXced = 0; devide = 1; divWinSize = 20; /* 70 */ divThreshold = 65; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; scmtd = 5; cooling = 1; weight = 4; utree = 1; refine = 1; check = 1; cut = 0.0; disp = 0; outgap = 1; use_fft = 0; // CHUUI dochira demo mafft.tmpl deha F force_fft = 0; alg = 'A'; /* chuui */ mix = 0; checkC = 0; tbitr = 0; treemethod = 'X'; scoremtx = 1; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; subalignment = 0; subalignmentoffset = 0; legacygapcost = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': niter = myatoi( *++argv ); fprintf( stderr, "niter = %d\n", niter ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = 
myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; case 't': randomseed = myatoi( *++argv ); fprintf( stderr, "randomseed = %d\n", randomseed ); --argc; goto nextoption; case 'p': argkey = *++argv; if( !strcmp( argkey, "BESTFIRST" ) ) parallelizationstrategy = BESTFIRST; else if( !strcmp( argkey, "BAATARI0" ) ) parallelizationstrategy = BAATARI0; else if( !strcmp( argkey, "BAATARI1" ) ) parallelizationstrategy = BAATARI1; else if( !strcmp( argkey, "BAATARI2" ) ) parallelizationstrategy = BAATARI2; else { fprintf( stderr, "Unknown parallelization strategy, %s\n", argkey ); exit( 1 ); } // exit( 1 ); --argc; goto nextoption; case 'S' : scoreout = 1; break; case 's' : RNAscoremtx = 'r'; break; #if 1 case 'a': fmodel = 1; break; #endif case 'N': nevermemsave = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'Q': alg = 'Q'; break; case 'R': rnaprediction = 'r'; break; case 'O': fftNoAnchStop = 1; break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; #endif case 'T': kobetsubunkatsu = 0; break; case 'B': bunkatsu = 0; break; #if 0 case 'c': cooling = 1; break; case 'a': alg = 'a'; break; case 's' : treemethod = 's'; break; case 'H': alg = 'H'; 
break; #endif case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'F': use_fft = 1; break; #if 0 case 't': weight = 4; break; #endif case 'u': weight = 0; break; case 'U': intree = 1; break; case 'V': intop = 1; break; case 'J': utree = 0; break; case 'd': disp = 1; break; case 'Z': score_check = 0; break; case 'Y': score_check = 2; break; case 'L': legacygapcost = 1; break; #if 0 case 'n' : treemethod = 'n'; break; #endif case 'n' : outnumber = 1; break; case 'X' : treemethod = 'X'; break; case 'E' : treemethod = 'E'; break; case 'q' : treemethod = 'q'; break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options : Check source file!\n" ); exit( 1 ); } #if 0 if( alg == 'A' && weight == 0 ) ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); #endif } int main( int argc, char *argv[] ) { int identity; static int nlen[M]; static char **name, **seq, **aseq, **bseq; static Segment *segment = NULL; static int anchors[MAXSEG]; int i, j; int iseg, nseg; int ***topol; double **len; double **eff; FILE *prep; FILE *infp; FILE *orderfp; int alloclen; int returnvalue; char c; int ocut; char **seq_g_bk; LocalHom **localhomtable = NULL; // by D.Mathog RNApair ***singlerna; int nogaplen; static char **nogap1seq; static char *kozoarivec; int nkozo; int alignmentlength; int **skipthisbranch; int foundthebranch; int nsubalignments, maxmem; int **subtable; int *insubtable; int *preservegaps; char ***subalnpt; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp 
); nkozo = 0; if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } if( subalignment ) { readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem ); fprintf( stderr, "nsubalignments = %d\n", nsubalignments ); fprintf( stderr, "maxmem = %d\n", maxmem ); subtable = AllocateIntMtx( nsubalignments, maxmem+1 ); insubtable = AllocateIntVec( njob ); preservegaps = AllocateIntVec( njob ); for( i=0; i 30000 ) if( nlenmax > 50000 ) // version >= 6.823 { #if 0 if( constraint ) { fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax ); exit( 1 ); } if( nevermemsave ) { fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax ); exit( 1 ); } #endif if( !constraint && !nevermemsave && alg != 'M' ) { fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax ); alg = 'M'; } } #if 0 Read( name, nlen, seq_g ); #else readData_pointer( infp, name, nlen, seq_g ); #endif fclose( infp ); for( i=0; i= njob ) { fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); fprintf( stderr, "#\n" ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); fprintf( stderr, "# %d. 
%-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); fprintf( stderr, "#\n" ); fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "\n" ); exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "############################################################################### \n" ); fprintf( stderr, "\n" ); exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %d\n\n", skipthisbranch[i][0] ); fprintf( stderr, "group2 = " ); for( j=0; topol[i][1][j] != -1; j++ ) fprintf( stderr, "%d ", topol[i][1][j]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][1] ); } #endif for( i=0; i nlenmax ) nlenmax = nlen[i]; i++; } } if( nlenmax > N || njob > M ) { fprintf( stderr, "ERROR in main\n" ); exit( 1 ); } /* nlenmax = Na; */ rewind( stdin ); value = main1( nlen, argc, argv ); exit( 0 ); } #endif mafft-7.123-without-extensions/core/f2cl.c0000640000076500007650000001342212225401572017451 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 static char *comment; static char *orderfile; static int format; static int namelen; static int extendedalphabet; static void fillspace( char *seq, int lenmax ) { int len = strlen( seq ); seq += len; lenmax -= len; while( lenmax-- ) *seq++ = ' '; *seq = 0; } void setmark_clustal( int nlen, int nseq, char **seq, char *mark ) { int i, j, k, nalpha; char firstletter; char *strong[9]; char *weaker[11]; int nstrong, nweaker; char s; if( dorp == 'd' ) { strong[0] = "TU"; nstrong = 1; weaker[0] = "AG"; weaker[1] = "CT"; nweaker = 2; nalpha = 10; } else { strong[0] = "STA"; strong[1] = "NEQK"; strong[2] = "NHQK"; strong[3] = "NDEQ"; strong[4] = "QHRK"; strong[5] = "MILV"; strong[6] = "MILF"; strong[7] = "HY"; strong[8] = "FYW"; nstrong = 9; weaker[0] = "CSA"; weaker[1] = "ATV"; weaker[2] = "SAG"; weaker[3] = "STNK"; weaker[4] = "STPA"; weaker[5] = "SGND"; weaker[6] = "SNDEQK"; weaker[7] = "NDEQHK"; weaker[8] = "NEQHRK"; weaker[9] = "FVLIM"; weaker[10] = "HFY"; nweaker = 11; nalpha = 20; } for( i=0; i= nalpha || amino_n[(int)firstletter] < 0 ) continue; for( j=0; j 0 && (*++argv)[0] == '-' ) { 
while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'c': comment = *++argv; fprintf( stderr, "comment = %s\n", comment ); --argc; goto nextoption; case 'r': orderfile = *++argv; fprintf( stderr, "orderfile = %s\n", orderfile ); --argc; goto nextoption; case 'n': namelen = myatoi( *++argv ); fprintf( stderr, "namelen = %d\n", namelen ); --argc; goto nextoption; case 'f': format = 'f'; break; case 'y': format = 'y'; break; case 'E': extendedalphabet = 1; nblosum = -2; break; case 'N': extendedalphabet = 0; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static int *nlen; static char **name, **seq, *mark; static int *order; int i; FILE *infp; FILE *orderfp; char gett[B]; int nlenmin; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen_casepreserve( infp, &nlenmin ); rewind( infp ); seq = AllocateCharMtx( njob, nlenmax*2+1 ); mark = AllocateCharVec( nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); if( orderfile ) { orderfp = fopen( orderfile, "r" ); if( !orderfp ) { fprintf( stderr, "Cannot open %s\n", orderfile ); exit( 1 ); } for( i=0; iR = result->I = 0.0; result++; } } #if 0 static void vec_init2( Fukusosuu **result, char *seq, double eff, int st, int ed ) { int i; for( i=st; i= 0 ) result->R += incr * score[n]; #if 0 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R ); #endif } } static void seq_vec_3( Fukusosuu **result, double incr, char *seq ) { int i; int n; for( i=0; *seq; i++ ) { n = amino_n[(int)*seq++]; if( n < n20or4or2 && n >= 0 ) result[n][i].R += 
incr; } } #if 0 static void seq_vec( Fukusosuu *result, char query, double incr, char *seq ) { #if 0 int bk = nlen; #endif while( *seq ) { if( *seq++ == query ) result->R += incr; result++; #if 0 fprintf( stderr, "i = %d result->R = %f\n", bk-nlen, (result-1)->R ); #endif } } static int checkRepeat( int num, int *cutpos ) { int tmp, buf; buf = *cutpos; while( num-- ) { if( ( tmp = *cutpos++ ) < buf ) return( 1 ); buf = tmp; } return( 0 ); } static int segcmp( void *ptr1, void *ptr2 ) { int diff; Segment **seg1 = (Segment **)ptr1; Segment **seg2 = (Segment **)ptr2; #if 0 return( (*seg1)->center - (*seg2)->center ); #else diff = (*seg1)->center - (*seg2)->center; if( diff ) return( diff ); diff = (*seg1)->start - (*seg2)->start; if( diff ) return( diff ); diff = (*seg1)->end - (*seg2)->end; if( diff ) return( diff ); fprintf( stderr, "USE STABLE SORT !!\n" ); exit( 1 ); return( 0 ); #endif } #endif static void mymergesort( int first, int last, Segment **seg ) { int middle; static TLS int i, j, k, p; static TLS int allo = 0; static TLS Segment **work = NULL; if( seg == NULL ) { free( work ); work = NULL; return; } if( last > allo ) { allo = last; if( work ) free( work ); work = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) ); } if( first < last ) { middle = ( first + last ) / 2; mymergesort( first, middle, seg ); mymergesort( middle+1, last, seg ); p = 0; for( i=first; i<=middle; i++ ) work[p++] = seg[i]; i = middle + 1; j = 0; k = first; while( i <= last && j < p ) { if( work[j]->center <= seg[i]->center ) seg[k++] = work[j++]; else seg[k++] = seg[i++]; } while( j < p ) seg[k++] = work[j++]; } } float Falign_localhom( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, LocalHom ***localhom, float *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres ) { // tditeration.c deha alloclen ha huhen nanode // prevalloclen ha iranai. 
int i, j, k, l, m, maxk; int nlen, nlen2, nlen4; static TLS int crossscoresize = 0; static TLS char **tmpseq1 = NULL; static TLS char **tmpseq2 = NULL; static TLS char **tmpptr1 = NULL; static TLS char **tmpptr2 = NULL; static TLS char **tmpres1 = NULL; static TLS char **tmpres2 = NULL; static TLS char **result1 = NULL; static TLS char **result2 = NULL; #if RND static TLS char **rndseq1 = NULL; static TLS char **rndseq2 = NULL; #endif static TLS Fukusosuu **seqVector1 = NULL; static TLS Fukusosuu **seqVector2 = NULL; static TLS Fukusosuu **naiseki = NULL; static TLS Fukusosuu *naisekiNoWa = NULL; static TLS double *soukan = NULL; static TLS double **crossscore = NULL; int nlentmp; static TLS int *kouho = NULL; static TLS Segment *segment = NULL; static TLS Segment *segment1 = NULL; static TLS Segment *segment2 = NULL; static TLS Segment **sortedseg1 = NULL; static TLS Segment **sortedseg2 = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; static TLS char *sgap1, *egap1, *sgap2, *egap2; static TLS int localalloclen = 0; int lag; int tmpint; int count, count0; int len1, len2; int totallen; float totalscore; float impmatch; extern Fukusosuu *AllocateFukusosuuVec(); extern Fukusosuu **AllocateFukusosuuMtx(); if( seq1 == NULL ) { if( result1 ) { // fprintf( stderr, "Freeing localarrays in Falign\n" ); localalloclen = 0; mymergesort( 0, 0, NULL ); alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL ); fft( 0, NULL, 1 ); A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, 0, 0, 0 ); partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL ); blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL ); if( crossscore ) FreeDoubleMtx( crossscore ); FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); free( sgap1 ); free( egap1 ); free( sgap2 
); free( egap2 ); free( kouho ); free( cut1 ); free( cut2 ); free( tmpptr1 ); free( tmpptr2 ); free( segment ); free( segment1 ); free( segment2 ); free( sortedseg1 ); free( sortedseg2 ); if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } } else { // fprintf( stderr, "Did not allocate localarrays in Falign\n" ); } return( 0.0 ); } len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( !localalloclen ) { sgap1 = AllocateCharVec( njob ); egap1 = AllocateCharVec( njob ); sgap2 = AllocateCharVec( njob ); egap2 = AllocateCharVec( njob ); kouho = AllocateIntVec( NKOUHO ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); // crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 4; else if( fftscore == 1 ) n20or4or2 = 2; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( 
seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); if( !kobetsubunkatsu ) { naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); } #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { fprintf( stderr, "FFT ... " ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; m /dev/tty" ); #if 0 fftfp = fopen( "list.plot", "w" ); fprintf( fftfp, "plot 'frt'\n pause +1" ); fclose( fftfp ); system( "/usr/bin/gnuplot list.plot" ); #endif #endif getKouho( kouho, NKOUHO, soukan, nlen ); #if 0 for( i=0; i MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); while( tmpint-- > 0 ) { if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; 
segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fftfp = fopen( "cand", "a" ); fprintf( fftfp, "Goukaku=%dko\n", tmpint ); fprintf( fftfp, "in 1 %d\n", segment1[count].center ); fprintf( fftfp, "in 2 %d\n", segment2[count].center ); fclose( fftfp ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 if( !kobetsubunkatsu ) fprintf( stderr, "%d segments found\n", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if DEBUG fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? 
\n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j output" exit end if scale <= 0.0 then STDERR.puts "Inappropriate scale, #{scale.to_s}" exit end STDERR.puts "scale = " + scale.to_s infp = File.open( ARGV.shift, "r" ) tree = "" while line = infp.gets tree += line.strip break if tree =~ /;$/ end infp.close #tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "").gsub(/:\-[0-9\.]+/, ":0.0" ) tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") STDERR.puts "Initial tree = " + tree def resolve( tree ) while 1 # p tree tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" ) hit1 = $1 hit2 = $2 hit3 = $3 hit4 = $4 # p hit1 # p hit2 # p hit3 # p hit4 # puts "introduce XXX" # p tree break unless tree.index(/XXX/) poshit = tree.index(/XXX/) # puts "poshit=" + poshit.to_s i = poshit height = 0 while i >= 0 break if height == 0 && tree[i..i] == '(' if tree[i..i] == ')' then height += 1 elsif tree[i..i] == '(' then height -= 1 end i -= 1 end poskakko = i # puts "poskakko = " + poskakko.to_s zenhan = tree[0..poskakko] zenhan = "" if poskakko == -1 # puts "zenhan = " + zenhan treelen = tree.length tree = zenhan + "(" + tree[poskakko+1..treelen] # puts "add (" # p tree tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" ) # p tree end return tree end memi = [-1,-1] leni = [-1,-1] while tree.index( /\(/ ) tree = resolve( tree ) tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" ) memi[0] = $1.to_i leni[0] = $2.to_f * scale memi[1] = $3.to_i leni[1] = $4.to_f * scale if leni[0] > 10 || leni[1] > 10 then STDERR.puts "" STDERR.puts "Please check the scale of 
branch length!" STDERR.puts "The unit of branch lengths must be 'substitution/site'" STDERR.puts "If the unit is 'substition' in your tree, please" STDERR.puts "use the scale argument," STDERR.puts "% newick2mafft scale in > out" STDERR.puts "where scale = 1/(alignment length)" STDERR.puts "" exit 1 end # STDERR.puts "subtree = " + $& if memi[1] < memi[0] then memi.reverse! leni.reverse! end tree.sub!( /XXX/, memi[0].to_s ) # STDERR.puts "Tree = " + tree printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] ) end mafft-7.123-without-extensions/core/pair2hat3s.c0000640000076500007650000002435612176060445020620 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 1 #define TSUYOSAFACTOR 100 static char *pairfile; static int nhomologs; void strip( char *s ) { char *pt = s; while( *++pt ) if( *pt == '\n' ) *pt = 0; } int searchused( char *q, char **keys, int n ) { int i; for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'p': pairfile = *++argv; fprintf( stderr, "pairfile = %s\n", pairfile ); --argc; goto nextoption; case 't': nhomologs = myatoi( *++argv ); fprintf( stderr, "nhomologs = %d\n", nhomologs ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char **name, int nlen[M], char **seq, 
double *effarr, int alloclen ) { FILE *tmpfp; static char dumm1[B], dumm0[B]; int i, j; char *res; FILE *hat3p; static double *effarr1 = NULL; static double *effarr2 = NULL; static char **pseq; LocalHom **localhomtable, *tmpptr; float pscore = 0.0; // by D.Mathog, aguess char *aseq = NULL; // by D.Mathog char **usedseqs = NULL; // by D.Mathog char **usednames = NULL; // by D.Mathog int nused; double tsuyosa; tsuyosa = (double)nhomologs * (nhomologs-1) / njob * TSUYOSAFACTOR; fprintf( stderr, "tsuyosa = %f\n", tsuyosa ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } fclose( tmpfp ); fclose( hat3p ); for( i=0; i%s\n%s\n", usednames[i], usedseqs[i] ); #if 0 fprintf( stderr, "##### writing hat3\n" ); hat3p = fopen( "hat3", "w" ); if( !hat3p ) ErrorExit( "Cannot open hat3." ); ilim = njob-1; for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } } } fclose( hat3p ); #endif #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. 
FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; 
static char **bseq; static double *eff; int i; char c; int alloclen; FILE *infp; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !pairfile ) { fprintf( stderr, "Usage: %s -p pairfile -i inputfile \n", argv[0] ); exit( 1 ); } getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); free( largeM ); free( Mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } return( 0.0 ); } // fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw 
); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijpipt = i - 1; *ijpjpt = mpi; } g = *prept; if( g > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "m%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijpipt = *mpjpt; *ijpjpt = j - 1; //IRU! } g = *prept; if( g > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif g = tbk + fpenalty_OP; // g = tbk; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } // g = Mi; if( Mi > tbk ) { tbk = Mi; //error desu. 
tbki = i-1; tbkj = Mpi; } // g = *Mjpt; if( *Mjpt > tbk ) { tbk = *Mjpt; tbki = *Mpjpt; tbkj = j-1; } // tbk += fpenalty_EX;// + foffset; // g = *prept; if( *prept > *Mjpt ) { *Mjpt = *prept; *Mpjpt = i-1; } // *Mjpt += fpenalty_EX;// + foffset; // g = *prept; if( *prept > Mi ) { Mi = *prept; Mpi = j-1; } // Mi += fpenalty_EX;// + foffset; // fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); // fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijpipt = localstop; // *ijpjpt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; curpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } #if DEBUG2 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif if( ijpi[endali][endalj] == localstop ) // && ijpj[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; return( 0.0 ); } gentracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, endali, endalj ); // fprintf( stderr, "### impmatch = %f\n", *impmatch ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); 
/*
 * JTTmtx -- fill in a 20x20 amino-acid relatedness matrix, the residue
 * frequencies, the residue alphabet and the residue-group table.
 *
 * rsr      : output, 20x20; receives the normalised, symmetric matrix.
 * freq     : output, 20 entries; receives freq0_TM[] when isTM is non-zero,
 *            freq0[] otherwise.
 * locamino : output, 26 entries; receives the alphabet string locamino0.
 * locgrp   : output; the group code of residue locamino0[i] is stored at
 *            index (int)locamino0[i], i.e. it is indexed by character code.
 *            NOTE(review): the parameter is declared char[26] but character
 *            codes such as 'A' exceed 25, so the caller's buffer must be
 *            larger than 26 -- confirm against the callers.
 * isTM     : non-zero selects the transmembrane counts (upper triangle of
 *            r) and freq0_TM; zero selects the general counts (lower
 *            triangle of r) and freq0.
 *
 * The work array r holds both count tables at once: the lower triangle is
 * the general table, the upper triangle the transmembrane table.  The
 * selected triangle is divided by 400 * f[i] * f[j] and mirrored onto the
 * other triangle, so the result copied to rsr is symmetric with a zero
 * diagonal.
 */
void JTTmtx( double **rsr, double *freq, char locamino[26], char locgrp[26], int isTM )
{
	int i, j;
	double r[20][20];
//	char locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-U";
	/* alphabet order used for every table in this function */
	char locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-J";
	/* group code of each symbol of locamino0, in the same order */
	char locgrp0[] = { 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, 6, 6, 6, 1, };
	/* residue frequencies paired with the general (lower-triangle) counts */
	double freq0[20] = { 0.077, 0.051, 0.043, 0.052, 0.020, 0.041, 0.062, 0.074, 0.023, 0.052, 0.091, 0.059, 0.024, 0.040, 0.051, 0.069, 0.059, 0.014, 0.032, 0.066, };
	/* residue frequencies paired with the transmembrane (upper-triangle) counts */
	double freq0_TM[20] = { 0.1051, 0.0157, 0.0185, 0.0089, 0.0219, 0.0141, 0.0097, 0.0758, 0.0168, 0.1188, 0.1635, 0.0112, 0.0333, 0.0777, 0.0260, 0.0568, 0.0523, 0.0223, 0.0324, 0.1195, };

	/* Lower triangular is JTT's Accepted point mutations */
	r[ 1][ 0]= 247;
	r[ 2][ 0]= 216; r[ 2][ 1]= 116;
	r[ 3][ 0]= 386; r[ 3][ 1]= 48; r[ 3][ 2]= 1433;
	r[ 4][ 0]= 106; r[ 4][ 1]= 125; r[ 4][ 2]= 32; r[ 4][ 3]= 13;
	r[ 5][ 0]= 208; r[ 5][ 1]= 750; r[ 5][ 2]= 159; r[ 5][ 3]= 130; r[ 5][ 4]= 9;
	r[ 6][ 0]= 600; r[ 6][ 1]= 119; r[ 6][ 2]= 180; r[ 6][ 3]= 2914; r[ 6][ 4]= 8; r[ 6][ 5]= 1027;
	r[ 7][ 0]= 1183; r[ 7][ 1]= 614; r[ 7][ 2]= 291; r[ 7][ 3]= 577; r[ 7][ 4]= 98; r[ 7][ 5]= 84; r[ 7][ 6]= 610;
	r[ 8][ 0]= 46; r[ 8][ 1]= 446; r[ 8][ 2]= 466; r[ 8][ 3]= 144; r[ 8][ 4]= 40; r[ 8][ 5]= 635; r[ 8][ 6]= 41; r[ 8][ 7]= 41;
	r[ 9][ 0]= 173; r[ 9][ 1]= 76; r[ 9][ 2]= 130; r[ 9][ 3]= 37; r[ 9][ 4]= 19; r[ 9][ 5]= 20; r[ 9][ 6]= 43; r[ 9][ 7]= 25; r[ 9][ 8]= 26;
	r[10][ 0]= 257; r[10][ 1]= 205; r[10][ 2]= 63; r[10][ 3]= 34; r[10][ 4]= 36; r[10][ 5]= 314; r[10][ 6]= 65; r[10][ 7]= 56; r[10][ 8]= 134; r[10][ 9]= 1324;
	r[11][ 0]= 200; r[11][ 1]= 2348; r[11][ 2]= 758; r[11][ 3]= 102; r[11][ 4]= 7; r[11][ 5]= 858; r[11][ 6]= 754; r[11][ 7]= 142; r[11][ 8]= 85; r[11][ 9]= 75; r[11][10]= 94;
	r[12][ 0]= 100; r[12][ 1]= 61; r[12][ 2]= 39; r[12][ 3]= 27; r[12][ 4]= 23; r[12][ 5]= 52; r[12][ 6]= 30; r[12][ 7]= 27; r[12][ 8]= 21; r[12][ 9]= 704; r[12][10]= 974; r[12][11]= 103;
	r[13][ 0]= 51; r[13][ 1]= 16; r[13][ 2]= 15; r[13][ 3]= 8; r[13][ 4]= 66; r[13][ 5]= 9; r[13][ 6]= 13; r[13][ 7]= 18; r[13][ 8]= 50; r[13][ 9]= 196; r[13][10]= 1093; r[13][11]= 7; r[13][12]= 49;
	r[14][ 0]= 901; r[14][ 1]= 217; r[14][ 2]= 31; r[14][ 3]= 39; r[14][ 4]= 15; r[14][ 5]= 395; r[14][ 6]= 71; r[14][ 7]= 93; r[14][ 8]= 157; r[14][ 9]= 31; r[14][10]= 578; r[14][11]= 77; r[14][12]= 23; r[14][13]= 36;
	r[15][ 0]= 2413; r[15][ 1]= 413; r[15][ 2]= 1738; r[15][ 3]= 244; r[15][ 4]= 353; r[15][ 5]= 182; r[15][ 6]= 156; r[15][ 7]= 1131; r[15][ 8]= 138; r[15][ 9]= 172; r[15][10]= 436; r[15][11]= 228; r[15][12]= 54; r[15][13]= 309; r[15][14]= 1138;
	r[16][ 0]= 2440; r[16][ 1]= 230; r[16][ 2]= 693; r[16][ 3]= 151; r[16][ 4]= 66; r[16][ 5]= 149; r[16][ 6]= 142; r[16][ 7]= 164; r[16][ 8]= 76; r[16][ 9]= 930; r[16][10]= 172; r[16][11]= 398; r[16][12]= 343; r[16][13]= 39; r[16][14]= 412; r[16][15]= 2258;
	r[17][ 0]= 11; r[17][ 1]= 109; r[17][ 2]= 2; r[17][ 3]= 5; r[17][ 4]= 38; r[17][ 5]= 12; r[17][ 6]= 12; r[17][ 7]= 69; r[17][ 8]= 5; r[17][ 9]= 12; r[17][10]= 82; r[17][11]= 9; r[17][12]= 8; r[17][13]= 37; r[17][14]= 6; r[17][15]= 36; r[17][16]= 8;
	r[18][ 0]= 41; r[18][ 1]= 46; r[18][ 2]= 114; r[18][ 3]= 89; r[18][ 4]= 164; r[18][ 5]= 40; r[18][ 6]= 15; r[18][ 7]= 15; r[18][ 8]= 514; r[18][ 9]= 61; r[18][10]= 84; r[18][11]= 20; r[18][12]= 17; r[18][13]= 850; r[18][14]= 22; r[18][15]= 164; r[18][16]= 45; r[18][17]= 41;
	r[19][ 0]= 1766; r[19][ 1]= 69; r[19][ 2]= 55; r[19][ 3]= 127; r[19][ 4]= 99; r[19][ 5]= 58; r[19][ 6]= 226; r[19][ 7]= 276; r[19][ 8]= 22; r[19][ 9]= 3938; r[19][10]= 1261; r[19][11]= 58; r[19][12]= 559; r[19][13]= 189; r[19][14]= 84; r[19][15]= 219; r[19][16]= 526; r[19][17]= 27; r[19][18]= 42;

	/* Upper triangular is JTT's Accepted point mutations for transmembrane */
	r[ 0][ 1]= 21; r[ 0][ 2]= 2; r[ 0][ 3]= 7; r[ 0][ 4]= 13; r[ 0][ 5]= 4; r[ 0][ 6]= 6; r[ 0][ 7]= 160; r[ 0][ 8]= 6; r[ 0][ 9]= 44; r[ 0][10]= 43; r[ 0][11]= 5; r[ 0][12]= 10; r[ 0][13]= 21; r[ 0][14]= 34; r[ 0][15]= 198; r[ 0][16]= 202; r[ 0][17]= 0; r[ 0][18]= 1; r[ 0][19]= 292;
	r[ 1][ 2]= 0; r[ 1][ 3]= 1; r[ 1][ 4]= 2; r[ 1][ 5]= 21; r[ 1][ 6]= 3; r[ 1][ 7]= 22; r[ 1][ 8]= 21; r[ 1][ 9]= 4; r[ 1][10]= 8; r[ 1][11]= 53; r[ 1][12]= 19; r[ 1][13]= 0; r[ 1][14]= 1; r[ 1][15]= 5; r[ 1][16]= 5; r[ 1][17]= 28; r[ 1][18]= 0; r[ 1][19]= 0;
	r[ 2][ 3]= 14; r[ 2][ 4]= 1; r[ 2][ 5]= 7; r[ 2][ 6]= 0; r[ 2][ 7]= 0; r[ 2][ 8]= 8; r[ 2][ 9]= 4; r[ 2][10]= 5; r[ 2][11]= 11; r[ 2][12]= 3; r[ 2][13]= 1; r[ 2][14]= 2; r[ 2][15]= 32; r[ 2][16]= 19; r[ 2][17]= 1; r[ 2][18]= 1; r[ 2][19]= 2;
	r[ 3][ 4]= 0; r[ 3][ 5]= 0; r[ 3][ 6]= 12; r[ 3][ 7]= 15; r[ 3][ 8]= 4; r[ 3][ 9]= 1; r[ 3][10]= 0; r[ 3][11]= 2; r[ 3][12]= 1; r[ 3][13]= 0; r[ 3][14]= 1; r[ 3][15]= 0; r[ 3][16]= 6; r[ 3][17]= 0; r[ 3][18]= 1; r[ 3][19]= 4;
	r[ 4][ 5]= 0; r[ 4][ 6]= 0; r[ 4][ 7]= 13; r[ 4][ 8]= 2; r[ 4][ 9]= 4; r[ 4][10]= 11; r[ 4][11]= 0; r[ 4][12]= 1; r[ 4][13]= 34; r[ 4][14]= 0; r[ 4][15]= 48; r[ 4][16]= 13; r[ 4][17]= 8; r[ 4][18]= 23; r[ 4][19]= 47;
	r[ 5][ 6]= 16; r[ 5][ 7]= 1; r[ 5][ 8]= 26; r[ 5][ 9]= 1; r[ 5][10]= 16; r[ 5][11]= 6; r[ 5][12]= 3; r[ 5][13]= 0; r[ 5][14]= 5; r[ 5][15]= 7; r[ 5][16]= 2; r[ 5][17]= 0; r[ 5][18]= 0; r[ 5][19]= 0;
	r[ 6][ 7]= 21; r[ 6][ 8]= 0; r[ 6][ 9]= 0; r[ 6][10]= 0; r[ 6][11]= 0; r[ 6][12]= 0; r[ 6][13]= 0; r[ 6][14]= 0; r[ 6][15]= 4; r[ 6][16]= 2; r[ 6][17]= 0; r[ 6][18]= 0; r[ 6][19]= 7;
	r[ 7][ 8]= 1; r[ 7][ 9]= 10; r[ 7][10]= 0; r[ 7][11]= 0; r[ 7][12]= 3; r[ 7][13]= 4; r[ 7][14]= 7; r[ 7][15]= 64; r[ 7][16]= 12; r[ 7][17]= 5; r[ 7][18]= 0; r[ 7][19]= 53;
	r[ 8][ 9]= 3; r[ 8][10]= 2; r[ 8][11]= 0; r[ 8][12]= 1; r[ 8][13]= 0; r[ 8][14]= 0; r[ 8][15]= 0; r[ 8][16]= 4; r[ 8][17]= 0; r[ 8][18]= 29; r[ 8][19]= 2;
	r[ 9][10]= 273; r[ 9][11]= 0; r[ 9][12]= 161; r[ 9][13]= 66; r[ 9][14]= 4; r[ 9][15]= 22; r[ 9][16]= 150; r[ 9][17]= 1; r[ 9][18]= 4; r[ 9][19]= 883;
	r[10][11]= 1; r[10][12]= 153; r[10][13]= 251; r[10][14]= 37; r[10][15]= 43; r[10][16]= 26; r[10][17]= 20; r[10][18]= 6; r[10][19]= 255;
	r[11][12]= 4; r[11][13]= 0; r[11][14]= 0; r[11][15]= 1; r[11][16]= 2; r[11][17]= 0; r[11][18]= 5; r[11][19]= 1;
	r[12][13]= 8; r[12][14]= 0; r[12][15]= 1; r[12][16]= 32; r[12][17]= 1; r[12][18]= 5; r[12][19]= 89;
	r[13][14]= 0; r[13][15]= 32; r[13][16]= 9; r[13][17]= 2; r[13][18]= 54; r[13][19]= 37;
	r[14][15]= 9; r[14][16]= 10; r[14][17]= 0; r[14][18]= 1; r[14][19]= 1;
	r[15][16]= 134; r[15][17]= 1; r[15][18]= 22; r[15][19]= 13;
	r[16][17]= 1; r[16][18]= 3; r[16][19]= 48;
	r[17][18]= 2; r[17][19]= 18;
	r[18][19]= 2;

	/* the diagonal is the only part of r not assigned above */
	for (i = 0; i < 20; i++) r[i][i] = 0.0;
	if( isTM )
	{
		/* normalise the upper triangle and mirror it onto the lower one */
		for (i = 1; i < 20; i++) for (j = 0; j < i; j++)
		{
			r[j][i] /= 400.0 * freq0_TM[i] * freq0_TM[j];
			r[i][j] = r[j][i];
		}
		for( i=0; i<20; i++ ) freq[i] = freq0_TM[i];
	}
	else
	{
		/* normalise the lower triangle and mirror it onto the upper one */
		for (i = 1; i < 20; i++) for (j = 0; j < i; j++)
		{
			r[i][j] /= 400.0 * freq0[i] * freq0[j];
			r[j][i] = r[i][j];
		}
		for( i=0; i<20; i++ ) freq[i] = freq0[i];
	}

	for( i=0; i<26; i++ ) locamino[i] = locamino0[i];
	/* group table is keyed by the residue's character code, not by its position */
	for( i=0; i<26; i++ ) locgrp[(int)locamino[i]] = locgrp0[i];
	for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) rsr[i][j] = r[i][j];
}
/*
 * arguments -- reset every option variable to its default, then parse the
 * command line.
 *
 * argc, argv : as received by main(); argv[0] is skipped.
 *
 * Each argv word that starts with '-' is scanned character by character,
 * so several flag letters may share one word.  A letter that takes a value
 * reads it from the NEXT argv word and then jumps to `nextoption`, which
 * abandons the rest of the current word.
 * NOTE(review): value-taking letters do not check that a next word exists.
 *
 * After the option words, one optional positional argument is read into
 * `cut` with atof().  The process exits with status 1 when arguments are
 * left over, when an unknown letter was seen (argc is forced to 0, the
 * loop then drives it to -1, and the `argc != 0` test fires), or when
 * tbitr == 1 and outgap == 0.
 */
void arguments( int argc, char *argv[] )
{
	int c;

	/* defaults */
	nthread = 1;
	outnumber = 0;
	topin = 0;
	treein = 0;
	treeout = 0;
	distout = 0;
	noalign = 0;
	nevermemsave = 0;
	inputfile = NULL;
	nadd = 0;
	addprofile = 1;
	fftkeika = 0;
	constraint = 0;
	nblosum = 62;
	fmodel = 0;
	calledByXced = 0;
	devide = 0;
	use_fft = 0;
	force_fft = 0;
	fftscore = 1;
	fftRepeatStop = 0;
	fftNoAnchStop = 0;
	weight = 3;
	utree = 1;
	tbutree = 1;
	refine = 0;
	check = 1;
	cut = 0.0;
	disp = 0;
	outgap = 1;
	alg = 'A';
	mix = 0;
	tbitr = 0;
	scmtd = 5;
	tbweight = 0;
	tbrweight = 3;
	checkC = 0;
	treemethod = 'X';
	contin = 0;
	scoremtx = 1;
	kobetsubunkatsu = 0;
	dorp = NOTSPECIFIED;
	ppenalty = -1530;
	ppenalty_ex = NOTSPECIFIED;
	poffset = -123;
	kimuraR = NOTSPECIFIED;
	pamN = NOTSPECIFIED;
	geta2 = GETA2;
	fftWinSize = NOTSPECIFIED;
	fftThreshold = NOTSPECIFIED;
	TMorJTT = JTT;
	scoreout = 0;
	tuplesize = 6;
	subalignment = 0;
	subalignmentoffset = 0;
	legacygapcost = 0;

	while( --argc > 0 && (*++argv)[0] == '-' )
	{
		/* *++argv[0] advances the character pointer inside the current word */
		while ( (c = *++argv[0]) )
		{
			switch( c )
			{
				case 'i':
					inputfile = *++argv;
					fprintf( stderr, "inputfile = %s\n", inputfile );
					--argc;
					goto nextoption;
				case 'I':
					nadd = myatoi( *++argv );
					fprintf( stderr, "nadd = %d\n", nadd );
					--argc;
					goto nextoption;
				case 'f':
					/* penalties are given as decimals and stored multiplied by 1000 */
					ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
//					fprintf( stderr, "ppenalty = %d\n", ppenalty );
					--argc;
					goto nextoption;
				case 'g':
					ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
					fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
					--argc;
					goto nextoption;
				case 'h':
					poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
//					fprintf( stderr, "poffset = %d\n", poffset );
					--argc;
					goto nextoption;
				case 'k':
					kimuraR = myatoi( *++argv );
					fprintf( stderr, "kappa = %d\n", kimuraR );
					--argc;
					goto nextoption;
				case 'b':
					nblosum = myatoi( *++argv );
					scoremtx = 1;
//					fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
					--argc;
					goto nextoption;
				case 'j':
					pamN = myatoi( *++argv );
					scoremtx = 0;
					TMorJTT = JTT;
					fprintf( stderr, "jtt/kimura %d\n", pamN );
					--argc;
					goto nextoption;
				case 'm':
					pamN = myatoi( *++argv );
					scoremtx = 0;
					TMorJTT = TM;
					fprintf( stderr, "tm %d\n", pamN );
					--argc;
					goto nextoption;
				case 'C':
					nthread = myatoi( *++argv );
					fprintf( stderr, "nthread = %d\n", nthread );
					--argc;
					goto nextoption;
#if 1
				case 'a':
					fmodel = 1;
					break;
#endif
				case 'K':
					addprofile = 0;
					break;
				case 'y':
					distout = 1;
					break;
				case 't':
					treeout = 1;
					break;
				case 'T':
					noalign = 1;
					break;
#if 0
				case 'r':
					fmodel = -1;
					break;
#endif
				case 'D':
					dorp = 'd';
					break;
				case 'P':
					dorp = 'p';
					break;
				case 'L':
					legacygapcost = 1;
					break;
				case 'e':
					fftscore = 0;
					break;
				case 'H':
					subalignment = 1;
					subalignmentoffset = myatoi( *++argv );
					--argc;
					goto nextoption;
#if 0
				case 'R':
					fftRepeatStop = 1;
					break;
#endif
				case 'n' :
					outnumber = 1;
					break;
				case 's':
					treemethod = 's';
					break;
				case 'X':
					treemethod = 'X'; // mix
					break;
				case 'E':
					treemethod = 'E'; // upg (average)
					break;
				case 'q':
					treemethod = 'q'; // minimum
					break;
#if 0
				case 'a':
					alg = 'a';
					break;
				case 'H':
					alg = 'H';
					break;
#endif
				case 'R':
					alg = 'R';
					break;
				case 'Q':
					alg = 'Q';
					break;
				case 'A':
					alg = 'A';
					break;
				case 'N':
					nevermemsave = 1;
					break;
				case 'M':
					alg = 'M';
					break;
				case 'S':
					scoreout = 1;
					break;
				case 'B':
					/* accepted and ignored */
					break;
				case 'F':
					use_fft = 1;
					break;
				case 'G':
					use_fft = 1;
					force_fft = 1;
					break;
				case 'V':
					topin = 1;
					break;
				case 'U':
					treein = 1;
					break;
				case 'u':
					weight = 0;
					tbrweight = 0;
					break;
				case 'v':
					tbrweight = 3;
					break;
				case 'd':
					disp = 1;
					break;
#if 1
				case 'O':
					outgap = 0;
					break;
#else
				case 'O':
					fftNoAnchStop = 1;
					break;
#endif
				case 'J':
					tbutree = 0;
					break;
				case 'z':
					fftThreshold = myatoi( *++argv );
					--argc;
					goto nextoption;
				case 'w':
					fftWinSize = myatoi( *++argv );
					--argc;
					goto nextoption;
				case 'W':
					tuplesize = myatoi( *++argv );
					--argc;
					goto nextoption;
				case 'Z':
					checkC = 1;
					break;
				default:
					fprintf( stderr, "illegal option %c\n", c );
					/* forces the outer loop to stop and the argc != 0 test below to fail */
					argc = 0;
					break;
			}
		}
		nextoption:
			;
	}
	/* one optional positional argument */
	if( argc == 1 )
	{
		cut = atof( (*argv) );
		argc--;
	}
	if( argc != 0 )
	{
		fprintf( stderr, "options: Check source file !\n" );
		exit( 1 );
	}
	if( tbitr == 1 && outgap == 0 )
	{
		fprintf( stderr, "conflicting options : o, m or u\n" );
		exit( 1 );
	}
}

static int maxl;
static int tsize;
/* number of input characters dropped by seq_grp_nuc() and seq_grp() */
static int nunknown = 0;

/*
 * seq_grp_nuc -- translate a sequence into a vector of group codes.
 *
 * grp : output vector, terminated with END_OF_VEC.
 * seq : NUL-terminated input sequence.
 *
 * Every character is mapped through amino_grp[]; codes below 4 are kept,
 * any other character is dropped and counted in nunknown.  When fewer
 * than `tuplesize` codes were kept, the first element is overwritten with
 * -1 to mark the sequence as too short.
 */
void seq_grp_nuc( int *grp, char *seq )
{
	int tmp;
	int *grpbk = grp;
	while( *seq )
	{
		tmp = amino_grp[(int)*seq++];
		if( tmp < 4 )
			*grp++ = tmp;
		else
			nunknown++;
	}
	*grp = END_OF_VEC;
	if( grp - grpbk < tuplesize )
	{
//		fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
//		exit( 1 );
		*grpbk = -1;
	}
}

/*
 * seq_grp -- same as seq_grp_nuc(), but keeps codes below 6 and uses a
 * fixed minimum length of 6 instead of `tuplesize`.
 */
void seq_grp( int *grp, char *seq )
{
	int tmp;
	int *grpbk = grp;
	while( *seq )
	{
		tmp = amino_grp[(int)*seq++];
		if( tmp < 6 )
			*grp++ = tmp;
		else
			nunknown++;
	}
	*grp = END_OF_VEC;
	if( grp - grpbk < 6 )
	{
//		fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
//		exit( 1 );
		*grpbk = -1;
	}
}

/*
 * makecompositiontable_p -- count occurrences of each value in pointt.
 *
 * table  : counters indexed by the values found in pointt; each occurrence
 *          increments its counter (the table is not cleared here).
 * pointt : input vector, read up to its END_OF_VEC terminator.
 */
void makecompositiontable_p( int *table, int *pointt )
{
	int point;

	while( ( point = *pointt++ ) != END_OF_VEC )
		table[point]++;
}
#ifndef END_OF_VEC
#define END_OF_VEC -1	/* fallback only; same value as the file-level definition */
#endif

/*
 * kmer_pointtable -- shared worker for the makepointtable* family.
 *
 * Encodes every window of k consecutive symbols of n as one integer, read
 * as a k-digit number in the given base (first symbol = most significant
 * digit), and stores the codes in pointt followed by END_OF_VEC.
 *
 * pointt : output vector; needs room for (symbols - k + 1) codes plus the
 *          terminator.
 * n      : input symbols, each in 0..base-1, terminated with END_OF_VEC.
 *          A leading -1 marks a sequence that is too short; it is passed
 *          through as a single -1 in pointt.
 * base   : alphabet size.
 * k      : window length.
 *
 * An input holding fewer than k symbols without the leading -1 mark yields
 * an empty table.  The hand-expanded versions read past the terminator in
 * that case.
 */
static void kmer_pointtable( int *pointt, int *n, int base, int k )
{
	int point = 0;
	int lead = 1;		/* base^(k-1): weight of the symbol leaving the window */
	int *oldest = n;	/* symbol that leaves the window on the next shift */
	int i;

	if( *n == -1 )
	{
		*pointt = -1;
		return;
	}

	/* first window */
	for( i=0; i<k; i++ )
	{
		if( n[i] == END_OF_VEC )
		{
			/* fewer than k symbols: no window fits */
			*pointt = END_OF_VEC;
			return;
		}
		point = point * base + n[i];
		if( i ) lead *= base;
	}
	n += k;
	*pointt++ = point;

	/* slide: drop the oldest symbol, shift one digit, append the next symbol */
	while( *n != END_OF_VEC )
	{
		point -= *oldest++ * lead;
		point *= base;
		point += *n++;
		*pointt++ = point;
	}
	*pointt = END_OF_VEC;
}

/* 10-symbol windows over a 4-letter alphabet. */
void makepointtable_nuc_dectet( int *pointt, int *n )
{
	kmer_pointtable( pointt, n, 4, 10 );
}

/* 8-symbol windows over a 4-letter alphabet. */
void makepointtable_nuc_octet( int *pointt, int *n )
{
	kmer_pointtable( pointt, n, 4, 8 );
}

/* 6-symbol windows over a 4-letter alphabet. */
void makepointtable_nuc( int *pointt, int *n )
{
	kmer_pointtable( pointt, n, 4, 6 );
}

/* 6-symbol windows over a 6-letter (grouped) alphabet. */
void makepointtable( int *pointt, int *n )
{
	kmer_pointtable( pointt, n, 6, 6 );
}
NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, 0, 0, 0 ); // iru? free( mseq1 ); free( mseq2 ); free( localcopy ); free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); return( NULL ); } *jobpospt = l+1; if( dep[l].child0 != -1 ) { while( dep[dep[l].child0].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } if( dep[l].child1 != -1 ) { while( dep[dep[l].child1].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } while( *nrunpt >= nthread ) pthread_cond_wait( targ->treecond, targ->mutex ); (*nrunpt)++; if( mergeoralign[l] == 'n' ) { // fprintf( stderr, "SKIP!\n" ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); pthread_mutex_unlock( targ->mutex ); continue; } m1 = topol[l][0][0]; m2 = topol[l][1][0]; len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen <= len1 + len2 ) { fprintf( stderr, "\nReallocating.." ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); fprintf( stderr, "done. 
*alloclen = %d\n", *alloclen ); } for( i=0; (j=topol[l][0][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } for( i=0; (j=topol[l][1][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; } if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode { // fprintf( stderr, "Freeing commonIP (thread %d)\n", thread_no ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } pthread_mutex_unlock( targ->mutex ); #if 1 // CHUUI@@@@ clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 ); clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 ); #else clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1, indication1 ); clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2, indication2 ); #endif #if 0 for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." 
); fprintf( stderr, "\n" ); #endif /* fprintf( stderr, "before align all\n" ); display( aseq, njob ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): if( clus1 == 1 && clus2 == 1 && 0 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); } break; case( 'R' ): pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'H' ): pscore = H__align( mseq1, mseq2, 
effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); if( disp ) display( localcopy, njob ); pthread_mutex_lock( targ->mutex ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); for( i=0; (j=topol[l][0][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); for( i=0; (j=topol[l][1][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); pthread_mutex_unlock( targ->mutex ); for( i=0; (j=topol[l][0][i])!=-1; i++ ) { if(localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } for( i=0; (j=topol[l][1][i])!=-1; i++ ) { if( localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } if( topol[l][0] ) free( topol[l][0] ); topol[l][0] = NULL; if( topol[l][1] ) free( topol[l][1] ); topol[l][1] = NULL; if( topol[l] ) free( topol[l] ); topol[l] = NULL; // fprintf( stderr, "\n" ); } #if SCOREOUT fprintf( stderr, "totalscore = %10.2f\n\n", tscore ); #endif } #endif static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen ) { int l, len1, len2, i, m; int len1nocommongap, len2nocommongap; int clus1, clus2; float pscore, tscore; static char *indication1, *indication2; static double *effarr1 = NULL; static double *effarr2 = NULL; static int *fftlog; // fixed at 2006/07/26 float dumfl = 0.0; int ffttry; int m1, m2; static int *gaplen; static int *gapmap; static int *alreadyaligned; #if 0 int i, j; #endif if( effarr1 
== NULL ) { effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); fftlog = AllocateIntVec( njob ); gaplen = AllocateIntVec( *alloclen+10 ); gapmap = AllocateIntVec( *alloclen+10 ); alreadyaligned = AllocateIntVec( njob ); } for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif /* fprintf( stderr, "before align all\n" ); display( aseq, njob ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); // fprintf( stderr, "######### mseq1[0] = %s\n", mseq1[0] ); } } else { fprintf( stderr, "d" ); 
fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): if( clus1 == 1 && clus2 == 1 && 0 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); } break; case( 'R' ): pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'H' ): pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); // fprintf( stderr, "\n" ); if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. 
{ adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] ); restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus2, 0, mseq2, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( mergeoralign[l] == '2' ) { // for( i=0; iSTEP0 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP0 mseq2[%d] = \n%s\n", i, mseq2[i] ); adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); // for( i=0; iSTEP1 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP1 mseq2[%d] = \n%s\n", i, mseq2[i] ); restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); // for( i=0; iSTEP2 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP2 mseq2[%d] = \n%s\n", i, mseq2[i] ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); // for( i=0; iSTEP3 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP3 mseq2[%d] = \n%s\n", i, mseq2[i] ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); // fprintf( stderr, ">514\n%s\n", aseq[514] ); } #if SCOREOUT fprintf( stderr, "totalscore = %10.2f\n\n", tscore ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( 
fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'X' ) fprintf( fp, "Tree = UPGMA (mix).\n" ); else if( treemethod == 'E' ) fprintf( fp, "Tree = UPGMA (average).\n" ); else if( treemethod == 'q' ) fprintf( fp, "Tree = Minimum linkage.\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int *nlen; static int *nogaplen; static char **name, **seq; static char **mseq1, **mseq2; static char **bseq; static double *eff; int i, j; static int ***topol; static int *addmem; static Treedep *dep; static float **len; FILE *infp; // FILE *adfp; char c; int alloclen; float longer, shorter; float lenfac; float bunbo; FILE *orderfp, *hat2p; int *grpseq; char *tmpseq; int **pointt; float **mtx = NULL; // by D. 
Mathog static int *table1; char b[B]; int ien; double unweightedspscore; int alignmentlength; char *mergeoralign; int foundthebranch; int nsubalignments, maxmem; int **subtable; int *insubtable; int *preservegaps; char ***subalnpt; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob > 1000000 ) { fprintf( stderr, "The number of sequences must be < %d\n", 1000000 ); fprintf( stderr, "Please try the --parttree option for such large data.\n" ); exit( 1 ); } if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } if( subalignment ) { readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem ); fprintf( stderr, "nsubalignments = %d\n", nsubalignments ); fprintf( stderr, "maxmem = %d\n", maxmem ); subtable = AllocateIntMtx( nsubalignments, maxmem+1 ); insubtable = AllocateIntVec( njob ); preservegaps = AllocateIntVec( njob ); for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); else { fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) fprintf( stderr, "\nWARNING : %d unknown characters\n", nunknown ); #ifdef enablemultithread if( nthread > 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL 
); for( i=0; i nogaplen[j] ) { longer=(float)nogaplen[i]; shorter=(float)nogaplen[j]; } else { longer=(float)nogaplen[j]; shorter=(float)nogaplen[i]; } // if( tuplesize == 6 ) lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // else // lenfac = 1.0; // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter ); bunbo = MIN( mtx[i][0], mtx[j][0] ); if( bunbo == 0.0 ) mtx[i][j-i] = 1.0; else mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac; // fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo ); } } if( disopt ) { for( i=0; i= njob ) // check sumi { fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); fprintf( stderr, "#\n" ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); fprintf( stderr, "#\n" ); fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "\n" ); exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "############################################################################### \n" ); fprintf( stderr, "\n" ); exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %c\n\n", i, mergeoralign[i] ); } #endif for( i=0; i 0 && nadd == 0 ) { treebasethread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t treecond; int *fftlog; int nrun; int nthread_yoyu; nthread_yoyu = nthread * 1; nrun = 0; jobpos = 0; targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); fftlog = AllocateIntVec( njob ); handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &treecond, NULL ); for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, "Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static float recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; int res; static char com[10000]; float value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( 
ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. return( value ); } static float callmxscarna( char **mseq1, char **mseq2, int alloclen ) { FILE *fp; int res; static char com[10000]; float value; t2u( *mseq1 ); t2u( *mseq2 ); fp = fopen( "_mxscarnain", "w" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnain\n" ); exit( 1 ); } fprintf( fp, ">1\n" ); fprintf( fp, "%s\n", *mseq1 ); fprintf( fp, ">2\n" ); fprintf( fp, "%s\n", *mseq2 ); fclose( fp ); sprintf( com, "env PATH=%s mxscarna _mxscarnain > _mxscarnaout 2>/dev/null", whereispairalign ); res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } fp = fopen( "_mxscarnaout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnaout\n" ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } void arguments( int argc, char *argv[] ) { int c; foldalignopt[0] = 0; laraparams = NULL; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; 
fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; case 'd': whereispairalign = *++argv; fprintf( 
stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; #if 0 case 'O': fftNoAnchStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 's': alg = 's'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'o': // foldalignopt = *++argv; strcat( foldalignopt, " " ); strcat( foldalignopt, *++argv ); fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); --argc; goto nextoption; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen ) { int i, j, ilim; int clus1, clus2; int off1, off2; float pscore = 0.0; // by D.Mathog static char *indication1, *indication2; FILE *hat2p, *hat3p; static double **distancemtx; static double *effarr1 = NULL; static double *effarr2 = NULL; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable, *tmpptr; static char **pair; int intdum; double bunbo; localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i%d\n%s\n>%d\n%s\n>\n", i, mseq1[0], j, mseq2[0] ); #endif // putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, countamino( *mseq1, off1 ), countamino( *mseq2, off2 ), pscore, strlen( mseq1[0] ) ); // fprintf( stderr, "pscore = %f\n", pscore ); if( alg == 'H' ) // if( alg == 'H' || alg == 's' || alg == 'B' ) // next version putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, 
(int)pscore, strlen( mseq1[0] ) ); } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } } fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); 
fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char name[M][B], **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; int i; FILE *infp; char c; int alloclen; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } if( njob > M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*9+1 ); aseq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( mseq1[0] == '-' || mseq2[0] == '-' ) { for( k=0; k 3 ) code = 36; else code = code1; #else code1 = amino_n[(int)*seqpt]; code2 = amino_n[(int)*seqrpt]; if( code1 > 3 ) { code = 36; } else if( code2 > 3 ) { code = code1; } else if( *dirpt == '5' ) { code = 4 + code2 * 4 + code1; } else if( *dirpt == '3' ) { code = 20 + code2 * 4 + code1; } 
else // if( *dirpt == 'o' ) // nai { code = code1; } #endif // fprintf( stderr, "%c -> code=%d toa=%d, tog=%d, toc=%d, tot=%d, ton=%d, efee=%f\n", *seqpt, code%4, ribosumdis[code][4+0], ribosumdis[code][4+1], ribosumdis[code][4+2], ribosumdis[code][20+3], ribosumdis[code][36], feff ); seqpt++; seqrpt++; dirpt++; (*cpmxptpt++)[code] += feff; } } } void mseqcat( char **seq1, char **seq2, double **eff, double *effarr1, double *effarr2, char name1[M][B], char name2[M][B], int clus1, int clus2 ) { int i, j; for( i=0; i 0.0 ) peff_kozo[m] += peff[m]; } } else //iranai { for( m=0; m-1; j++ ) if( s1 == topol[step][branch][0] ) value++; for( j=0; (s2=topol[i][1][j])>-1; j++ ) if( s2 == topol[step][branch][0] ) value++; } return( value ); } void BranchLeafNode( int nseq, int ***topol, int *node, int step, int branch ) { int i, j, k, s; for( i=0; i-1; j++ ) node[s]++; for( k=0; k-1; j++ ) node[s]++; } void RootLeafNode( int nseq, int ***topol, int *node ) { int i, j, k, s; for( i=0; i-1; j++ ) node[s]++; } void nodeFromABranch( int nseq, int *result, int **pairwisenode, int ***topol, double **len, int step, int num ) { int i, s, count; int *innergroup; int *outergroup1; #if 0 int outergroup2[nseq]; int table[nseq]; #else static int *outergroup2 = NULL; static int *table = NULL; if( outergroup2 == NULL ) { outergroup2 = AllocateIntVec( nseq ); table = AllocateIntVec( nseq ); } #endif innergroup = topol[step][num]; outergroup1 = topol[step][!num]; for( i=0; i-1; i++ ) table[s] = 0; for( i=0; (s=outergroup1[i])>-1; i++ ) table[s] = 0; for( i=0, count=0; i-1; i++ ) { result[s] = pairwisenode[s][outergroup1[0]] + pairwisenode[s][outergroup2[0]] - pairwisenode[outergroup1[0]][outergroup2[0]] - 1; result[s] /= 2; } for( i=0; (s=outergroup1[i])>-1; i++ ) { result[s] = pairwisenode[s][outergroup2[0]] + pairwisenode[s][innergroup[0]] - pairwisenode[innergroup[0]][outergroup2[0]] + 1; result[s] /= 2; } for( i=0; (s=outergroup2[i])>-1; i++ ) { result[s] = pairwisenode[s][outergroup1[0]] 
+ pairwisenode[s][innergroup[0]] - pairwisenode[innergroup[0]][outergroup1[0]] + 1; result[s] /= 2; } #if 0 for( i=0; i-1; i++ ) { pair[r1] = 1; memlist1[k++] = r1; } memlist1[k] = -1; for( i=0, k=0; i #include "dp.h" #include "mltaln.h" int TLS commonAlloc1 = 0; int TLS commonAlloc2 = 0; int TLS **commonIP = NULL; int TLS **commonJP = NULL; int nthread = 1; int randomseed = 0; int parallelizationstrategy = BAATARI1; char modelname[100]; int njob, nlenmax; int amino_n[0x80]; char amino_grp[0x80]; int amino_dis[0x80][0x80]; int amino_disLN[0x80][0x80]; double amino_dis_consweight_multi[0x80][0x80]; int n_dis[0x80][0x80]; int n_disFFT[0x80][0x80]; float n_dis_consweight_multi[0x80][0x80]; char amino[0x80]; double polarity[0x80]; double volume[0x80]; int ribosumdis[37][37]; int ppid; double thrinter; double fastathreshold; int pslocal, ppslocal; int constraint; int divpairscore; int fmodel; // 1-> fmodel 0->default -1->raw int nblosum; // 45, 50, 62, 80 int kobetsubunkatsu; int bunkatsu; int dorp; int niter; int contin; int calledByXced; int devide; int scmtd; int weight; int utree; int tbutree; int refine; int check; double cut; int cooling; int penalty, ppenalty, penaltyLN; int RNApenalty, RNAppenalty; int RNApenalty_ex, RNAppenalty_ex; int penalty_ex, ppenalty_ex, penalty_exLN; int penalty_EX, ppenalty_EX; int penalty_OP, ppenalty_OP; int RNAthr, RNApthr; int offset, poffset, offsetLN, offsetFFT; int scoremtx; int TMorJTT; char use_fft; char force_fft; int nevermemsave; int fftscore; int fftWinSize; int fftThreshold; int fftRepeatStop; int fftNoAnchStop; int divWinSize; int divThreshold; int disp; int outgap = 1; char alg; int cnst; int mix; int tbitr; int tbweight; int tbrweight; int disopt; int pamN; int checkC; float geta2; int treemethod; int kimuraR; char *swopt; int fftkeika; int score_check; int makedistmtx; char *inputfile; char *addfile; int addprofile = 1; int rnakozo; char rnaprediction; int scoreout = 0; int outnumber = 0; int legacygapcost = 0; char 
*signalSM; FILE *prep_g; FILE *trap_g; char **seq_g; char **res_g; float consweight_multi = 1.0; float consweight_rna = 0.0; char RNAscoremtx = 'n'; char TLS *newgapstr = "-"; int nalphabets = 26; int nscoredalphabets = 20; mafft-7.123-without-extensions/core/pairash.c0000640000076500007650000007360612176060467020275 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static int usecache; static char *whereispairalign; static char *odir; static char *pdir; static double scale; static int *alreadyoutput; static int equivthreshold; static int equivwinsize; static int equivshortestlen; static void cutpath( char *s ) { char *pos; pos = s + strlen( s ); while( --pos >= s ) { if( *pos == '/' ) break; } strcpy( s, pos+1 ); } static char getchainid( char *s ) { s += strlen( s ) - 2; if( isspace( s[0] ) && isalnum( s[1] ) ) return( s[1] ); else return( 'A' ); } static void extractfirstword( char *s ) { while( *s ) { if( isspace( *s ) ) break; s++; } *s = 0; } static char *strip( char *s ) { char *v; while( *s ) { if( !isspace( *s ) ) break; s++; } v = s; s += strlen( v ) - 1; while( s>=v ) { if( !isspace( *s ) ) { *(s+1) = 0; break; } s--; } return( v ); } #if 0 static void makeequivdouble( double *d, char *c ) { while( *c ) { *d++ = (double)( *c++ - '0' ); } } static void maskequiv( double *d, int n ) { int halfwin; int ok; int i, j; halfwin = (int)( equivwinsize / 2 ); for( i=0; i 0.0 ) { len += 1; d[i] = 0.0; } else { d[i] = 0.0; if( len >= equivshortestlen ) { len++; while( len-- ) d[i-len] = dbk[i-len]; } len = 0; } } if( len >= equivshortestlen ) { len++; while( len-- ) d[n-len] = dbk[n-len]; } free( dbk ); } #endif static void makeequivdouble_tmalign( double *d, char *c, int n ) { double tmpd; double *dbk; int tmpi; char s; dbk = d; while( *c ) { if( ( s=*c++ ) == ':' ) tmpi = 9; else if( s == '.' 
) tmpi = 4; else tmpi = 0; // tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0; // if( tmpd < 0.0 ) tmpd = 0.0; tmpd = (double)( tmpi ); // *d++ = (int)tmpd; *d++ = tmpd; } d = dbk; // maskequiv( d, n ); } static void makeequivdouble_threshold( double *d, char *c, int n ) { double tmpd; double *dbk; int tmpi; dbk = d; while( *c ) { tmpi = (int)( *c++ - '0' ); tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0; if( tmpd < 0.0 ) tmpd = 0.0; // *d++ = (int)tmpd; *d++ = tmpd; } d = dbk; maskequiv( d, n ); } static void readtmalign( FILE *fp, char *seq1, char *seq2, double *equiv ) { static char *line = NULL; static char *equivchar = NULL; int n; if( equivchar == NULL ) { equivchar = calloc( nlenmax * 2 + 1, sizeof( char ) ); line = calloc( nlenmax * 2 + 1, sizeof( char ) ); } seq1[0] = 0; seq2[0] = 0; equivchar[0] = 0; // system( "vi _tmalignout" ); while( 1 ) { if( feof( fp ) ) { fprintf( stderr, "Error in TMalign\n" ); exit( 1 ); } fgets( line, 999, fp ); // fprintf( stdout, "line = :%s:\n", line ); if( !strncmp( line+5, "denotes the residue pairs", 20 ) ) break; } fgets( line, nlenmax*2, fp ); strcat( seq1, strip( line ) ); fgets( line, nlenmax*2, fp ); strcat( equivchar, strip( line ) ); fgets( line, nlenmax*2, fp ); strcat( seq2, strip( line ) ); #if 0 printf( "seq1=%s\n", seq1 ); printf( "seq2=%s\n", seq2 ); printf( "equi=%s\n", equivchar ); exit( 1 ); #endif n = strlen( seq1 ); makeequivdouble_tmalign( equiv, equivchar, n ); #if 0 fprintf( stdout, "\n" ); for( i=0; i_dum", fname1, fname1 ); res = system( com ); sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 ); res = system( com ); #endif sprintf( com, "\"%s/TMalign\" %s.pdb %s.pdb > _tmalignout 2>_dum", whereispairalign, fname1, fname2 ); fprintf( stderr, "command = %s\n", com ); res = system( com ); if( res ) { fprintf( stderr, "Error in TMalign\n" ); exit( 1 ); } } else { fprintf( stderr, "Cache is not supported!\n" ); exit( 1 ); } fp = fopen( 
"_tmalignout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _tmalignout\n" ); exit( 1 ); } readtmalign( fp, *mseq1, *mseq2, equiv ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } static float callrash( int mem1, int mem2, char **mseq1, char **mseq2, double *equiv, char *fname1, char *fname2, int alloclen ) { FILE *fp; // int res; static char com[10000]; float value; char cachedir[10000]; char cachefile[10000]; int runnow; char pairid[1000]; sprintf( pairid, "%d-%d", mem1, mem2 ); // fprintf( stderr, "pairid = %s\n", pairid ); if( usecache ) { // sprintf( cachedir, "tmp" ); sprintf( cachedir, "%s", pdir ); sprintf( cachefile, "%s/%s.%s.rash", cachedir, fname1, fname2 ); runnow = 0; fp = fopen( cachefile, "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open %s\n", cachefile ); exit( 1 ); } else { fclose( fp ); } } else { fprintf( stderr, "Not supported!\n" ); exit( 1 ); } #if 0 if( 0 ) { #if 0 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 ); res = system( com ); sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 ); res = system( com ); #endif #if 0 // 091127, pdp nai! 
sprintf( com, "env PATH=%s PDP_ASH.pl --qf %s.pdb --qc %s --tf %s.pdb --tc %s > _rashout 2>_dum", whereispairalign, fname1, chain1, fname2, chain2 ); #else sprintf( com, "\"%s/rash\" --qf %s.pdb --qc %s --tf %s.pdb --tc %s --of %s.pdbpair > %s.rashout 2>%s.dum", whereispairalign, fname1, chain1, fname2, chain2, pairid, pairid, pairid ); #endif fprintf( stderr, "command = %s\n", com ); res = system( com ); if( res ) { fprintf( stderr, "Error in structural alignment\n" ); exit( 1 ); } sprintf( com, "awk '/^REMARK/,/^TER/' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname1, chain1, fname2, chain2 ); res = system( com ); sprintf( com, "awk '/^REMARK/,/^TER/{next} 1' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname2, chain2, fname1, chain1 ); res = system( com ); sprintf( com, "rm %s.pdbpair", pairid ); res = system( com ); } else #endif { fprintf( stderr, "Use cache! cachefile = %s\n", cachefile ); sprintf( com, "cat %s > %s.rashout", cachefile, pairid ); system( com ); } if( usecache && runnow ) { fprintf( stderr, "Okashii! 
usechache=%d, runnow=%d\n", usecache, runnow ); exit( 1 ); } sprintf( com, "%s.rashout", pairid ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s\n", com ); exit( 1 ); } readrash( fp, *mseq1, *mseq2, equiv ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } static void preparetmalign( FILE *fp, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt ) { int i, res; char *dumseq; char line[1000]; char fname[1000]; char command[1000]; int linenum, istr, nstr; FILE *checkfp; char *sline; int use[1000]; linenum = 0; nstr = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); use[linenum] = 1; if( sline[0] == '#' || strlen( sline ) < 2 ) { use[linenum] = 0; linenum++; continue; } extractfirstword( sline ); checkfp = fopen( sline, "r" ); if( checkfp == NULL ) { fprintf( stderr, "Cannot open %s.\n", sline ); exit( 1 ); } #if 0 fgets( linec, 999, checkfp ); if( strncmp( "HEADER ", linec, 7 ) ) { fprintf( stderr, "Check the format of %s.\n", sline ); exit( 1 ); } #endif if( checkcbeta( checkfp ) ) { fprintf( stderr, "%s has no C-beta atoms.\n", sline ); exit( 1 ); } else nstr++; fclose( checkfp ); linenum++; } njob = nstr; fprintf( stderr, "nstr = %d\n", nstr ); *strfiles = AllocateCharMtx( nstr, 1000 ); *chainids = AllocateCharMtx( nstr, 2 ); rewind( fp ); istr = 0; linenum = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); if( use[linenum++] ) { (*chainids)[istr][0] = getchainid( sline ); (*chainids)[istr][1] = 0; extractfirstword( sline ); sprintf( fname, "%s", sline ); cutpath( fname ); sprintf( command, "cp %s %s.pdb", sline, fname ); system( command ); sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname ); res = system( command ); if( res ) { fprintf( 
stderr, "error: Install clean.pl\n" ); exit( 1 ); } strcpy( (*strfiles)[istr++], fname ); } } *seqpt = AllocateCharMtx( njob, nlenmax*2+1 ); *mseq1pt = AllocateCharMtx( njob, 0 ); *mseq2pt = AllocateCharMtx( njob, 0 ); *equivpt = AllocateDoubleVec( nlenmax*2+1 ); *alloclenpt = nlenmax*2; dumseq = AllocateCharVec( nlenmax*2+1 ); alreadyoutput = AllocateIntVec( njob ); for( i=0; i%d_%s-%s\n%s\n", i+1, (*strfiles)[i], (*chainids)[i], (*seqpt)[i] ); alreadyoutput[i] = 1; } } static void prepareash( FILE *fp, char *inputfile, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt ) { int i, res; char *dumseq; char line[1000]; char fname[1000]; char command[1000]; int linenum, istr, nstr; // FILE *checkfp; char *sline; int use[1000]; linenum = 0; nstr = 0; fprintf( stderr, "inputfile = %s\n", inputfile ); while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); use[linenum] = 1; if( sline[0] == '#' || strlen( sline ) < 2 ) { use[linenum] = 0; linenum++; continue; } extractfirstword( sline ); #if 0 checkfp = fopen( sline, "r" ); if( checkfp == NULL ) { fprintf( stderr, "Cannot open %s.\n", sline ); exit( 1 ); } if( checkcbeta( checkfp ) ) { fprintf( stderr, "%s has no C-beta atoms.\n", sline ); exit( 1 ); } else nstr++; fclose( checkfp ); #else nstr++; #endif linenum++; } njob = nstr; fprintf( stderr, "nstr = %d\n", nstr ); *strfiles = AllocateCharMtx( nstr, 1000 ); *chainids = AllocateCharMtx( nstr, 2 ); rewind( fp ); istr = 0; linenum = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); fprintf( stderr, "sline = %s\n", sline ); if( use[linenum++] ) { (*chainids)[istr][0] = getchainid( sline ); (*chainids)[istr][1] = 0; extractfirstword( sline ); sprintf( fname, "%s", sline ); cutpath( fname ); #if 0 sprintf( command, "cp %s %s.pdb", sline, fname ); system( command ); sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname ); res 
= system( command ); if( res ) { fprintf( stderr, "error: Install clean.pl\n" ); exit( 1 ); } #endif strcpy( (*strfiles)[istr++], fname ); } } *seqpt = AllocateCharMtx( njob, nlenmax*2+1 ); *mseq1pt = AllocateCharMtx( njob, 0 ); *mseq2pt = AllocateCharMtx( njob, 0 ); *equivpt = AllocateDoubleVec( nlenmax*2+1 ); *alloclenpt = nlenmax*2; dumseq = AllocateCharVec( nlenmax*2+1 ); alreadyoutput = AllocateIntVec( njob ); for( i=0; i%d_%s\n%s\n", i+1, (*strfiles)[i], (*seqpt)[i] ); alreadyoutput[i] = 1; } } void arguments( int argc, char *argv[] ) { int c; usecache = 0; scale = 1.0; equivthreshold = 5; equivwinsize = 5; equivshortestlen = 1; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'R'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; odir = ""; pdir = ""; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = 
(int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'o': odir = *++argv; fprintf( stderr, "odir = %s\n", odir ); --argc; goto nextoption; case 'p': pdir = *++argv; fprintf( stderr, "pdir = %s\n", pdir ); --argc; goto nextoption; case 't': equivthreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': equivwinsize = myatoi( *++argv ); --argc; goto nextoption; case 'l': equivshortestlen = myatoi( *++argv ); --argc; goto nextoption; case 's': scale = atof( *++argv ); --argc; goto nextoption; case 'c': usecache = 1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; #if 0 case 'O': fftNoAnchStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'K': alg = 'K'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* 
modification end. */ #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; #endif default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen ) { int i, j, ilim; int clus1, clus2; int off1, off2; float pscore = 0.0; // by D.Mathog static char *indication1, *indication2; FILE *hat2p, *hat3p; static double **distancemtx; static double *effarr1 = NULL; static double *effarr2 = NULL; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable, *tmpptr; static char **pair; // int intdum; double bunbo; char **checkseq; localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i%d_%s\n%s\n", i+1, strfiles[i], seq[i] ); strcpy( checkseq[i], seq[i] ); } else { gappick0( seq[i], mseq1[0] ); fprintf( stderr, "checking seq%d\n", i ); // fprintf( stderr, " seq=%s\n", seq[i] ); // fprintf( stderr, "checkseq=%s\n", checkseq[i] ); if( strcmp( checkseq[i], seq[i] ) ) { fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" ); fprintf( stderr, "i=%d\n", i ); fprintf( stderr, " seq=%s\n", seq[i] ); fprintf( stderr, "checkseq=%s\n", checkseq[i] ); exit( 1 ); } } if( alreadyoutput[j] == 0 ) { alreadyoutput[j] = 1; gappick0( seq[j], mseq2[0] ); fprintf( stdout, ">%d_%s-%s\n%s\n", j+1, strfiles[j], chainids[j], 
seq[j] ); strcpy( checkseq[j], seq[j] ); } else { gappick0( seq[j], mseq2[0] ); fprintf( stderr, "checking seq%d\n", j ); if( strcmp( checkseq[j], seq[j] ) ) { fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" ); fprintf( stderr, "j=%d\n", j ); fprintf( stderr, " seq=%s\n", seq[j] ); fprintf( stderr, "checkseq=%s\n", checkseq[j] ); exit( 1 ); } } #endif } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d k\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); } } } fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown 
algorithm\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; static double *equiv; char **strfiles; char **chainids; int i; FILE *infp; char c; int alloclen; arguments( argc, argv ); if( equivthreshold < 1 || 9 < equivthreshold ) { fprintf( stderr, "-t n, n must be 1..9\n" ); exit( 1 ); } if( ( equivwinsize + 1 ) % 2 != 0 ) { fprintf( stderr, "equivwinsize = %d\n", equivwinsize ); fprintf( stderr, "It must be an odd number.\n" ); exit( 1 ); } if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; nlenmax = 10000; // tekitou if( alg == 'R' ) prepareash( infp, inputfile, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen ); else if( alg == 'T' ) preparetmalign( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen ); fclose( infp ); name = AllocateCharMtx( njob, B+1 ); aseq = AllocateCharMtx( njob, nlenmax*2+1 ); bseq = AllocateCharMtx( njob, nlenmax*2+1 ); eff = AllocateDoubleVec( njob ); for( i=0; i M ) { fprintf( stderr, "The number of structures must be < %d\n", M ); fprintf( stderr, "Please try sequence-based methods for such large data.\n" ); exit( 1 ); } #if 0 readData( infp, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal 
character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'o': seedoffset = myatoi( *++argv ); fprintf( stderr, "seedoffset = %d\n", seedoffset ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; int nlenmin; char **name; char **seq; int *nlen; int i; char *usual; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; // dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); // for( i=0; i%s\n", name[i]+1 ); if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; fprintf( origfp, "%s\n", seq[i] ); } fclose( origfp ); #endif if( dorp == 'p' ) { usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-."; replace_unusual( njob, seq, usual, 'X', toupper ); } else { usual = "ATGCUatgcuBDHKMNRSVWYXbdhkmnrsvwyx-"; replace_unusual( njob, seq, usual, 'n', tolower ); } for( i=0; i_os_%d_oe_%s\n", i+seedoffset, name[i]+1 ); fprintf( stdout, "%s\n", seq[i] ); } free( nlen ); FreeCharMtx( seq ); FreeCharMtx( name ); return( 0 ); } mafft-7.123-without-extensions/core/getlag.c0000640000076500007650000002554411320564542020100 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 void arguments( int argc, char *argv[] ) { int c; calledByXced = 0; devide 
= 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'C'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; scoremtx = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'D': scoremtx = -1; break; case 'P': scoremtx = 0; break; case 'i': contin = 1; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': treemethod = 'x'; break; case 'p': treemethod = 'p'; break; case 'a': alg = 'a'; break; case 'A': alg = 'A'; break; case 'S': alg = 'S'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); } void treebase( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double **mtx, int ***topol, double **len, double **eff, int alloclen ) { int i, j, l; int clus1, clus2; int s1, s2, r1, r2; float pscore; static char *indication1, *indication2; static char **name1, **name2; static double **partialmtx = NULL; static int ***partialtopol = NULL; static double **partiallen = NULL; static double **partialeff = NULL; static double *effarr = NULL; static double *effarr1 = NULL; static double *effarr2 = NULL; #if 0 char pair[njob][njob]; #else static char **pair; #endif if( partialtopol == NULL ) { partialmtx = AllocateDoubleMtx( njob, njob ); partialtopol = AllocateIntCub( njob, 2, njob ); partialeff = AllocateDoubleMtx( njob, njob ); partiallen = AllocateDoubleMtx( njob, 2 ); effarr = AllocateDoubleVec( njob ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( njob*3+100 ); indication2 = AllocateCharVec( njob*3+100 ); name1 = AllocateCharMtx( njob, B+1 ); name2 = AllocateCharMtx( njob, B+1 ); #if 0 #else pair = AllocateCharMtx( njob, njob ); #endif } if( checkC ) for( i=0; i-1; i++ ) if( pair[s1][r1] != 1 ) exit( 1 ); s2 = topol[l][1][0]; for( i=0; (r2=topol[l][1][i])>-1; i++ ) if( pair[s2][r2] != 1 ) exit( 1 ); clus1 = conjuction( pair, s1, aseq, mseq1, effarr1, effarr, name, name1, indication1 ); clus2 = conjuction( pair, s2, aseq, mseq2, effarr2, effarr, name, name2, indication2 ); 
fprintf( trap_g, "\nSTEP-%d\n", l ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 ); fprintf( stderr, "group1 = %.66s", indication1 ); if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); if( checkC ) for( i=0; i-1; i++ ) { pair[s1][r2] = 1; pair[s2][r2] = 0; } writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); fprintf( stderr, "\n" ); } } static void WriteOptions( FILE *fp ) { fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "Dayhoff( machigai ga aru )\n" ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); else if( scoremtx == -1 ) fprintf( fp, "DNA\n" ); if( scoremtx == 0 || scoremtx == -1 ) fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); else fprintf( fp, "Gap Penalty = %+5.2f\n", (double)ppenalty/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Apgorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA 
(1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( scoremtx == -1 ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double **pscore; static double **eff; static double **node0, **node1; int i, j; static int ***topol; static double **len; FILE *prep; char c; int alloclen; arguments( argc, argv ); getnumlen( stdin ); rewind( stdin ); name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax*5+1 ); aseq = AllocateCharMtx( njob, nlenmax*5+1 ); bseq = AllocateCharMtx( njob, nlenmax*5+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*5; topol = AllocateIntCub( njob, 2, njob ); len = AllocateDoubleMtx( njob, 2 ); pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleMtx( njob, njob ); node0 = AllocateDoubleMtx( njob, njob ); node1 = AllocateDoubleMtx( njob, njob ); #if 0 Read( name, nlen, seq ); #else readData_pointer( stdin, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } writePre( njob, name, nlen, seq, 0 ); if( tbutree == 0 ) { for( i=1; i #include #include #endif #define IMA_YONDERU 'x' /* iranai */ #define IMA_KAITERU 0 /* iranai */ #define KAKIOWATTA 'w' #define YOMIOWATTA 'r' #define OSHIMAI 'd' #define ISRUNNING 0 #define SEMAPHORE 1 #define STATUS 2 #define IPC_ALLOC 0100000 
mafft-7.123-without-extensions/core/mltaln.h0000640000076500007650000001515512227115401020117 0ustar katohkatoh#define USE_XCED 0 #if USE_XCED #include "config.h" #include "share.h" #else #endif #include #include #include #include #include #include #include #include #include "mtxutl.h" #include #ifdef enablemultithread #include #endif #define VERSION "7.123b" #define SHOWVERSION fprintf( stderr, "%s (%s) Version " VERSION " alg=%c, model=%s\n%d thread(s)\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, nthread ) #define FFT_THRESHOLD 80 #define FFT_WINSIZE_P 20 #define FFT_WINSIZE_D 100 #define DISPSEQF 60 #define DISPSITEI 0 #define MAXITERATION 500 #define M 500000 /* njob no saidaiti */ #define N 5000000 /* nlen no saidaiti */ #define MAXSEG 100000 #define B 256 #define C 60 /* 1 gyou no mojisuu */ #define D 6 #define rnd() ( ( 1.0 / ( RAND_MAX + 1.0 ) ) * rand() ) #define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) #define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) #define G(X) ( ((X)>(0))?(X):(0) ) #define BEFF 1.0 /* 0.6 ni suruto zureru */ #define WIN 3 #define SGAPP -1000 #define GETA2 0.001 #define GETA3 0.001 #define NOTSPECIFIED 100009 #define SUEFF 0.1 /* upg/(spg+upg) -> sueff.sed */ #define DIVLOCAL 0 #define INTMTXSCALE 1000000.0 #define JTT 201 #define TM 202 extern char modelname[100]; extern int njob, nlenmax; extern int amino_n[0x80]; extern char amino_grp[0x80]; extern int amino_dis[0x80][0x80]; extern int amino_disLN[0x80][0x80]; extern double amino_dis_consweight_multi[0x80][0x80]; extern int n_dis[0x80][0x80]; extern int n_disFFT[0x80][0x80]; extern float n_dis_consweight_multi[0x80][0x80]; extern char amino[0x80]; extern double polarity[0x80]; extern double volume[0x80]; extern int ribosumdis[37][37]; extern int ppid; extern double thrinter; extern double fastathreshold; extern int pslocal, ppslocal; extern int constraint; extern int divpairscore; extern int fmodel; // 1-> fmodel 0->default -1->raw extern int nblosum; // 45, 
50, 62, 80 extern int kobetsubunkatsu; extern int bunkatsu; extern int dorp; extern int niter; extern int contin; extern int calledByXced; extern int devide; extern int scmtd; extern int weight; extern int utree; extern int tbutree; extern int refine; extern int check; extern double cut; extern int cooling; extern int penalty, ppenalty, penaltyLN; extern int RNApenalty, RNAppenalty; extern int RNApenalty_ex, RNAppenalty_ex; extern int penalty_ex, ppenalty_ex, penalty_exLN; extern int penalty_EX, ppenalty_EX; extern int penalty_OP, ppenalty_OP; extern int offset, poffset, offsetLN, offsetFFT; extern int RNAthr, RNApthr; extern int scoremtx; extern int TMorJTT; extern char use_fft; extern char force_fft; extern int nevermemsave; extern int fftscore; extern int fftWinSize; extern int fftThreshold; extern int fftRepeatStop; extern int fftNoAnchStop; extern int divWinSize; extern int divThreshold; extern int disp; extern int outgap; extern char alg; extern int cnst; extern int mix; extern int tbitr; extern int tbweight; extern int tbrweight; extern int disopt; extern int pamN; extern int checkC; extern float geta2; extern int treemethod; extern int kimuraR; extern char *swopt; extern int fftkeika; extern int score_check; extern char *inputfile; extern char *addfile; extern int addprofile; extern float consweight_multi; extern float consweight_rna; extern char RNAscoremtx; extern char *signalSM; extern FILE *prep_g; extern FILE *trap_g; extern char **seq_g; extern char **res_g; extern int rnakozo; extern char rnaprediction; /* sengen no ichi ha koko dake de ha nai */ extern void constants(); extern char **Calignm1(); extern char **Dalignm1(); extern char **align0(); extern double Cscore_m_1( char **, int, int, double ** ); extern double score_m_1( char **, int, int, double ** ); extern double score_calc0( char **, int, double **, int ); extern char seqcheck( char ** ); extern float substitution( char *, char * ); extern float substitution_score( char *, char * ); extern 
float substitution_nid( char *, char * ); extern float substitution_hosei( char *, char * ); extern double ipower( double, int ); extern float translate_and_Calign(); extern float A__align(); extern float A__align11(); extern float A__align_gapmap(); extern float partA__align(); extern float L__align11(); extern float G__align11(); extern float Falign(); extern float Falign_localhom(); extern float Conalign(); extern float Aalign(); extern float imp_match_out_sc( int, int ); extern float part_imp_match_out_sc( int, int ); extern void ErrorExit(); extern void cpmx_calc(); extern void intergroup_score( char **, char **, double *, double *, int, int, int, double * ); extern int conjuctionfortbfast(); extern int fastconjuction(); extern char seqcheck( char ** ); typedef struct _LocalHom { int nokori; struct _LocalHom *next; struct _LocalHom *last; int start1; int end1; int start2; int end2; double opt; int overlapaa; int extended; double importance; float fimportance; double wimportance; char korh; } LocalHom; typedef struct _NodeInCub { int step; int LorR; } NodeInCub; typedef struct _Node { struct _Node *children[3]; int tmpChildren[3]; double length[3]; double *weightptr[3]; int top[3]; int *members[3]; } Node; typedef struct _Segment { int start; int end; int center; double score; int skipForeward; int skipBackward; struct _Segment *pair; int number; } Segment; typedef struct _Segments { Segment group1; Segment group2; int number1; int number2; } Segments; typedef struct _Bchain { struct _Bchain *next; struct _Bchain *prev; int pos; } Bchain; typedef struct _Achain { int next; int prev; // int curr; } Achain; typedef struct _Fukusosuu { double R; double I; } Fukusosuu; typedef struct _Gappattern { int len; float freq; } Gappat; typedef struct _RNApair { int uppos; float upscore; int downpos; float downscore; int bestpos; float bestscore; } RNApair; typedef struct _Treedep { int child0; int child1; int done; float distfromtip; } Treedep; typedef struct _Addtree { 
int nearest; float dist1; char *neighbors; float dist2; } Addtree; #include "fft.h" #include "dp.h" #include "functions.h" #ifdef enablemultithread #define TLS __thread #else #define TLS #endif extern TLS int commonAlloc1; extern TLS int commonAlloc2; extern TLS int **commonIP; extern TLS int **commonJP; extern int nthread; extern int randomseed; extern int parallelizationstrategy; #define BESTFIRST 0 #define BAATARI0 1 #define BAATARI1 2 #define BAATARI2 3 extern int scoreout; extern int outnumber; extern int legacygapcost; extern TLS char *newgapstr; extern int nalphabets; extern int nscoredalphabets; mafft-7.123-without-extensions/core/mafft.bat0000750000076500007650000000044411443061662020251 0ustar katohkatoh@echo off setlocal if not "x%PROCESSOR_ARCHITECTURE%" == "xAMD64" goto _NotX64 set COMSPEC=%WINDIR%\SysWOW64\cmd.exe %COMSPEC% /c %0 %* goto EOF :_NotX64 set ROOTDIR="%~d0%~p0\ms" set PATH=/bin/ set MAFFT_BINARIES=/lib/mafft %ROOTDIR%\bin\sh %ROOTDIR%\bin\mafft %* :EOF mafft-7.123-without-extensions/core/Lalign11.c0000640000076500007650000004442712224723614020207 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define DEBUG2 0 #define XXXXXXX 0 #define USE_PENALTY_EX 1 static TLS int localstop; // 060910 #if 1 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) { char *seq2 = s2[0]; int *intptr; intptr = amino_dis[(int)s1[0][i1]]; while( lgth2-- ) *match++ = intptr[(int)*seq2++]; } #else static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) { int j; for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } #else for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } #endif } #endif static float Ltracking( float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int *off1pt, int *off2pt, int endi, int endj ) { int i, j, l, iin, jin, lgth1, lgth2, k, limk; int ifi=0, jfi=0; // by D.Mathog, a 
guess // char gap[] = "-"; char *gap; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; if( ijp[ifi][jfi] == localstop ) break; k++; iin = ifi; jin = jfi; } if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi; if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi; // fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi ); return( 0.0 ); } float L__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? 
*/ float g; float *currentw, *previousw; #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; // static TLS int **intwork; // static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float maxwm; int endali = 0, endalj = 0; // by D.Mathog, a guess // int endali, endalj; float localthr = -offset; float localthr2 = -offset; // float localthr = 100; // float localthr2 = 100; float fpenalty = (float)penalty; float fpenalty_ex = (float)penalty_ex; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); } return( 0.0 ); } if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); // FreeFloatMtx( floatwork ); // FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); // floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); // intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; *ijppt = -( j - mpi ); } if( *prept > mi ) { mi = *prept; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt+fpenalty) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } if( *prept > *mjpt ) { *mjpt = *prept; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijppt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = 
currentw[lgth2-1]; } #if 0 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif if( ijp[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "wm=%f\n", wm ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, " wm = %f\n", wm ); #endif return( maxwm ); } float L__align11_noalign( char **seq1, char **seq2 ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; // int resultlen; float wm = 0.0; /* int ?????? 
*/ float g; float *currentw, *previousw; #if 1 float *wtmp; // int *ijppt; float *mjpt, *prept, *curpt; // int *mpjpt; #endif static TLS float mi, *m; // static TLS int **ijp; // static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ // static TLS char **mseq1; // static TLS char **mseq2; // static TLS char **mseq; // static TLS int **intwork; // static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float maxwm; // int endali = 0, endalj = 0; // by D.Mathog, a guess // int endali, endalj; float localthr = -offset; float localthr2 = -offset; // float localthr = 100; // float localthr2 = 100; float fpenalty = (float)penalty; float fpenalty_ex = (float)penalty_ex; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; // free( mseq1 ); // free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); // FreeIntVec( mp ); // FreeCharMtx( mseq ); } return( 0.0 ); } // if( orlgth1 == 0 ) // { // mseq1 = AllocateCharMtx( njob, 0 ); // mseq2 = AllocateCharMtx( njob, 0 ); // } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); // FreeIntVec( mp ); // FreeCharMtx( mseq ); // FreeFloatMtx( floatwork ); // FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); // mp = AllocateIntVec( ll2+2 ); // mseq = AllocateCharMtx( njob, ll1+ll2 ); // floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); // intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } // mseq1[0] = mseq[0]; // mseq2[0] = mseq[1]; // if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) // { // int ll1, ll2; // // if( commonAlloc1 && commonAlloc2 ) // { // FreeIntMtx( commonIP ); // } // // ll1 = MAX( orlgth1, commonAlloc1 ); // ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG // fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif // commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG // fprintf( stderr, "succeeded\n\n" ); #endif // commonAlloc1 = ll1; // commonAlloc2 = ll2; // } // ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; // *ijppt = -( j - mpi ); } if( *prept > mi ) { mi = *prept; // mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt+fpenalty) > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } if( *prept > *mjpt ) { *mjpt = *prept; // *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif if( maxwm < wm ) { maxwm = wm; // endali = i; // endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); // *ijppt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt++ += wm; // ijppt++; 
mjpt++; prept++; // mpjpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } #if 0 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif #if 0 // IRUKAMO!!!! if( ijp[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } #else if( maxwm < localthr ) { fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } #endif // Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj ); // resultlen = strlen( mseq1[0] ); // if( alloclen < resultlen || resultlen > N ) // { // fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); // ErrorExit( "LENGTH OVER!\n" ); // } // strcpy( seq1[0], mseq1[0] ); // strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "wm=%f\n", wm ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, " wm = %f\n", wm ); #endif return( maxwm ); } mafft-7.123-without-extensions/core/fft.h0000640000076500007650000000043711210161050017374 0ustar katohkatoh#include #include #include #include #include "mtxutl.h" #define PI 3.14159265358979323846 #define END_OF_VEC -1 #define NKOUHO 20 #define NKOUHO_LONG 500 #define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) #define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) mafft-7.123-without-extensions/core/suboptalign11.c0000640000076500007650000003344412224731077021327 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define DEBUG2 0 #define XXXXXXX 0 #define USE_PENALTY_EX 1 typedef struct _shuryoten { int i; int j; float wm; struct _shuryoten *next; struct _shuryoten *prev; } Shuryoten; static int localstop; static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg ) { Shuryoten *s1 = (Shuryoten *)s1_arg; Shuryoten *s2 = (Shuryoten *)s2_arg; if ( s1->wm > 
s2->wm ) return( -1 ); else if ( s1->wm < s2->wm ) return( 1 ); else return( 0 ); } static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) { int j; for( j=0; j lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; // by D.Mathog, float g; float *currentw, *previousw; #if 1 float *wtmp; int *ijpipt; int *ijpjpt; float *mjpt, *Mjpt, *prept, *curpt; int *mpjpt, *Mpjpt; #endif static float mi, *m; static float Mi, *largeM; static int **ijpi; static int **ijpj; static int mpi, *mp; static int Mpi, *Mp; static float *w1, *w2; // static float *match; static float *initverticalw; /* kufuu sureba iranai */ static float *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static float **cpmx1; static float **cpmx2; static int **intwork; static float **floatwork; static int orlgth1 = 0, orlgth2 = 0; float maxwm; float tbk; int tbki, tbkj; int endali, endalj; // float localthr = 0.0; // float localthr2 = 0.0; float fpenalty = (float)penalty; float fpenalty_OP = (float)penalty_OP; float fpenalty_ex = (float)penalty_ex; // float fpenalty_EX = (float)penalty_EX; float foffset = (float)offset; float localthr = -foffset; float localthr2 = -foffset; static Shuryoten *shuryo = NULL; int numshuryo; float minshuryowm = 0.0; // by D.Mathog int minshuryopos = 0; // by D.Mathog float resf; // fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); fprintf( stderr, "in suboptalign11\n" ); if( !shuryo ) { shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) ); } for( i=0; i<100; i++ ) { shuryo[i].i = -1; shuryo[i].j = -1; shuryo[i].wm = 0.0; } numshuryo = 0; if( orlgth1 == 0 ) { } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); fprintf( stderr, "in suboptalign11 step 1\n" ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; fprintf( stderr, "in suboptalign11 step 1.3\n" ); if( orlgth1 > 0 && orlgth2 > 0 ) { fprintf( stderr, "in 
suboptalign11 step 1.4\n" ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); // FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); fprintf( stderr, "in suboptalign11 step 1.5\n" ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); fprintf( stderr, "in suboptalign11 step 1.6\n" ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); fprintf( stderr, "in suboptalign11 step 1.7\n" ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); // match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); mseq1 = AllocateCharMtx( njob, ll1+ll2 ); mseq2 = AllocateCharMtx( njob, ll1+ll2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } fprintf( stderr, "in suboptalign11 step 1.6\n" ); fprintf( stderr, "in suboptalign11 step 2\n" ); if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); FreeIntMtx( used ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif used = AllocateIntMtx( ll1+10, ll2+10 ); commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijpipt = i - 1; *ijpjpt = mpi; } g = *prept; if( g > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "m%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijpipt = *mpjpt; // *ijpjpt = j - 1; } g = *prept; if( g > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif g = tbk + fpenalty_OP; // g = tbk; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } g = Mi; if( g > tbk ) { tbk = g; tbki = i-1; tbkj = Mpi; } g = *Mjpt; if( g > tbk ) { tbk = g; tbki = *Mpjpt; tbkj = j-1; } // tbk += fpenalty_EX;// + foffset; g = *prept; if( g > *Mjpt ) { *Mjpt = g; *Mpjpt = i-1; } // *Mjpt += fpenalty_EX;// + foffset; g = *prept; if( g > Mi ) { Mi = g; Mpi = j-1; } // Mi += fpenalty_EX;// + foffset; // fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); // fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( numshuryo < 100 ) { shuryo[numshuryo].i = i; shuryo[numshuryo].j = j; shuryo[numshuryo].wm = wm; if( minshuryowm > wm ) { minshuryowm = wm; minshuryopos = numshuryo; } numshuryo++; } else { if( wm > minshuryowm ) { shuryo[minshuryopos].i = i; shuryo[minshuryopos].j = j; shuryo[minshuryopos].wm = wm; minshuryowm = wm; for( k=0; k<100; k++ ) // muda { if( shuryo[k].wm < minshuryowm ) { 
minshuryowm = shuryo[k].wm; minshuryopos = k; break; } } } } #endif #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijpipt = localstop; // *ijpjpt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; curpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } #if 1 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo ); for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } lasti = lgth1+1; for( i=0; i\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); #endif } for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stderr, "%2d ", used[i][j] ); } fprintf( stderr, "\n" ); } // fprintf( stderr, "### impmatch = %f\n", *impmatch ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } return( wm ); } mafft-7.123-without-extensions/core/Makefile0000640000076500007650000004636212225677171020142 0ustar katohkatohPREFIX = /usr/local LIBDIR = $(PREFIX)/libexec/mafft BINDIR = $(PREFIX)/bin MANDIR = $(PREFIX)/share/man/man1 #MNO_CYGWIN = -mno-cygwin ENABLE_MULTITHREAD = -Denablemultithread # Comment out the above line if your compiler # does not support TLS (thread-local strage). CC = gcc CFLAGS = -O3 #CC = icc #CFLAGS = -fast # if you have icc, use this. 
#CFLAGS = -O0 -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3 #CFLAGS = -O0 -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS) ifdef ENABLE_MULTITHREAD LIBS = -lm -lpthread else LIBS = -lm endif INSTALL = install PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \ pair2hat3s multi2hat3s rnatest pairash addsingle \ splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \ seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version PERLPROGS = mafftash_premafft.pl SCRIPTS = mafft mafft-homologs.rb OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \ multi2hat3s.o defs.o fft.o fftFunctions.o OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \ pair2hat3s.o defs.o fft.o fftFunctions.o OBJRNATEST = mtxutl.o io.o mltaln9.o tddis.o constants.o Lalignmm.o \ rna.o rnatest.o defs.o fft.o fftFunctions.o OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genGalign11.o genalign11.o Lalign11.o SAalignmm.o \ pairash.o defs.o fft.o fftFunctions.o OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ 
Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genGalign11.o genalign11.o Lalign11.o SAalignmm.o \ pairlocalalign.o defs.o fft.o fftFunctions.o OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ disttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splitfromaln.o defs.o fft.o fftFunctions.o OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splittbfast.o defs.o fft.o fftFunctions.o OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splittbfast2.o defs.o fft.o fftFunctions.o OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o defs.o fft.o fftFunctions.o OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o OBJMAKEDIRECTIONLIST = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ 
Lalign11.o makedirectionlist.o defs.o fft.o fftFunctions.o addfunctions.o OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \ tbfast.o defs.o fft.o fftFunctions.o addfunctions.o OBJADDSINGLE = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \ addsingle.o defs.o fft.o fftFunctions.o addfunctions.o OBJTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o MSalignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \ tbfast2.o defs.o fft.o fftFunctions.o OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ setcore.o defs.o fft.o fftFunctions.o OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o fftFunctions.o fft.o \ tditeration.o tditr.o defs.o SAalignmm.o treeOperation.o OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o MSalignmm.o fftFunctions.o fft.o \ tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o addfunctions.o OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ getlag.o defs.o fft.o fftFunctions.o OBJGAPFILL = mtxutl.o io.o constants.o gapfill.o defs.o 
OBJDNDFAST5 = dndfast5.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDBLAST = dndblast.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST7 = dndfast7.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST4 = dndfast4.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJSEXTET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o sextet5.o defs.o OBJDISTANCE = io.o constants.o mtxutl.o mltaln9.o tddis.o mafft-distance.o defs.o OBJTRIPLET6 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet6.o defs.o OBJTRIPLET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet5.o defs.o OBJOCTET4 = io.o constants.o mtxutl.o mltaln9.o tddis.o octet4.o defs.o OBJDNDPRE = dndpre.o io.o constants.o mtxutl.o mltaln9.o defs.o OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \ SAalignmm.o Galign11.o Falign.o Falign_localhom.o fftFunctions.o fft.o mafft-profile.o defs.o OBJSCORE = io.o mtxutl.o mltaln9.o score.o constants.o defs.o HEADER = mltaln.h mtxutl.h FFTHEADER = fft.h MANPAGES = mafft.1 mafft-homologs.1 all : $(PERLPROGS) $(PROGS) $(SCRIPTS) cp $(SCRIPTS) ../scripts chmod 755 ../scripts/* cp $(PERLPROGS) $(PROGS) ../binaries chmod 755 ../binaries/* cp $(MANPAGES) ../binaries @echo done. 
univscript: univscript.tmpl Makefile sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript mafft: mafft.tmpl mltaln.h sed "s:_LIBDIR:$(LIBDIR):" mafft.tmpl > mafft mafft-homologs.rb: mafft-homologs.tmpl # cp mafft-homologs.tmpl mafft-homologs.rb sed "s:_BINDIR:$(BINDIR):" mafft-homologs.tmpl > mafft-homologs.rb mltaln.h : functions.h touch mltaln.h version : version.c mltaln.h $(CC) -o $@ version.c $(MYCFLAGS) $(LDFLAGS) tbfast : $(OBJTBFAST) $(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) addsingle : $(OBJADDSINGLE) $(CC) -o $@ $(OBJADDSINGLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) tbfast2 : $(OBJTBFAST2) $(CC) -o $@ $(OBJTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS) disttbfast : $(OBJDISTTBFAST) $(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) makedirectionlist : $(OBJMAKEDIRECTIONLIST) $(CC) -o $@ $(OBJMAKEDIRECTIONLIST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) splittbfast : $(OBJSPLITTBFAST) $(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) splitfromaln : $(OBJSPLITFROMALN) $(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) splittbfast2 : $(OBJSPLITTBFAST2) $(CC) -o $@ $(OBJSPLITTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dummy : $(OBJDUMMY) $(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LDFLAGS) $(LIBS) setcore : $(OBJSETCORE) $(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) countlen : $(OBJCOUNTLEN) $(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) seq2regtable : $(OBJSEQ2REGTABLE) $(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) regtable2seq : $(OBJREGTABLE2SEQ) $(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LDFLAGS) $(LIBS) setdirection : $(OBJSETDIRECTION) $(CC) -o $@ $(OBJSETDIRECTION) $(MYCFLAGS) $(LDFLAGS) $(LIBS) replaceu : $(OBJREPLACEU) $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) restoreu : $(OBJRESTOREU) $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) f2cl : $(OBJF2CL) $(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mccaskillwrap : $(OBJMCCASKILLWRAP) $(CC) -o $@ $(OBJMCCASKILLWRAP) 
$(MYCFLAGS) $(LDFLAGS) $(LIBS) contrafoldwrap : $(OBJCONTRAFOLDWRAP) $(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pairlocalalign : $(OBJPAIRLOCALALIGN) $(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pairash : $(OBJPAIRASH) $(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LDFLAGS) $(LIBS) rnatest : $(OBJRNATEST) $(CC) -o $@ $(OBJRNATEST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pair2hat3s : $(OBJPAIR2HAT3S) $(CC) -o $@ $(OBJPAIR2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS) multi2hat3s : $(OBJMULTI2HAT3S) $(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS) getlag : $(OBJGETLAG) $(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LDFLAGS) $(LIBS) tditr : $(OBJTDITR) $(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dvtditr : $(OBJDVTDITR) $(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafft-profile : $(OBJGALN) $(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) gapfill : $(OBJGAPFILL) $(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast4 : $(OBJDNDFAST4) $(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast5 : $(OBJDNDFAST5) $(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast6 : $(OBJDNDFAST6) $(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast7 : $(OBJDNDFAST7) $(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndblast : $(OBJDNDBLAST) $(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast3 : $(OBJDNDFAST3) $(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet : $(OBJTRIPLET) $(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet3 : $(OBJTRIPLET3) $(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet3 : $(OBJSEXTET3) $(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet4 : $(OBJSEXTET4) $(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet5 : $(OBJSEXTET5) $(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafft-distance : $(OBJDISTANCE) $(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet5 : 
$(OBJTRIPLET5) $(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet6 : $(OBJTRIPLET6) $(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LDFLAGS) $(LIBS) octet4 : $(OBJOCTET4) $(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndpre : $(OBJDNDPRE) $(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) score : $(OBJSCORE) $(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) genMtx : $(OBJGENMTX) $(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafftash_premafft.pl : mafftash_premafft.tmpl cp mafftash_premafft.tmpl mafftash_premafft.pl gapfill.o : gapfill.c $(HEADER) $(CC) $(MYCFLAGS) -c gapfill.c mltaln9.o : mltaln9.c $(HEADER) $(CC) $(MYCFLAGS) -c mltaln9.c tddis.o : tddis.c $(HEADER) $(CC) $(MYCFLAGS) -c tddis.c constants.o : constants.c miyata.h miyata5.h blosum.c DNA.h JTT.c $(HEADER) $(CC) $(MYCFLAGS) -c constants.c defs.o : defs.c $(CC) $(MYCFLAGS) -c defs.c #A+++alignmm.o : SA+++alignmm.c $(HEADER) # $(CC) $(MYCFLAGS) -c SA+++alignmm.c -o A+++alignmm.o Salignmm.o : Salignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Salignmm.c Halignmm.o : Halignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Halignmm.c Ralignmm.o : Ralignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Ralignmm.c Qalignmm.o : Qalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Qalignmm.c MSalignmm.o : MSalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c MSalignmm.c partSalignmm.o : partSalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c partSalignmm.c partQalignmm.o : partQalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c partQalignmm.c Lalign11.o : Lalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c Lalign11.c genalign11.o : genalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c genalign11.c genGalign11.o : genGalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c genGalign11.c suboptalign11.o : suboptalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c suboptalign11.c Galign11.o : Galign11.c $(HEADER) $(CC) $(MYCFLAGS) -c Galign11.c MSalign11.o : MSalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c MSalign11.c SAalignmm.o : SAalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c SAalignmm.c -o SAalignmm.o 
Lalignmm.o : Lalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Lalignmm.c rna.o : rna.c $(HEADER) $(CC) $(MYCFLAGS) -c rna.c disttbfast.o : disttbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c disttbfast.c splitfromaln.o : splitfromaln.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splitfromaln.c splittbfast.o : splittbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splittbfast.c splittbfast2.o : splittbfast2.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splittbfast2.c makedirectionlist.o : makedirectionlist.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c makedirectionlist.c disttbfast_dummy.o : disttbfast_dummy.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c disttbfast_dummy.c dummy.o : dummy.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c dummy.c tbfast.o : tbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c tbfast.c addsingle.o : addsingle.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c addsingle.c tbfast2.o : tbfast2.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c tbfast2.c setcore.o : setcore.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c setcore.c getlag.o : getlag.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c getlag.c tditr.o : tditr.c $(HEADER) $(CC) $(MYCFLAGS) -c tditr.c dvtditr.o : dvtditr.c $(HEADER) $(CC) $(MYCFLAGS) -c dvtditr.c tditeration.o : tditeration.c $(HEADER) $(CC) $(MYCFLAGS) -c tditeration.c mafft-profile.o : mafft-profile.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c mafft-profile.c dndfast4.o : dndfast4.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast4.c dndfast5.o : dndfast5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast5.c dndfast6.o : dndfast6.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast6.c dndfast7.o : dndfast7.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast7.c dndblast.o : dndblast.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndblast.c dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast3.c dndpre.o : dndpre.c $(HEADER) $(CC) $(MYCFLAGS) -c dndpre.c countlen.o : countlen.c $(HEADER) $(CC) 
$(MYCFLAGS) -c countlen.c seq2regtable.o : seq2regtable.c $(HEADER) $(CC) $(MYCFLAGS) -c seq2regtable.c regtable2seq.o : regtable2seq.c $(HEADER) $(CC) $(MYCFLAGS) -c regtable2seq.c f2cl.o : f2cl.c $(HEADER) $(CC) $(MYCFLAGS) -c f2cl.c setdirection.o : setdirection.c $(HEADER) $(CC) $(MYCFLAGS) -c setdirection.c replaceu.o : replaceu.c $(HEADER) $(CC) $(MYCFLAGS) -c replaceu.c restoreu.o : restoreu.c $(HEADER) $(CC) $(MYCFLAGS) -c restoreu.c mccaskillwrap.o : mccaskillwrap.c $(HEADER) $(CC) $(MYCFLAGS) -c mccaskillwrap.c contrafoldwrap.o : contrafoldwrap.c $(HEADER) $(CC) $(MYCFLAGS) -c contrafoldwrap.c pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pairlocalalign.c pairash.o : pairash.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pairash.c rnatest.o : rnatest.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c rnatest.c multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c multi2hat3s.c pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pair2hat3s.c io.o : io.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c io.c nj.o : nj.c $(HEADER) $(CC) $(MYCFLAGS) -c nj.c treeOperation.o : treeOperation.c $(HEADER) $(CC) $(MYCFLAGS) -c treeOperation.c sextet5.o : sextet5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c sextet5.c mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c mafft-distance.c triplet5.o : triplet5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c triplet5.c triplet6.o : triplet6.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c triplet6.c fft.o : fft.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fft.c fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fftFunctions.c Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign.c Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign_localhom.c mtxutl.o : mtxutl.c $(CC) $(MYCFLAGS) -c mtxutl.c addfunctions.o : addfunctions.c $(HEADER) 
$(CC) $(MYCFLAGS) -c addfunctions.c score.o : score.c $(HEADER) $(CC) $(MYCFLAGS) -c score.c clean : rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) # rm -f ../binaries/* ../scripts/* install : all mkdir -p $(LIBDIR) chmod 755 $(LIBDIR) mkdir -p $(BINDIR) chmod 755 $(BINDIR) chmod 755 $(SCRIPTS) $(INSTALL) $(SCRIPTS) $(BINDIR) chmod 755 $(PROGS) $(INSTALL) -s $(PROGS) $(LIBDIR) $(INSTALL) $(PERLPROGS) $(LIBDIR) $(INSTALL) -m 644 $(MANPAGES) $(LIBDIR) ( cd $(BINDIR); \ rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \ rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi; \ ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \ ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \ ln -s mafft einsi; \ ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \ ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \ ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\ rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \ rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . 
) mkdir -p $(MANDIR) chmod 755 $(MANDIR) $(INSTALL) -m 644 $(MANPAGES) $(MANDIR) # remove incorrectly installed manpages by previous versions # rm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1 mafft-7.123-without-extensions/core/rna.c0000640000076500007650000003613412172746471017423 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define MEMSAVE 1 #define DEBUG 1 #define USE_PENALTY_EX 1 #define STOREWM 1 static float singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 ) { float val; int i, j; int code1, code2; val = 0.0; for( i=0; i 3 ) code1 = 36; code2 = amino_n[(int)s2[j][p2]]; if( code2 > 3 ) code2 = 36; // fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (float)ribosumdis[code1][code2] ); val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } static float pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 ) { float val; int i, j; int code1o, code1u, code2o, code2u, code1, code2; val = 0.0; for( i=0; i 3 ) code1 = code1o = 36; else if( code1u > 3 ) code1 = 36; else code1 = 4 + code1o * 4 + code1u; code2o = amino_n[(int)s2[j][p2]]; code2u = amino_n[(int)s2[j][c2]]; if( code2o > 3 ) code2 = code1o = 36; else if( code2u > 3 ) code2 = 36; else code2 = 4 + code2o * 4 + code2u; // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] ); if( code1 == 36 || code2 == 36 ) val += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j]; else val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } static float pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 ) { float val; int i, j; int code1o, code1u, code2o, code2u, code1, code2; val = 0.0; for( i=0; i 3 ) code1 = code1o = 36; else if( code1u > 3 ) code1 = 36; else code1 = 4 + code1u * 4 + code1o; code2o = 
amino_n[(int)s2[j][p2]]; code2u = amino_n[(int)s2[j][c2]]; if( code2o > 3 ) code2 = code1o = 36; else if( code2u > 3 ) code2 = 36; else code2 = 4 + code2u * 4 + code2o; // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] ); if( code1 == 36 || code2 == 36 ) val += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j]; else val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff ) { int lgth; int nogaplgth; int i, j; int left, right, adpos; float prob; static TLS int *pairnum; RNApair *pt, *pt2; lgth = strlen( seq[0] ); pairnum = calloc( lgth, sizeof( int ) ); for( i=0; ibestpos!=-1; pt++ ) { left = sgapmap[i][j]; right = sgapmap[i][pt->bestpos]; prob = pt->bestscore; for( pt2=pairprob[left]; pt2->bestpos!=-1; pt2++ ) if( pt2->bestpos == right ) break; // fprintf( stderr, "i,j=%d,%d, left=%d, right=%d, pt=%d, pt2->bestpos = %d\n", i, j, left, right, pt-single[i][j], pt2->bestpos ); if( pt2->bestpos == -1 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); adpos = pairnum[left]; pairnum[left]++; pairprob[left][adpos].bestscore = 0.0; pairprob[left][adpos].bestpos = right; pairprob[left][adpos+1].bestscore = -1.0; pairprob[left][adpos+1].bestpos = -1; pt2 = pairprob[left]+adpos; } else adpos = pt2-pairprob[left]; pt2->bestscore += prob * eff[i]; if( pt2->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); // fprintf( stderr, "pairprob[0][0].bestpos=%d\n", pairprob[0][0].bestpos ); // fprintf( stderr, "pairprob[0][0].bestscore=%f\n", pairprob[0][0].bestscore ); } } // fprintf( stderr, "before taikakuka\n" ); for( i=0; i -1 ) { // pairprob[i][j].bestscore /= (float)nseq; // fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, 
pairprob[i][j].bestpos, pairprob[i][j].bestscore, seq[0][i], seq[0][pairprob[i][j].bestpos] ); } } #if 0 for( i=0; i %d\n", i, j, right, i ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = pairprob[i][j].bestscore; pairprob[right][pairnum[right]].bestpos = i; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; } #endif free( pairnum ); } void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob ) { int lgth; int i; static TLS int *order = NULL; static TLS char **name = NULL; char gett[1000]; FILE *fp; int left, right, dumm; float prob; static TLS int pid; static TLS char fnamein[100]; static TLS char cmd[1000]; static TLS int *pairnum; lgth = strlen( seq[0] ); if( order == NULL ) { pid = (int)getpid(); sprintf( fnamein, "/tmp/_rnaalifoldin.%d", pid ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, 10 ); for( i=0; i 50.0 && prob > pairprob[left][0].bestscore ) { pairprob[left][0].bestscore = prob; pairprob[left][0].bestpos = right; #else if( prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob / 100.0; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob / 100.0; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; fprintf( stderr, "%d-%d, %f\n", left, right, prob ); #endif } } fclose( fp ); sprintf( cmd, "rm -f %s", fnamein ); system( cmd ); for( i=0; i -1 ) { pairprob[right][0].bestpos = 
i; pairprob[right][0].bestscore = pairprob[i][0].bestscore; } } #if 0 for( i=0; i -1 ) pairprob[i][0].bestscore = 1.0; // atode kesu #endif // fprintf( stderr, "after taikakuka in rnaalifoldcall\n" ); // for( i=0; iori\n%s\n", oseq1[0] ); fprintf( stdout, ">rev\n%s\n", oseq1r[0] ); } #endif /* similarity score */ Lalignmm_hmout( oseq1, oseq2, eff1, eff2, nseq1, nseq2, 10000, NULL, NULL, NULL, NULL, map ); if( 1 ) { if( RNAscoremtx == 'n' ) { for( i=0; ibestpos!=-1; pairpt1++ ) { for( j=0; jbestpos!=-1; pairpt2++ ) { uido = pairpt1->bestpos; ujdo = pairpt2->bestpos; prob = pairpt1->bestscore * pairpt2->bestscore; if( uido > -1 && ujdo > -1 ) { if( uido > i && j > ujdo ) { impmtx2[i][j] += prob * pairedribosumscore53( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j, uido, ujdo ) * consweight_multi; tbppmtx[i][j] -= prob; } else if( i < uido && j < ujdo ) { impmtx2[i][j] += prob * pairedribosumscore35( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j, uido, ujdo ) * consweight_multi; tbppmtx[i][j] -= prob; } } } } for( i=0; ibestpos!=-1; pairpt1++ ) { // if( pairprob1[i] == NULL ) continue; for( j=0; jbestpos!=-1; pairpt2++ ) { // fprintf( stderr, "i=%d, j=%d, pn1=%d, pn2=%d\n", i, j, pairpt1-pairprob1[i], pairpt2-pairprob2[j] ); // if( pairprob2[j] == NULL ) continue; uido = pairpt1->bestpos; ujdo = pairpt2->bestpos; prob = pairpt1->bestscore * pairpt2->bestscore; // prob = 1.0; // fprintf( stderr, "i=%d->uido=%d, j=%d->ujdo=%d\n", i, uido, j, ujdo ); // fprintf( stderr, "impmtx2[%d][%d] = %f\n", i, j, impmtx2[i][j] ); // if( i < uido && j > ujdo ) continue; // if( i > uido && j < ujdo ) continue; // posdistj = abs( ujdo-j ); // if( uido > -1 && ujdo > -1 ) if( uido > -1 && ujdo > -1 && ( ( i > uido && j > ujdo ) || ( i < uido && j < ujdo ) ) ) { { impmtx2[i][j] += MAX( 0, map[uido][ujdo] ) * consweight_rna * 600 * prob; // osoi } } } } for( i=0; istart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // 
fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = pt - seq1[i] - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = pt - seq1[i] - 0; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = pt - seq1[i] - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = pt - seq2[j] - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = pt - seq2[j] - 0; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = pt - seq2[j] - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } #if 1 k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; // impmtx[k1][k2] += tmpptr->fimportance * effij; if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->fimportance * effij_kozo; else impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( 
stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); // fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } #else while( k1 <= end1 && k2 <= end2 ) { fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 ); if( !nocount1[k1] && !nocount2[k2] ) { impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold; fprintf( stderr, "marked\n" ); } else fprintf( stderr, "no count\n" ); k1++; k2++; } #endif tmpptr = tmpptr->next; } } } #if 0 if( clus1 == 1 && clus2 == 1 ) { fprintf( stderr, "writing impmtx\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "impmtx = \n" ); for( k2=0; k2start1 ); fprintf( stderr, "end1 = %d\n", localhom[i][j]->end1 ); fprintf( stderr, "j = %d, seq2 = %s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start1 ) break; } start1 = pt - seq1[i] - 1; while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, localhom[i][j].end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->end1 ) break; } end1 = pt - seq1[i] - 1; pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start2 ) break; } start2 = pt - seq2[j] - 1; while( *pt != 0 ) { if( *pt++ != '-' ) 
tmpint++; if( tmpint == localhom[i][j]->end2 ) break; } end2 = pt - seq2[j] - 1; // fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); k1 = start1; k2 = start2; fprintf( stderr, "step 0\n" ); while( k1 <= end1 && k2 <= end2 ) { #if 0 if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j].wimportance * eff1[i] * eff2[j]; k1++; k2++; #else if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; #endif } dif = ( end1 - start1 ) - ( end2 - start2 ); fprintf( stderr, "dif = %d\n", dif ); if( dif > 0 ) { do { fprintf( stderr, "dif = %d\n", dif ); k1 = start1; k2 = start2 - dif; while( k1 <= end1 && k2 <= end2 ) { if( 0 <= k2 && start2 <= k2 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif-- ); } else { do { k1 = start1 + dif; k2 = start2; while( k1 <= end1 ) { if( k1 >= 0 && k1 >= start1 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif++ ); } } } #if 0 fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; float wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = 
AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_sc( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_sc( gapmap1[iin], gapmap2[jin] ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = '-'; *--gaptable2 = '-'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= 
jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float *ogcp1; static TLS float *ogcp2; static TLS float *fgcp1; static TLS float *fgcp2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS float *gapfreq1; static TLS float *gapfreq2; float fpenalty = (float)penalty; float *fgcp2pt; float *ogcp2pt; float fgcp1va; float ogcp1va; float *gf2pt; float *gf2ptpre; float gf1va; float gf1vapre; float headgapfreq1; float headgapfreq2; if( seq1 == NULL ) { if( orlgth1 ) { // fprintf( stderr, "## Freeing local arrays in A__align\n" ); orlgth1 = 0; orlgth2 = 0; imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0 ); free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } else { // 
fprintf( stderr, "## Not allocated\n" ); } return( 0.0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 == 0 || lgth2 == 0 ) { fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 == 0 && lgth2 == 0 ) return( 0.0 ); if( lgth1 == 0 ) { for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i 
commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( headgp == 1 ) { for( i=1; i", wm ); fprintf( stderr, "%5.0f? (penal=%5.2f)", g=mi+*fgcp2pt*(1.0-gapfreq1[i]), *fgcp2pt*(1.0-gapfreq1[i]) ); #endif if( (g=mi+*fgcp2pt*gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); // fprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] ); } if( (g=*prept+*ogcp2pt*gf1vapre) >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); fprintf( stderr, "%5.0f? 
(penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2[j]), fgcp1va*(1.0-gapfreq2[j]) ); #endif if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); // fprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] ); } if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i #include #include double getrusage_sec() { struct rusage t; struct timeval tv; getrusage(RUSAGE_SELF, &t); tv = t.ru_utime; return tv.tv_sec + (double)tv.tv_usec*1e-6; } #endif int intcmp( int *str1, int *str2 ) { while( *str1 != -1 && *str2 != -1 ) if( *str1++ != *str2++ ) return( 1 ); if( *str1 != *str2 ) return( 1 ); return( 0 ); } char **arguments( int argc, char *argv[] ) { int c = 0; fmodel = 0; nblosum = 62; calledByXced = 0; devide = 0; fftscore = 1; use_fft = 1; alg = 'A'; weight = 0; utree = 1; tbutree = 0; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 0; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; scoremtx = 1; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = 0; // chokusetsu yobareru kara kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; TMorJTT = JTT; treemethod = 'x'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'P': dorp = 'p'; break; case 'D': dorp = 'd'; break; case 'F': use_fft = 1; break; case 'N': use_fft = 0; break; case 'e': fftscore = 0; break; case 'Q': alg = 'Q'; break; case 'H': alg = 'H'; break; case 'A': alg = 'A'; break; 
case 'M': alg = 'M'; break; case 'd': disp = 1; break; case 'O': outgap = 0; break; case 'a': fmodel = 1; break; case 'u': tbrweight = 0; break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 2 ) { fprintf( stderr, "options: Check source file ! 
%c ?\n", c ); exit( 1 ); } fprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\n", tbitr, tbrweight, tbweight ); // readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); return( argv ); } void GroupAlign( int nseq1, int nseq2, char **name, int *nlen, char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double **len, double *eff, int alloclen ) { int i; int clus1, clus2; int s1, s2; float pscore; static char **name1, **name2; double *effarr = eff; double *effarr1 = NULL; double *effarr2 = NULL; static char *indication1, *indication2; float dumfl = 0.0; int intdum; #if DEBUG double time1, time2; #endif // fprintf( stderr, "in GroupAlign fftWinSize = %d\n", fftWinSize ); // fprintf( stderr, "in GroupAlign fftThreshold = %d\n", fftThreshold ); if( effarr1 == NULL ) { name1 = AllocateCharMtx( nseq1, B ); name2 = AllocateCharMtx( nseq2, B ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); #if 0 #else #endif } for( i=0; i 30000 || len2 > 30000 ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; } GroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen ); #if 0 writePre( njob, name, nlen, aseq, 1 ); #else writeDataforgaln( stdout, njob, name, nlen, aseq ); #endif SHOWVERSION; return( 0 ); } mafft-7.123-without-extensions/core/sextet5.c0000640000076500007650000001371012042130647020223 0ustar katohkatoh#include "mltaln.h" #include "mtxutl.h" #define DEBUG 0 #define TEST 0 #define END_OF_VEC -1 static int maxl; static int tsize; void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; disopt = 0; scoremtx = 1; nblosum = 62; dorp = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'D': dorp = 'd'; 
break; case 'P': dorp = 'p'; break; case 'I': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } void seq_grp_nuc( int *grp, char *seq ) { int tmp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); } *grp = END_OF_VEC; } void seq_grp( int *grp, char *seq ) { int tmp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); } *grp = END_OF_VEC; } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } int commonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize ); } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point 
+= *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } int main( int argc, char **argv ) { int i, j; FILE *fp, *infp; char **seq; int *grpseq; char *tmpseq; int **pointt; static char **name; static int nlen[M]; double **mtx; double **mtx2; double score, score0; static short *table1; char b[B]; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); tmpseq = AllocateCharVec( nlenmax+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); pamN = NOTSPECIFIED; #if 0 FRead( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); maxl = 0; for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } for( i=0; i all positive double ribosum4[4][4] = { // a g c t { 2.22, -1.46, -1.86, -1.39, }, // a { -1.46, 1.03, -2.48, -1.74, }, // g { -1.86, -2.48, 1.16, -1.05, }, // c { -1.39, -1.74, -1.05, 1.65, }, // t }; double ribosum16[16][16] = { // aa ag ac at ga gg gc gt ca cg cc ct ta tg tc tt { -2.49, -8.24, -7.04, -4.32, -6.86, -8.39, -5.03, -5.84, -8.84, -4.68, -14.37, -12.64, -4.01, -6.16, -11.32, -9.05, }, // aa { -8.24, -0.80, -8.89, -5.13, -8.61, -5.38, -5.77, -6.60, 
-10.41, -4.57, -14.53, -10.14, -5.43, -5.94, -8.87, -11.07, }, // ag { -7.04, -8.89, -2.11, -2.04, -9.73, -11.05, -3.81, -4.72, -9.37, -5.86, -9.08, -10.45, -5.33, -6.93, -8.67, -7.83, }, // ac { -4.32, -5.13, -2.04, 4.49, -5.33, -5.61, 2.70, 0.59, -5.56, 1.67, -6.71, -5.17, 1.61, -0.51, -4.81, -2.98, }, // at { -6.86, -8.61, -9.73, -5.33, -1.05, -8.67, -4.88, -6.10, -7.98, -6.00, -12.43, -7.71, -5.85, -7.55, -6.63, -11.54, }, // ga { -8.39, -5.38, -11.05, -5.61, -8.67, -1.98, -4.13, -5.77, -11.36, -4.66, -12.58, -13.69, -5.75, -4.27, -12.01, -10.79, }, // gg { -5.03, -5.77, -3.81, 2.70, -4.88, -4.13, 5.62, 1.21, -5.95, 2.11, -3.70, -5.84, 1.60, -0.08, -4.49, -3.90, }, // gc { -5.84, -6.60, -4.72, 0.59, -6.10, -5.77, 1.21, 3.47, -7.93, -0.27, -7.88, -5.61, -0.57, -2.09, -5.30, -4.45, }, // gt { -8.84, -10.41, -9.37, -5.56, -7.98, -11.36, -5.95, -7.93, -5.13, -3.57, -10.45, -8.49, -2.42, -5.63, -7.08, -8.39, }, // ca { -4.68, -4.57, -5.86, 1.67, -6.00, -4.66, 2.11, -0.27, -3.57, 5.36, -5.71, -4.96, 2.75, 1.32, -4.91, -3.67, }, // cg { -14.37, -14.53, -9.08, -6.71, -12.43, -12.58, -3.70, -7.88, -10.45, -5.71, -3.59, -5.77, -6.88, -8.41, -7.40, -5.41, }, // cc { -12.64, -10.14, -10.45, -5.17, -7.71, -13.69, -5.84, -5.61, -8.49, -4.96, -5.77, -2.28, -4.72, -7.36, -3.83, -5.21, }, // ct { -4.01, -5.43, -5.33, 1.61, -5.85, -5.75, 1.60, -0.57, -2.42, 2.75, -6.88, -4.72, 4.97, 1.14, -2.98, -3.39, }, // ta { -6.16, -5.94, -6.93, -0.51, -7.55, -4.27, -0.08, -2.09, -5.63, 1.32, -8.41, -7.36, 1.14, 3.36, -4.76, -4.28, }, // tg { -11.32, -8.87, -8.67, -4.81, -6.63, -12.01, -4.49, -5.30, -7.08, -4.91, -7.40, -3.83, -2.98, -4.76, -3.21, -5.97, }, // tc { -9.05, -11.07, -7.83, -2.98, -11.54, -10.79, -3.90, -4.45, -8.39, -3.67, -5.41, -5.21, -3.39, -4.28, -5.97, -0.02, }, // tt }; int locpenaltyn = -1750; char locaminon[] = "agctuAGCTUnNbdhkmnrsvwyx-O"; char locgrpn[] = { 0, 1, 2, 3, 3, 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; int exgpn = +00; int 
locn_disn[26][26] = /* u ha constants.c no nakade shori */ /* 0 - 4 dake yomareru. */ { { 1000, 600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 600, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 1000, 600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 600, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 500, 500, 0, 0, 0, 500, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, 0, 500, }, }; mafft-7.123-without-extensions/core/rnatest.c0000640000076500007650000002353412176060631020312 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = 
(int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': disp = 1; break; case 'p': treemethod = 'p'; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA 
(3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int *nlen; static char **name, **seq, **useq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; int i; FILE *infp; char c; int alloclen; RNApair **pair1; RNApair **pair2; float **map; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob > M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); seq = AllocateCharMtx( njob, nlenmax*5+1 ); useq = AllocateCharMtx( njob, nlenmax*5+1 ); aseq = AllocateCharMtx( njob, nlenmax*5+1 ); bseq = AllocateCharMtx( njob, nlenmax*5+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*5; pair1 = calloc( nlenmax*5+1, sizeof( RNApair *) ); pair2 = calloc( nlenmax*5+1, sizeof( RNApair *) ); map = AllocateFloatMtx( nlenmax+1, nlenmax ); eff = AllocateDoubleVec( njob ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); for( i=0; i %c ", seq[0][pair1[i].pos], seq[1][map12[pair1[i].pos].pos] ); if( pair2[map12[pair1[i].pos].pos].pos == -1 ) continue; fprintf( stderr, "%d:%d (%c)", 
map12[pair1[i].pos].pos, pair2[map12[pair1[i].pos].pos].pos, seq[1][pair2[map12[pair1[i].pos].pos].pos] ); } #endif exit( 1 ); pairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen ); fprintf( trap_g, "done.\n" ); #if DEBUG fprintf( stderr, "closing trap_g\n" ); #endif fclose( trap_g ); // writePre( njob, name, nlen, aseq, !contin ); #if 0 writeData( stdout, njob, name, nlen, aseq ); #endif #if IODEBUG fprintf( stderr, "OSHIMAI\n" ); #endif SHOWVERSION; return( 0 ); #endif } mafft-7.123-without-extensions/core/dndfast7.c0000640000076500007650000001635612042126713020344 0ustar katohkatoh#include "mltaln.h" #include #include #define DEBUG 0 #define TEST 0 int howmanyx( char *s ) { int val = 0; if( scoremtx == -1 ) { do { if( !strchr( "atgcuATGCU", *s ) ) val++; } while( *++s ); } else { do { if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; } while( *++s ); } return( val ); } void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; disopt = 0; divpairscore = 0; swopt = ""; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': disopt = 1; break; case 'A': swopt = "-A"; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *hat2p; FILE *hat3p; FILE *infp; char **seq = NULL; // by D.Mathog char **seq1; char **name; char **name1; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; static char b[B]; double max; char com[1000]; int opt[M]; int res; char *home; char *fastapath; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); LocalHom **localhomtable, *tmpptr; #if 0 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = 
NULL; #endif fastapath = getenv( "FASTA_4_MAFFT" ); if( !fastapath ) fastapath = "fasta34"; #if DEBUG if( home ) fprintf( stderr, "home = %s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d", home, pid ); sprintf( datafile, "%s/tmp/data-%d", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); sprintf( hat2file, "hat2-%d", pid ); arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else dorp = NOTSPECIFIED; getnumlen( infp ); #endif if( dorp == 'd' ) { scoremtx = -1; pamN = NOTSPECIFIED; } else { nblosum = 62; scoremtx = 1; } constants( njob, seq ); rewind( infp ); name = AllocateCharMtx( njob, B+1 ); name1 = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "%s %s -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); if( scoremtx == -1 ) res = ReadFasta34m10_nuc( hat2p, mtx[i], i, name1, localhomtable[i] ); else res = ReadFasta34m10( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); if( res < njob - i ) { fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); exit( 1 ); } if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, 
tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } fclose( hat3p ); #endif sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile ); system( com ); #if 0 sprintf( com, ALNDIR "/supgsdl < %s", hat2file ); res = system( com ); if( res ) ErrorExit( "error in spgsdl" ); #endif sprintf( com, "mv %s hat2", hat2file ); res = system( com ); if( res ) ErrorExit( "error in mv" ); SHOWVERSION; exit( 0 ); } mafft-7.123-without-extensions/core/countlen.c0000640000076500007650000000204511020654703020447 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 void arguments( int argc, char *argv[] ) { int c; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; int nlenmin; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; dorp = NOTSPECIFIED; getnumlen_nogap( infp, &nlenmin ); fprintf( stdout, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); return( 0 ); } mafft-7.123-without-extensions/core/mafftash_premafft.tmpl0000640000076500007650000001514312222164347023037 0ustar katohkatoh#!/usr/bin/perl ########################################################### # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) # # Date Changelog ########################################################### # 07.26.13 Initial release # 09.03.13 Added extensive warnings and error messages # ########################################################### use Getopt::Long; use LWP::Simple; use LWP::UserAgent; my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi"; my ( $WORKDIR, $PDBLIST, 
$OWNLIST, $HAT3FILE, $INSTRFILE ); GetOptions ( 'd=s' => \$WORKDIR, 'p=s' => \$PDBLIST, 'o=s' => \$OWNLIST, 'h=s' => \$HAT3FILE, 'i=s' => \$INSTRFILE, ); my $PDBLISTTMP = "/tmp/mafftash-rest-$$.pdb.inp"; unlink $PDBLISTTMP if -e $PDBLISTTMP; ###### # validation &help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST); &help("Required parameter : '-d'") if defined $OWNLIST && ! defined $WORKDIR; $HAT3FILE = "hat3" unless defined $HAT3FILE; $INSTRFILE = "instr" unless defined $INSTRFILE; chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g; ###### # prepare inputs my @files = (); push(@files, "strweight" => "0.5"); push(@files, "premafft" => "1"); # pdb entries if ( defined $PDBLIST ) { &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST; if ( open(INPF,"<$PDBLIST") ) { if ( open(OUTF,">$PDBLISTTMP") ) { while() { chomp; if (/^(\w{5})$/) { print OUTF ">PDBID\n$1\n"; } } close OUTF; } else { close INPF; &bail("Error: Cannot open temporary file $PDBLISTTMP for writing!"); } close INPF; } else { &bail("Error: Cannot open file $PDBLIST for reading!"); } push(@files, "inputfile" => ["$PDBLISTTMP"]); } # upload own structures my %ownids = (); if ( defined $OWNLIST ) { &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST; if ( open(OWNINPF,"<$OWNLIST") ) { while() { chomp; if ( /^(\w{5})$/ ) { my $fileref = "$WORKDIR/$1.pdb"; unless (-e $fileref) { close OWNINPF; &bail("Error: File $fileref does not exists!"); } push(@files, "inputownfile[]" => ["$fileref"]); $ownids{$1} = 1; } } close OWNINPF; } else { &bail("Error: Cannot open file $OWNLIST for reading!"); } } ###### # start rest service my $browser = LWP::UserAgent->new; $browser->timeout(0); # post: running a mafftash job my $postResponse = $browser->post ( $BASEURL, \@files, 'Content_Type' => 'form-data' ); #&bail(sprintf("[%d] %s\n", $postResponse->code, $postResponse->message)) unless($postResponse->is_success); 
&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success); # get response from post request my ($status, $mafftashid) = &parseResponse($postResponse->content); # wait for results until it becomes available while(1) { sleep 5; # get: get results for mafftash job my $getResponse = $browser->get("$BASEURL/premafft/$mafftashid"); if ( $getResponse->is_success ) { # get response from get request ($status, $mafftashid) = &parseResponse($getResponse->content); # job is not yet done. wait if ( $status eq "done") { &bail("Error retrieving hat3 file!") unless ( getstore("$BASEURL/premafft/hat3/$mafftashid", $HAT3FILE) == 200 ); &bail("Error retrieving instr file!") unless ( getstore("$BASEURL/premafft/instr/$mafftashid", $INSTRFILE) == 200 ); last; } next; } else { #&bail(sprintf("[%d] %s\n", $getResponse->code, $getResponse->message)); &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content))); } } # make sure outputs were generated &bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE; &bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE; # warn if some ownids were ommitted if ( scalar keys(%ownids) > 0 ) { my %instrids = (); if ( open(INSTRF,"<$INSTRFILE") ) { while() { chomp; if ( /^>\d+_(\w{5})$/ ) { $instrids{$1} = 1; } } close INSTRF; foreach my $id ( keys %ownids ) { warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id}; } } else { &bail("Error: Cannot open file $INSTRFILE for reading!"); } } unlink $PDBLISTTMP if defined $PDBLISTTMP && -e $PDBLISTTMP; #################### #################### sub parseResponse { my $response = shift; #"status":"wait","mafftashid":"Ma8211432R" my $status = ( $response =~ /\"status\":\"([^\s\"]+)\"/ ) ? $1 : ""; my $mafftashid = ( $response =~ /\"mafftashid\":\"([^\s\"]+)\"/ ) ? 
$1 : ""; return ($status, $mafftashid); } sub parseError { my $response = shift; #"error":"Invalid number of inputs found." my $errorstr = ( $response =~ /\"error\":\"([^\"]+)\"/ ) ? $1 : ""; return $errorstr; } sub bail { my $str = shift; print STDERR "$str\n" if defined $str; unlink $PDBLISTTMP if defined $PDBLISTTMP && -e $PDBLISTTMP; exit(1); } sub help { my $str = shift; print <<'HELPME'; USAGE ./mafftash_premafft.pl -p [FILE] ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY] ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY] PARAMETERS -p [FILE] FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format -o [FILE] -d [DIRECTORY] FILE contains a list of IDs from your own structure/pdb files (one entry per line), IDs should be in the standard 5-character pdbid+chain naming format; for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY -h [HATFILE] save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory -i [INSTRFILE] save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory HELPME &bail($str); } mafft-7.123-without-extensions/core/tbfast.c0000640000076500007650000016374612225727262020134 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static int nadd; static int treein; static int topin; static int treeout; static int distout; static int noalign; static int multidist; static int subalignment; static int subalignmentoffset; #ifdef enablemultithread typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _distancematrixthread_arg { int njob; int thread_no; float *selfscore; float **iscore; char **seq; Jobtable *jobpospt; pthread_mutex_t *mutex; } distancematrixthread_arg_t; typedef struct _treebasethread_arg { int thread_no; int 
*nrunpt; int njob; int *nlen; int *jobpospt; int ***topol; Treedep *dep; char **aseq; double *effarr; int *alloclenpt; LocalHom **localhomtable; RNApair ***singlerna; double *effarr_kozo; int *fftlog; char *mergeoralign; pthread_mutex_t *mutex; pthread_cond_t *treecond; } treebasethread_arg_t; #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; outnumber = 0; scoreout = 0; treein = 0; topin = 0; rnaprediction = 'm'; rnakozo = 0; nevermemsave = 0; inputfile = NULL; addfile = NULL; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; // chuui force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; multidist = 0; subalignment = 0; subalignmentoffset = 0; legacygapcost = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = 
(int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; case 'R': rnaprediction = 'r'; break; case 's': RNAscoremtx = 'r'; break; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case 't': treeout = 1; break; case 'T': noalign = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'L': legacygapcost = 1; break; #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif case 'S': scoreout = 1; break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; case 's': treemethod = 's'; break; #endif case 'X': treemethod = 'X'; break; case 'E': treemethod = 'E'; break; case 'q': treemethod = 'q'; break; case 'n' : outnumber = 1; break; #if 0 case 'a': alg = 'a'; break; case 'H': alg = 'H'; break; #endif case 
'Q': alg = 'Q'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'N': nevermemsave = 1; break; case 'B': break; case 'F': use_fft = 1; break; case 'G': force_fft = 1; use_fft = 1; break; case 'U': treein = 1; break; case 'V': topin = 1; break; case 'u': tbrweight = 0; weight = 0; break; case 'v': tbrweight = 3; break; case 'd': multidist = 1; break; #if 0 case 'd': disp = 1; break; #endif /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } #if 0 static void *distancematrixthread2( void *arg ) { distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; float *selfscore = targ->selfscore; float **iscore = targ->iscore; char **seq = targ->seq; Jobtable *jobpospt = targ->jobpospt; float ssi, ssj, bunbo; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; i++; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); ssi = selfscore[i]; if( i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); for( j=i+1; jnjob; int thread_no = targ->thread_no; float *selfscore = targ->selfscore; float **iscore = targ->iscore; char **seq = targ->seq; Jobtable *jobpospt = targ->jobpospt; float ssi, ssj, bunbo; int i, j; while( 1 ) { pthread_mutex_lock( 
targ->mutex ); j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; j = i + 1; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } } jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); if( bunbo == 0.0 ) iscore[i][j-i] = 1.0; else iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / bunbo; } } static void *treebasethread( void *arg ) { treebasethread_arg_t *targ = (treebasethread_arg_t *)arg; int *nrunpt = targ->nrunpt; int thread_no = targ->thread_no; int njob = targ->njob; int *nlen = targ->nlen; int *jobpospt = targ->jobpospt; int ***topol = targ->topol; Treedep *dep = targ->dep; char **aseq = targ->aseq; double *effarr = targ->effarr; int *alloclen = targ->alloclenpt; LocalHom **localhomtable = targ->localhomtable; RNApair ***singlerna = targ->singlerna; double *effarr_kozo = targ->effarr_kozo; int *fftlog = targ->fftlog; char *mergeoralign = targ->mergeoralign; char **mseq1, **mseq2; char **localcopy; int i, j, l; int len1, len2; int clus1, clus2; float pscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; int m1, m2; float dumfl = 0.0; int ffttry; RNApair ***grouprna1 = NULL, ***grouprna2 = NULL; mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); localcopy = calloc( njob, sizeof( char * ) ); if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); #if 0 #else if( constraint ) { localhomshrink = 
(LocalHom ***)calloc( njob, sizeof( LocalHom ** ) ); for( i=0; i main thread if( constraint ) calcimportance( njob, effarr, aseq, localhomtable ); #endif // writePre( njob, name, nlen, aseq, 0 ); // for( l=0; lmutex ); l = *jobpospt; if( l == njob-1 ) { pthread_mutex_unlock( targ->mutex ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); free( mseq1 ); free( mseq2 ); free( localcopy ); free( effarr1 ); free( effarr2 ); free( effarr1_kozo ); free( effarr2_kozo ); free( indication1 ); free( indication2 ); if( rnakozo && rnaprediction == 'm' ) { if( grouprna1 ) free( grouprna1 ); // nakami ha? if( grouprna2 ) free( grouprna2 ); // nakami ha? grouprna1 = grouprna2 = NULL; } if( constraint ) { if( localhomshrink ) // nen no tame { for( i=0; itreecond, targ->mutex ); } if( dep[l].child1 != -1 ) { while( dep[dep[l].child1].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } // while( *nrunpt >= nthread ) // pthread_cond_wait( targ->treecond, targ->mutex ); (*nrunpt)++; // pthread_mutex_unlock( targ->mutex ); if( mergeoralign[l] == 'n' ) { // fprintf( stderr, "SKIP!\n" ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); pthread_mutex_unlock( targ->mutex ); continue; } m1 = topol[l][0][0]; m2 = topol[l][1][0]; // pthread_mutex_lock( targ->mutex ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen <= len1 + len2 ) { fprintf( stderr, "\nReallocating (by thread %d) ..", thread_no ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); fprintf( stderr, "done. 
*alloclen = %d\n", *alloclen ); } for( i=0; (j=topol[l][0][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } for( i=0; (j=topol[l][1][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } pthread_mutex_unlock( targ->mutex ); if( effarr_kozo ) { clus1 = fastconjuction_noname_kozo( topol[l][0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); clus2 = fastconjuction_noname_kozo( topol[l][1], localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 ); } else { clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 ); clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 ); } #if 1 fprintf( stderr, "\rSTEP % 5d /%d (thread %4d) ", l+1, njob-1, thread_no ); #else fprintf( stderr, "STEP %d /%d (thread %d) \n", l+1, njob-1, thread_no ); fprintf( stderr, "group1 = %.66s", indication1 ); if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, ", child1 = %d\n", dep[l].child0 ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." 
); fprintf( stderr, ", child2 = %d\n", dep[l].child1 ); fprintf( stderr, "Group1's lengths = " ); for( i=0; i 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } else if( alg == 'H' ) { imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) 
imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'R' ) { imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'A' ): pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'R' ): pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'H' ): pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); 
fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // writePre( njob, name, nlen, localcopy, 0 ); if( disp ) display( localcopy, njob ); pthread_mutex_lock( targ->mutex ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); // pthread_mutex_unlock( targ->mutex ); // pthread_mutex_lock( targ->mutex ); for( i=0; (j=topol[l][0][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); for( i=0; (j=topol[l][1][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); pthread_mutex_unlock( targ->mutex ); for( i=0; (j=topol[l][0][i])!=-1; i++ ) free( localcopy[j] ); for( i=0; (j=topol[l][1][i])!=-1; i++ ) free( localcopy[j] ); free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); } } #endif void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo ) { int i, l, m; int len1nocommongap, len2nocommongap; int len1, len2; int clus1, clus2; float pscore, tscore; static char *indication1, *indication2; static double *effarr1 = NULL; static double *effarr2 = NULL; static double *effarr1_kozo = NULL; static double *effarr2_kozo = NULL; static LocalHom ***localhomshrink = NULL; static int *fftlog; int m1, m2; static int *gaplen; static int *gapmap; static int *alreadyaligned; float dumfl = 0.0; int ffttry; RNApair ***grouprna1 = NULL, ***grouprna2 = NULL; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } if( effarr1 == NULL ) { fftlog = AllocateIntVec( njob ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); gaplen = AllocateIntVec( *alloclen+10 ); gapmap = AllocateIntVec( *alloclen+10 ); 
alreadyaligned = AllocateIntVec( njob ); #if 0 #else if( constraint ) { localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) ); for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // for( i=0; i 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } else if( alg == 'H' ) { imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = H__align( mseq1, mseq2, effarr1, 
effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'R' ) { imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'A' ): pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'R' ): pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'H' ): pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, 
NULL ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. { adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] ); restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus2, 0, mseq2, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( mergeoralign[l] == '2' ) { // fprintf( stderr, ">mseq1[0] = \n%s\n", mseq1[0] ); // fprintf( stderr, ">mseq2[0] = \n%s\n", mseq2[0] ); adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); } #if SCOREOUT fprintf( stderr, "totalscore = %10.2f\n\n", tscore ); #endif if( rnakozo && rnaprediction == 'm' ) { if( grouprna1 ) free( grouprna1 ); // nakami ha? if( grouprna2 ) free( grouprna2 ); // nakami ha? 
grouprna1 = grouprna2 = NULL; } if( constraint ) { if( localhomshrink ) // nen no tame { for( i=0; i 0 ) { distancematrixthread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i= njob ) { fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); fprintf( stderr, "#\n" ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); fprintf( stderr, "#\n" ); fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "\n" ); exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d does not form a monophyletic cluster\n", i+1 ); fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "############################################################################### \n" ); fprintf( stderr, "\n" ); exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %c\n\n", i, mergeoralign[i] ); } #endif for( i=0; i 0 && nadd == 0 ) { treebasethread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t treecond; int *fftlog; int nrun; int nthread_yoyu; nthread_yoyu = nthread * 1; nrun = 0; jobpos = 0; targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); fftlog = AllocateIntVec( njob ); handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &treecond, NULL ); for( i=0; inext ) { free( (void *)tmppt1 ); tmppt1 = tmppt2; } free( (void *)tmppt1 ); } free( (void *)(localhomtable[i]+j) ); } free( (void *)localhomtable ); } #endif fprintf( trap_g, "done.\n" ); fclose( trap_g ); free( mergeoralign ); if( rnakozo && rnaprediction == 'm' ) { if( singlerna ) // nen no tame { for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi(*++argv); if( nadd == 0 ) { fprintf( stderr, "nadd = %d?\n", nadd ); exit( 1 ); } --argc; goto nextoption; case 'p': outputformat = 'p'; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( inputfile == NULL ) { argc--; inputfile = *argv; fprintf( stderr, "inputfile = %s\n", inputfile ); } if( argc != 0 ) { fprintf( stderr, "Usage: mafft-distance [-PD] [-i inputfile] inputfile > outputfile\n" ); exit( 1 ); } } void seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else fprintf( stderr, 
"WARNING : Unknown character %c\r", *(seq-1) ); } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } int commonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( *pointt == -1 ) return( 0 ); if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize ); } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; return; } p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; return; } p = n; 
point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } int main( int argc, char **argv ) { int i, j, initj; FILE *infp; char **seq; int *grpseq; char *tmpseq; int **pointt; static char **name; static int *nlen; double *mtxself; float score; static short *table1; float longer, shorter; float lenfac; float bunbo; int norg; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } tmpseq = AllocateCharVec( nlenmax+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); mtxself = AllocateDoubleVec( njob ); pamN = NOTSPECIFIED; name = AllocateCharMtx( njob, B ); nlen = AllocateIntVec( njob ); #if 0 FRead( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); if( nadd ) outputformat = 's'; norg = njob - nadd; if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); if( dorp == 'd' ) { lenfaca = DLENFACA; lenfacb = DLENFACB; lenfacc = DLENFACC; lenfacd = DLENFACD; } else { lenfaca = PLENFACA; lenfacb = PLENFACB; lenfacc = PLENFACC; lenfacd = PLENFACD; } maxl = 0; for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } fprintf( stderr, "\nCalculating i-i scores ... 
" ); for( i=0; i nlen[j] ) { longer=(float)nlen[i]; shorter=(float)nlen[j]; } else { longer=(float)nlen[j]; shorter=(float)nlen[i]; } // lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD ); lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0; // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter ); score = commonsextet_p( table1, pointt[j] ); bunbo = MIN( mtxself[i], mtxself[j] ); if( outputformat == 'p' ) { if( bunbo == 0.0 ) fprintf( stdout, " %8.6f", 1.0 ); else fprintf( stdout, " %8.6f", ( 1.0 - score / bunbo ) * lenfac ); if( j % 7 == 6 ) fprintf( stdout, "\n" ); } else { if( bunbo == 0.0 ) fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, 1.0, nlen[i], nlen[j] ); else fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] ); } // fprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo ); // score = (double)commonsextet_p( table1, pointt[j] ); // fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] ); } free( table1 ); } fprintf( stderr, "\n" ); if( outputformat == 'p' ) fprintf( stdout, "\n" ); SHOWVERSION; exit( 0 ); } mafft-7.123-without-extensions/core/mingw64mingw320000740000076500007650000000067211753636722021124 0ustar katohkatoh#!/usr/bin/env bash make clean make ENABLE_MULTITHREAD="" rm -rf binaries32 mkdir binaries32 mv ../binaries/* binaries32/ export PATH=/MinGW64/mingw-w64-1.0-bin_i686-mingw_20100702/bin:$PATH export C_INCLUDE_PATH=/MinGW64/mingw-w64-1.0-bin_i686-mingw_20100702/include export LIBRARY_PATH=/MinGW64/mingw-w64-1.0-bin_i686-mingw_20100702/lib make clean make ENABLE_MULTITHREAD="" rm -rf binaries64 mkdir binaries64 mv ../binaries/* binaries64/ mafft-7.123-without-extensions/core/restoreu.c0000640000076500007650000001175711752621325020510 0ustar 
katohkatoh#include "mltaln.h" #define DEBUG 0 char *alignmentfile; static void fillorichar( int nseq, int *oripos, char **a, char **s ) { int i; char *pta, *pts; for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; --argc; goto nextoption; case 'a': alignmentfile = *++argv; --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *alfp; char **name; char **aname; char **oname; char **seq; char **aseq; int *nlen; int *oripos; char *npt, *npt0, *npt2, *pt, *pt2; int i, o, prelen; int nlenmin; int njobs, njoba; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( alignmentfile ) { alfp = fopen( alignmentfile, "r" ); if( !alfp ) { fprintf( stderr, "Cannot open %s\n", alignmentfile ); exit( 1 ); } } else { fprintf( stderr, "No alignment is given.\n" ); exit( 1 ); } dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); njobs = njob; // fprintf( stderr, "in infp, %d x %d - %d %c\n", njob, nlenmin, nlenmax, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); oripos = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); dorp = NOTSPECIFIED; getnumlen( alfp ); njoba = njob; // fprintf( stderr, "in alfp, %d x %d %c\n", njob, nlenmax, dorp ); aseq = AllocateCharMtx( njob, nlenmax+1 ); aname = AllocateCharMtx( njob, B+1 ); oname = AllocateCharMtx( njob, B+1 ); readData_pointer( alfp, aname, nlen, aseq ); for( i=0; istart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = 
seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = pt - seq1[i] - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = pt - seq1[i] - 0; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = pt - seq1[i] - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = pt - seq2[j] - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = pt - seq2[j] - 0; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = pt - seq2[j] - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } #if 1 k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); // 
fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } #else while( k1 <= end1 && k2 <= end2 ) { fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 ); if( !nocount1[k1] && !nocount2[k2] ) { impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold; fprintf( stderr, "marked\n" ); } else fprintf( stderr, "no count\n" ); k1++; k2++; } #endif tmpptr = tmpptr->next; } } } #if 0 if( clus1 == 1 && clus2 == 6 ) { fprintf( stderr, "\n" ); fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom_gapmap( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc, int *gapmap1, int *gapmap2 ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; // char gap[] = "-"; char *gap; float wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { 
wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_scQ( gapmap1[iin], gapmap2[jin] ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = '-'; *--gaptable2 = '-'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_scQ( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } 
} for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; fprintf( stderr, "fpenalty_ex = %f\n", fpenalty_ex ); #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float *digf1; static TLS float *digf2; static TLS float *diaf1; static TLS float *diaf2; static TLS float *gapz1; static TLS float *gapz2; static TLS float *gapf1; static TLS float *gapf2; static TLS float *ogcp1g; static TLS float *ogcp2g; static TLS float *fgcp1g; static TLS float *fgcp2g; static TLS float *og_h_dg_n1_p; static TLS float *og_h_dg_n2_p; static TLS float *fg_h_dg_n1_p; static TLS float *fg_h_dg_n2_p; static TLS float *og_t_fg_h_dg_n1_p; static TLS float *og_t_fg_h_dg_n2_p; static TLS float *fg_t_og_h_dg_n1_p; static TLS float *fg_t_og_h_dg_n2_p; static TLS float *gapz_n1; static TLS float *gapz_n2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float tmppenal; float *fg_t_og_h_dg_n2_p_pt; float *og_t_fg_h_dg_n2_p_pt; float *og_h_dg_n2_p_pt; float *fg_h_dg_n2_p_pt; float *gapz_n2_pt0; float *gapz_n2_pt1; 
float *fgcp2pt; float *ogcp2pt; float fg_t_og_h_dg_n1_p_va; float og_t_fg_h_dg_n1_p_va; float og_h_dg_n1_p_va; float fg_h_dg_n1_p_va; float gapz_n1_va0; float gapz_n1_va1; float fgcp1va; float ogcp1va; float kyokaipenal; #if 1 float fpenalty = (float)penalty; #else float fpenalty; if( RNAscoremtx != 'r' ) fpenalty = (float)penalty; else fpenalty = (float)penalty * 10; #endif #if 0 fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] ); #endif if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 == 0 || lgth2 == 0 ) { fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatVec( og_h_dg_n1_p ); FreeFloatVec( og_h_dg_n2_p ); FreeFloatVec( fg_h_dg_n1_p ); FreeFloatVec( fg_h_dg_n2_p ); FreeFloatVec( og_t_fg_h_dg_n1_p ); FreeFloatVec( og_t_fg_h_dg_n2_p ); FreeFloatVec( fg_t_og_h_dg_n1_p ); FreeFloatVec( fg_t_og_h_dg_n2_p ); FreeFloatVec( gapz_n1 ); FreeFloatVec( gapz_n2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); og_t_fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_t_fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_t_og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_t_og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); gapz_n1 = AllocateFloatVec( ll1+2 ); gapz_n2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif kyokaipenal = 0.0; if( outgap == 1 ) { g = 0.0; g += ogcp1g[0] * og_h_dg_n2_p[0]; // g += ogcp1g[0] * ( 1.0-ogcp2g[0]-digf2[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal1=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += ogcp2g[0] * og_h_dg_n1_p[0]; // g += ogcp2g[0] * ( 1.0-ogcp1g[0]-digf1[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal2=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += fgcp1g[0] * fg_h_dg_n2_p[0]; // g += fgcp1g[0] * ( 1.0-fgcp2g[0]-digf2[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal1=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); g += fgcp2g[0] * fg_h_dg_n1_p[0]; // g += fgcp2g[0] * ( 1.0-fgcp1g[0]-digf1[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal2=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); kyokaipenal = g; initverticalw[0] += g; currentw[0] += g; for( i=1; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif tmppenal = gapz_n1_va1 * *fg_t_og_h_dg_n2_p_pt; // tmppenal = gapz_n1[i+1] * fg_t_og_h_dg_n2_p[j]; // tmppenal = ( (1.0-gapz1[i+1])*(1.0-fgcp2g[j]+ogcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada if( (g=mi+tmppenal) > wm ) { // fprintf( stderr, "jump i start=%f (j=%d, fgcp2g[j]=%f, digf2[j]=%f, diaf2[j]=%f), %c-%c\n", g-mi, j, fgcp2g[j], digf2[j], diaf2[j], seq1[0][i], seq2[0][j] ); wm = g; *ijppt = -( j - mpi ); } tmppenal = gapz_n1_va0 * *og_t_fg_h_dg_n2_p_pt; // tmppenal = gapz_n1[i] * og_t_fg_h_dg_n2_p[j]; // tmppenal = ( (1.0-gapz1[i])*(1.0-ogcp2g[j]+fgcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada if( (g=*prept+tmppenal) >= mi ) { // fprintf( stderr, "jump i end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); mi = 
g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif tmppenal = *gapz_n2_pt1 * fg_t_og_h_dg_n1_p_va; // tmppenal = gapz_n2[j+1] * fg_t_og_h_dg_n1_p[i]; // tmppenal = ( (1.0-gapz2[j+1])*(1.0-fgcp1g[i]+ogcp1g[i]-digf1[i]) ) * 0.5 * fpenalty; // mada if( (g=*mjpt+tmppenal) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } tmppenal = *gapz_n2_pt0 * og_t_fg_h_dg_n1_p_va; // tmppenal = gapz_n2[j] * og_t_fg_h_dg_n1_p[i]; // tmppenal = ( (1.0-gapz2[j])*(1.0-ogcp1g[i]+fgcp1g[i]-digf1[i]) ) * 0.5 * fpenalty; // mada if( (g=*prept+tmppenal) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fg_t_og_h_dg_n2_p_pt++; og_t_fg_h_dg_n2_p_pt++; og_h_dg_n2_p_pt++; fg_h_dg_n2_p_pt++; gapz_n2_pt0++; gapz_n2_pt1++; fgcp2pt++; ogcp2pt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? 
*/ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; fprintf( stderr, "fpenalty_ex = %f\n", fpenalty_ex ); #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float *digf1; static TLS float *digf2; static TLS float *diaf1; static TLS float *diaf2; static TLS float *gapz1; static TLS float *gapz2; static TLS float *gapf1; static TLS float *gapf2; static TLS float *ogcp1g; static TLS float *ogcp2g; static TLS float *fgcp1g; static TLS float *fgcp2g; static TLS float *og_h_dg_n1_p; static TLS float *og_h_dg_n2_p; static TLS float *fg_h_dg_n1_p; static TLS float *fg_h_dg_n2_p; static TLS float *og_t_fg_h_dg_n1_p; static TLS float *og_t_fg_h_dg_n2_p; static TLS float *fg_t_og_h_dg_n1_p; static TLS float *fg_t_og_h_dg_n2_p; static TLS float *gapz_n1; static TLS float *gapz_n2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float tmppenal; float *fg_t_og_h_dg_n2_p_pt; float *og_t_fg_h_dg_n2_p_pt; float *og_h_dg_n2_p_pt; float *fg_h_dg_n2_p_pt; float *gapz_n2_pt0; float *gapz_n2_pt1; float *fgcp2pt; float *ogcp2pt; float fg_t_og_h_dg_n1_p_va; float og_t_fg_h_dg_n1_p_va; float og_h_dg_n1_p_va; float fg_h_dg_n1_p_va; float gapz_n1_va0; float gapz_n1_va1; float fgcp1va; float ogcp1va; float kyokaipenal; float fpenalty = (float)penalty; #if 0 fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] ); #endif if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } 
lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 == 0 || lgth2 == 0 ) { fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatVec( og_h_dg_n1_p ); FreeFloatVec( og_h_dg_n2_p ); FreeFloatVec( fg_h_dg_n1_p ); FreeFloatVec( fg_h_dg_n2_p ); FreeFloatVec( og_t_fg_h_dg_n1_p ); FreeFloatVec( og_t_fg_h_dg_n2_p ); FreeFloatVec( fg_t_og_h_dg_n1_p ); FreeFloatVec( fg_t_og_h_dg_n2_p ); FreeFloatVec( gapz_n1 ); FreeFloatVec( gapz_n2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); og_t_fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_t_fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_t_og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_t_og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); gapz_n1 = AllocateFloatVec( ll1+2 ); gapz_n2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif kyokaipenal = 0.0; if( outgap == 1 ) { g = 0.0; g += ogcp1g[0] * og_h_dg_n2_p[0]; // g += ogcp1g[0] * ( 1.0-ogcp2g[0]-digf2[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal1=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += ogcp2g[0] * og_h_dg_n1_p[0]; // g += ogcp2g[0] * ( 1.0-ogcp1g[0]-digf1[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal2=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += fgcp1g[0] * fg_h_dg_n2_p[0]; // g += fgcp1g[0] * ( 1.0-fgcp2g[0]-digf2[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal1=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); g += fgcp2g[0] * fg_h_dg_n1_p[0]; // g += fgcp2g[0] * ( 1.0-fgcp1g[0]-digf1[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal2=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); kyokaipenal = g; initverticalw[0] += g; currentw[0] += g; for( i=1; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif tmppenal = gapz_n1_va1 * *fg_t_og_h_dg_n2_p_pt; // tmppenal = gapz_n1[i+1] * fg_t_og_h_dg_n2_p[j]; // tmppenal = ( (1.0-gapz1[i+1])*(1.0-fgcp2g[j]+ogcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada if( (g=mi+tmppenal) > wm ) { // fprintf( stderr, "jump i start=%f (j=%d, fgcp2g[j]=%f, digf2[j]=%f, diaf2[j]=%f), %c-%c\n", g-mi, j, fgcp2g[j], digf2[j], diaf2[j], seq1[0][i], seq2[0][j] ); wm = g; *ijppt = -( j - mpi ); } tmppenal = gapz_n1_va0 * *og_t_fg_h_dg_n2_p_pt; // tmppenal = gapz_n1[i] * og_t_fg_h_dg_n2_p[j]; // tmppenal = ( (1.0-gapz1[i])*(1.0-ogcp2g[j]+fgcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada if( (g=*prept+tmppenal) >= mi ) { // fprintf( stderr, "jump i end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); mi = 
/*
 * ambiguousscore -- derive scores for nucleotide ambiguity letters.
 *
 * amino_n : character -> matrix index lookup table.
 * n_dis   : score matrix, updated in place.
 *
 * For every ambiguity letter (r y k m s w b d h v) the score against matrix
 * index 0..25 is set to the mean of the scores of the bases that the letter
 * stands for, converted to int through shishagonyuu(); the value is then
 * mirrored to the transposed cell.  Afterwards the self-score (diagonal
 * cell) of each ambiguity letter is set to the mean of the self-scores of
 * its bases.
 *
 * The letters are processed in table order and the columns in ascending
 * order on purpose: later iterations read cells written by earlier ones,
 * so the sequence of reads and writes must not be rearranged.
 */
static void ambiguousscore( int *amino_n, int n_dis[0x80][0x80] )
{
	/* ambiguity letter followed by the bases it represents */
	static const char ambig[10][5] =
	{
		"rag", "yct", "kgt", "mac", "sgc", "wat",
		"bcgt", "dagt", "hact", "vacg"
	};
	const int nambig = 10;
	const char *bp;
	int pos, c, code, base, sum, nbase;

	for( pos=0; pos<26; pos++ )
	{
		/* pass 1: n_dis[pos][letter] = mean over bases of n_dis[base][pos] */
		for( c=0; c<nambig; c++ )
		{
			sum = 0;
			nbase = 0;
			for( bp=ambig[c]+1; *bp; bp++ )
			{
				sum += n_dis[amino_n[(int)*bp]][pos];
				nbase++;
			}
			n_dis[pos][amino_n[(int)ambig[c][0]]] = shishagonyuu( (double)1/nbase * sum );
		}

		/* pass 2: copy each freshly computed value to the transposed cell */
		for( c=0; c<nambig; c++ )
		{
			code = amino_n[(int)ambig[c][0]];
			n_dis[code][pos] = n_dis[pos][code];
		}
	}

	/* self-scores of the ambiguity letters */
	for( c=0; c<nambig; c++ )
	{
		sum = 0;
		nbase = 0;
		for( bp=ambig[c]+1; *bp; bp++ )
		{
			base = amino_n[(int)*bp];
			sum += n_dis[base][base];
			nbase++;
		}
		code = amino_n[(int)ambig[c][0]];
		n_dis[code][code] = shishagonyuu( (double)1/nbase * sum );
	}
}
amino_n[(int)*seq++]; if( n < n20or4or2 && n >= 0 ) result[n][i].R += incr; } } static void seq_vec_5( Fukusosuu *result, double *score1, double *score2, double incr, char *seq ) { int n; for( ; *seq; result++ ) { n = amino_n[(int)*seq++]; if( n > 20 ) continue; result->R += incr * score1[n]; result->I += incr * score2[n]; #if 0 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R ); #endif } } static void seq_vec_4( Fukusosuu *result, double incr, char *seq ) { char s; for( ; *seq; result++ ) { s = *seq++; if( s == 'a' ) result->R += incr; else if( s == 't' ) result->R -= incr; else if( s == 'g' ) result->I += incr; else if( s == 'c' ) result->I -= incr; } } #if 0 // by D.Mathog static void seq_vec( Fukusosuu *result, char query, double incr, char *seq ) { #if 0 int bk = nlen; #endif while( *seq ) { if( *seq++ == query ) result->R += incr; result++; #if 0 fprintf( stderr, "i = %d result->R = %f\n", bk-nlen, (result-1)->R ); #endif } } static int checkRepeat( int num, int *cutpos ) { int tmp, buf; buf = *cutpos; while( num-- ) { if( ( tmp = *cutpos++ ) < buf ) return( 1 ); buf = tmp; } return( 0 ); } static int segcmp( void *ptr1, void *ptr2 ) { int diff; Segment **seg1 = (Segment **)ptr1; Segment **seg2 = (Segment **)ptr2; #if 0 return( (*seg1)->center - (*seg2)->center ); #else diff = (*seg1)->center - (*seg2)->center; if( diff ) return( diff ); diff = (*seg1)->start - (*seg2)->start; if( diff ) return( diff ); diff = (*seg1)->end - (*seg2)->end; if( diff ) return( diff ); fprintf( stderr, "USE STABLE SORT !!\n" ); exit( 1 ); return( 0 ); #endif } #endif static void mymergesort( int first, int last, Segment **seg ) { int middle; static TLS int i, j, k, p; static TLS int allo = 0; static TLS Segment **work = NULL; if( seg == NULL ) { if( work ) free( work ); work = NULL; return; } if( last > allo ) { allo = last; if( work ) free( work ); work = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) ); } if( first < last ) { middle 
= ( first + last ) / 2; mymergesort( first, middle, seg ); mymergesort( middle+1, last, seg ); p = 0; for( i=first; i<=middle; i++ ) work[p++] = seg[i]; i = middle + 1; j = 0; k = first; while( i <= last && j < p ) { if( work[j]->center <= seg[i]->center ) seg[k++] = work[j++]; else seg[k++] = seg[i++]; } while( j < p ) seg[k++] = work[j++]; } } double Fgetlag( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen ) { int i, j, k, l, m; int nlen, nlen2, nlen4; static TLS int crossscoresize = 0; static TLS char **tmpseq1 = NULL; static TLS char **tmpseq2 = NULL; static TLS char **tmpptr1 = NULL; static TLS char **tmpptr2 = NULL; static TLS char **tmpres1 = NULL; static TLS char **tmpres2 = NULL; static TLS char **result1 = NULL; static TLS char **result2 = NULL; #if RND static TLS char **rndseq1 = NULL; static TLS char **rndseq2 = NULL; #endif static TLS Fukusosuu **seqVector1 = NULL; static TLS Fukusosuu **seqVector2 = NULL; static TLS Fukusosuu **naiseki = NULL; static TLS Fukusosuu *naisekiNoWa = NULL; static TLS double *soukan = NULL; static TLS double **crossscore = NULL; int nlentmp; static TLS int *kouho = NULL; static TLS Segment *segment = NULL; static TLS Segment *segment1 = NULL; static TLS Segment *segment2 = NULL; static TLS Segment **sortedseg1 = NULL; static TLS Segment **sortedseg2 = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; static TLS int localalloclen = 0; int lag; int tmpint; int count, count0; int len1, len2; int totallen; float dumfl = 0.0; int headgp, tailgp; len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( !localalloclen ) { kouho = AllocateIntVec( NKOUHO ); cut1 = AllocateIntVec( 
MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); // crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 4; else if( fftscore == 1 ) n20or4or2 = 2; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( fftkeika ) fprintf( stderr, " FFT ... 
" ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; m /dev/tty" ); #if 0 fftfp = fopen( "list.plot", "w" ); fprintf( fftfp, "plot 'frt'\n pause +1" ); fclose( fftfp ); system( "/usr/bin/gnuplot list.plot" ); #endif #endif getKouho( kouho, NKOUHO, soukan, nlen ); #if 0 for( i=0; iCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); if( tmpint == 0 ) break; // 060430 iinoka ? while( tmpint-- > 0 ) { if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "Goukaku=%dko\n", tmpint ); fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 
fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 fprintf( stderr, "done. (%d anchors)\r", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fprintf( stdout, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( crossscoresize < count+2 ) { crossscoresize = count+2; fprintf( stderr, "####################################################################################################################################allocating crossscore, size = %d\n", crossscoresize ); if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if DEBUG fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { fprintf( stderr, "REPEAT!? \n" ); if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); fprintf( stderr, "done. (%d anchors)\n", count ); #endif } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l alloclen ) ErrorExit( "LENGTH OVER in Falign\n " ); for( j=0; j= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( prevalloclen != alloclen ) // Falign_noudp mo kaeru { if( prevalloclen ) { FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); } // fprintf( stderr, "\n\n\nreallocating ...\n" ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); prevalloclen = alloclen; } if( !localalloclen ) { sgap1 = AllocateCharVec( njob ); egap1 = AllocateCharVec( njob ); sgap2 = AllocateCharVec( njob ); egap2 = AllocateCharVec( njob ); 
kouho = AllocateIntVec( NKOUHO ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); // crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 1; else if( fftscore ) n20or4or2 = 1; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); if( !kobetsubunkatsu ) { naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); } #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { if( fftkeika ) fprintf( stderr, " FFT ... 
" ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; mCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif // fprintf( stderr, "lag = %d\n", lag ); tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); // if( lag == -50 ) exit( 1 ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); if( tmpint == 0 ) break; // 060430 iinoka ? while( tmpint-- > 0 ) { #if 0 if( segment[count].end - segment[count].start < fftWinSize ) { count++; continue; } #endif if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; } } #if 0 if( !kobetsubunkatsu && fftkeika ) fprintf( stderr, "%d anchors found\r", count ); #endif if( !count && 
fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fprintf( stderr, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 if( fftkeika ) fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if 0 fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l%d of GROUP1\n", j ); fprintf( stdout, "%s\n", tmpres1[j] ); } for( j=0; j%d of GROUP2\n", j ); fprintf( stdout, "%s\n", tmpres2[j] ); } fflush( stdout ); #endif switch( alg ) { case( 'a' ): totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen ); break; case( 'M' ): totalscore += MSalignmm( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp ); } else totalscore += A__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); break; case( 'H' ): if( clus1 == 1 && clus2 == 1 ) { totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp ); } else totalscore += H__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, 
alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2 ); break; case( 'Q' ): if( clus1 == 1 && clus2 == 1 ) { totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp ); } else totalscore += Q__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2 ); break; default: fprintf( stderr, "alg = %c\n", alg ); ErrorExit( "ERROR IN SOURCE FILE Falign.c" ); break; } #ifdef enablemultithread if( chudanres && *chudanres ) { // fprintf( stderr, "\n\n## CHUUDAN!!! at Falign_localhom\n" ); return( -1.0 ); } #endif nlen = strlen( tmpres1[0] ); if( totallen + nlen > alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if 0 fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( prevalloclen != alloclen ) // Falign_noudp mo kaeru { if( prevalloclen ) { FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); } // fprintf( stderr, "\n\n\nreallocating ...\n" ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); prevalloclen = alloclen; } if( !localalloclen ) { sgap1 = AllocateCharVec( njob ); egap1 = AllocateCharVec( njob ); sgap2 = AllocateCharVec( njob ); egap2 = AllocateCharVec( njob ); kouho = AllocateIntVec( NKOUHO_LONG ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); sortedseg1 = (Segment **)calloc( 
MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 1; else if( fftscore ) n20or4or2 = 1; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); if( !kobetsubunkatsu ) { naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); } #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { fprintf( stderr, " FFT ... 
" ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; mCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif // fprintf( stderr, "lag = %d\n", lag ); tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); // fprintf( stderr, "lag = %d, %d found\n", lag, tmpint ); // if( lag == -50 ) exit( 1 ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); // fprintf( stderr, "##### k=%d / %d\n", k, maxk ); // if( tmpint == 0 ) break; // 060430 iinoka ? // 090530 yameta while( tmpint-- > 0 ) { #if 0 if( segment[count].end - segment[count].start < fftWinSize ) { count++; continue; } #endif if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "##### k=%d / %d\n", k, maxk ); fprintf( stderr, "anchor %d, score = %f\n", count, segment1[count].score ); fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( 
stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 if( !kobetsubunkatsu ) fprintf( stderr, "done. (%d anchors) ", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fprintf( stderr, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( count < 5000 ) { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 if( fftkeika ) fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if 0 fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } } else { fprintf( stderr, "\nMany anchors were found. 
The upper-level DP is skipped.\n\n" ); cut1[0] = 0; cut2[0] = 0; count0 = 0; for( i=0; icenter, sortedseg1[i]->pair->center ); if( sortedseg1[i]->center > cut1[count0] && sortedseg1[i]->pair->center > cut2[count0] ) { count0++; cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { if( i && sortedseg1[i]->score > sortedseg1[i-1]->score ) { if( sortedseg1[i]->center > cut1[count0-1] && sortedseg1[i]->pair->center > cut2[count0-1] ) { cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { // count0--; } } } } // if( count-count0 ) // fprintf( stderr, "%d anchors unused\n", count-count0 ); cut1[count0+1] = len1; cut2[count0+1] = len2; count = count0 + 2; count0 = count; } } // exit( 0 ); #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l%d of GROUP1\n", j ); fprintf( stdout, "%s\n", tmpres1[j] ); } for( j=0; j%d of GROUP2\n", j ); fprintf( stdout, "%s\n", tmpres2[j] ); } fflush( stdout ); #endif switch( alg ) { case( 'M' ): totalscore += MSalignmm( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp ); break; default: fprintf( stderr, "alg = %c\n", alg ); ErrorExit( "ERROR IN SOURCE FILE Falign.c" ); break; } nlen = strlen( tmpres1[0] ); if( totallen + nlen > alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j output\n" ); fprintf( stderr, "=== \n" ); fprintf( stderr, "========================================================================= \n" ); fprintf( stderr, "========================================================================= \n" ); return( (int)(*seq)[i] ); } } seq++; } return( 0 ); } void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx ) { int i, j, lgth; lgth = strlen( aseq[0] ); for( j=0; j DISPSEQF ) imax = DISPSEQF; else imax = nseq; fprintf( stderr, " 
....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; fprintf( stderr, "%3d %s\n", i+1, b ); } } #if 0 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len ) { int i, j, k; double score; double tmpscore; char *mseq1, *mseq2; double efficient; char xxx[100]; // totaleff1 = 0.0; for( i=0; ilen-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; #if 1 sprintf( xxx, "%f", score ); // fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif } #if 0 fprintf( stderr, "###score = %f\n", score ); #endif #if 0 fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif return( score ); } #endif void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // fprintf( stderr, "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double 
*value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // fprintf( stderr, "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; double gaptmpscore; double gapscore = 0.0; // fprintf( stderr, "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } #if 0 fprintf( stderr, "###gapscore = %f\n", gapscore ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; static double efficient[1]; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == 
(int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)*efficient; } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { int i, j, k; double c; int len = strlen( seq[0] ); double score; double tmpscore; char *mseq1, *mseq2; double efficient; #if DEBUG FILE *fp; #endif score = 0.0; c = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; /* fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); */ } /* fprintf( stdout, "total score = %f\n", score ); */ for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; } } /* fprintf( stderr, "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* fprintf( trap_g, "score by fast = %f\n", (float)score ); tmpscore = score = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[24][0]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); */ score += (double)tmpscore * efficient; } } score /= c; return( (double)score ); } void upg2( int nseq, double **eff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; static char **pair = NULL; if( !pair ) { 
pair = AllocateCharMtx( njob, njob ); } for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos ) { int j; float tmpfloat; float mindisfrom; int nearest; // float **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=*effpt++) < *mindisfrompt ) if( (tmpfloat=eff[pos][j-pos]) < mindisfrom ) { mindisfrom = tmpfloat; nearest = j; } } // effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpfloat=eff[j][pos-j]) < mindisfrom ) { mindisfrom = tmpfloat; nearest = j; } } *mindisfrompt = mindisfrom; *nearestpt = nearest; } static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) { int j; double tmpfloat; double **effptpt; Bchain *acptj; *mindisfrompt = 999.9; *nearestpt = -1; // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=*effpt++) < *mindisfrompt ) if( (tmpfloat=eff[pos][j]) < *mindisfrompt ) { *mindisfrompt = tmpfloat; *nearestpt = j; } } effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpfloat=eff[j][pos]) < 
*mindisfrompt ) { *mindisfrompt = tmpfloat; *nearestpt = j; } } } static void loadtreeoneline( int *ar, float *len, FILE *fp ) { static char gett[1000]; int res; char *p; p = fgets( gett, 999, fp ); if( p == NULL ) { fprintf( stderr, "\n\nFormat error (1) in the tree? It has to be a bifurcated and rooted tree.\n" ); fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } res = sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 ); if( res != 4 ) { fprintf( stderr, "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" ); fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } ar[0]--; ar[1]--; if( ar[0] >= ar[1] ) { fprintf( stderr, "\n\nIncorrect guide tree\n" ); fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); // fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] ); } void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int *hist = NULL; Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; int nmemim, nmemjm; char **tree; char *treetmp; char *nametmp, *nameptr, *tmpptr; char namec; FILE *fp; int node[2]; fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob*50 ); treetmp = NULL; nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*50 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun 
de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { fprintf( stderr, "\n\nCheck the guide tree.\n" ); fprintf( stderr, "im=%d, jm=%d\n", im+1, jm+1 ); fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } if( len[k][0] == -1.0 || len[k][1] == -1.0 ) { fprintf( stderr, "\n\nERROR: Branch length is not given.\n" ); exit( 1 ); } if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; #endif prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; // fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; // fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } 
else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; // mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fprintf( fp, "#by loadtree\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); free( hist ); free( (char *)ac ); free( (void *)nmemar ); } static float sueff1, sueff05; static double sueff1_double, sueff05_double; static float cluster_mix_float( float d1, float d2 ) { return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); } static float cluster_average_float( float d1, float d2 ) { return( ( d1 + d2 ) * 0.5 ); } static float cluster_minimum_float( float d1, float d2 ) { return( MIN( d1, d2 ) ); } static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } void fixed_supg_float_realloc_nobk_halfmtx_treeout_constrained( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep, int ngroup, int **groups ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; float tmpfloat; float eff1, eff0; float *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? 
int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti, *acptj; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? int nmemim, nmemjm; float minscore; int *nearest = NULL; // by D.Mathog, a guess float *mindisfrom = NULL; // by D.Mathog, a guess char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? FILE *fp; float (*clusterfuncpt[1])(float,float); char namec; int *testtopol, **inconsistent; int **inconsistentpairlist; int ninconsistentpairs; int *warned; int allinconsistent; int firsttime; sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); testtopol = AllocateIntVec( njob + 1 ); inconsistent = AllocateIntMtx( njob, njob ); // muda inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda warned = AllocateIntVec( ngroup ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!! 
ninconsistentpairs = 0; firsttime = 1; while( 1 ) { if( firsttime ) { firsttime = 0; minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpfloat=eff[i][j-i]) < minscore ) { minscore = tmpfloat; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpfloat=eff[j][i-j]) < minscore ) { minscore = tmpfloat; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_f; } } } exitloop_f: if( allinconsistent ) { fprintf( stderr, "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // fprintf( stderr, "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) fprintf( stderr, " %03d", testtopol[i]+1 ); // fprintf( stderr, "\n" ); #endif for( i=0; i-1; j++ ) fprintf( stderr, " %03d", groups[i][j]+1 ); // fprintf( stderr, "\n" ); if( overlapmember( groups[i], testtopol ) ) { if( !includemember( 
testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; fprintf( stderr, "\n###################################################################\n" ); fprintf( stderr, "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); fprintf( stderr, "###################################################################\n" ); } inconsistent[im][jm] = 1; inconsistentpairlist[ninconsistentpairs][0] = im; inconsistentpairlist[ninconsistentpairs][1] = jm; ninconsistentpairs++; break; } } } if( i == ngroup ) { // fprintf( stderr, "OK\n" ); break; } } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // fprintf( stderr, "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm-im] = 999.9; for( acpti=ac; acpti!=NULL; 
acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpfloat = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpfloat = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stderr, "\noSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); FreeIntMtx( inconsistentpairlist ); free( warned ); } void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; float tmpfloat; float eff1, eff0; float *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? int nmemim, nmemjm; float minscore; int *nearest = NULL; // by D.Mathog, a guess float *mindisfrom = NULL; // by D.Mathog, a guess char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? 
FILE *fp; float (*clusterfuncpt[1])(float,float); char namec; sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { 
fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // fprintf( stderr, "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpfloat = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpfloat = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; 
acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stderr, "\nooSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpfloat; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; sueff1_double = 1 - SUEFF; sueff05_double = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = 
cluster_minimum_double; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; nmemim = nmemar[im]; // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; 
nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpfloat = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpfloat = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], 
treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\nvSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpfloat; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti, *acptj; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; int *testtopol, **inconsistent; int **inconsistentpairlist; int ninconsistentpairs; int 
*warned; int allinconsistent; int firsttime; sueff1_double = 1 - SUEFF; sueff05_double = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? 
treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); testtopol = AllocateIntVec( njob + 1 ); inconsistent = AllocateIntMtx( njob, njob ); // muda inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda warned = AllocateIntVec( ngroup ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpfloat=eff[i][j]) < minscore ) { minscore = tmpfloat; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpfloat=eff[j][i]) < minscore ) { minscore = tmpfloat; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_d; } } } exitloop_d: if( allinconsistent ) { fprintf( stderr, "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; 
) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // fprintf( stderr, "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) fprintf( stderr, " %03d", testtopol[i]+1 ); // fprintf( stderr, "\n" ); #endif for( i=0; i-1; j++ ) fprintf( stderr, " %03d", groups[i][j]+1 ); // fprintf( stderr, "\n" ); if( overlapmember( testtopol, groups[i] ) ) { if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; fprintf( stderr, "\n###################################################################\n" ); fprintf( stderr, "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); fprintf( stderr, "###################################################################\n" ); } inconsistent[im][jm] = 1; inconsistentpairlist[ninconsistentpairs][0] = im; inconsistentpairlist[ninconsistentpairs][1] = jm; ninconsistentpairs++; break; } } } if( i == ngroup ) { // fprintf( stderr, "OK\n" ); break; } } prevnode = hist[im]; nmemim = nmemar[im]; // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 
= pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm] = 999.9; // eff[im][jm-im] = 999.9; // bug?? for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpfloat = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpfloat = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to 
miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\ncSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); FreeIntMtx( inconsistentpairlist ); free( warned ); } void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep, int progressout ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; float tmpfloat; float eff1, eff0; float *tmptmplen = NULL; // static TLS -> local, 2012/02/25 int *hist = NULL; // static TLS -> local, 2012/02/25 Bchain *ac = NULL; // static TLS -> local, 2012/02/25 int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; // static TLS -> local, 2012/02/25 int nmemim, nmemjm; float minscore; // float sueff1 = 1 - SUEFF; // float sueff05 = SUEFF * 0.5; int *nearest = NULL; // by Mathog, a guess float *mindisfrom = NULL; // by Mathog, a guess float (*clusterfuncpt[1])(float,float); sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = 
cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != 
im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpfloat = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double eff1, eff0; int *hist = NULL; Achain *ac = NULL; double minscore; char **tree; char *treetmp; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; FILE *fp; 
int node[2]; float lenfl[2]; char *nametmp, *nameptr, *tmpptr; //static? char namec; fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { // treetmp = AllocateCharVec( njob*50 ); treetmp = NULL; // tree = AllocateCharMtx( njob, njob*50 ); tree = AllocateCharMtx( njob, 0 ); nametmp = AllocateCharVec( 1000 ); // nagasugi hist = AllocateIntVec( njob ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { fprintf( stderr, "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; i nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { fprintf( stderr, "\n\nCheck the guide tree.\n" ); fprintf( stderr, "im=%d, jm=%d\n", im+1, jm+1 ); fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); if( lenfl[0] == -1.0 || lenfl[1] == -1.0 ) { fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" ); exit( 1 ); } if( lenfl[0] < 0.0 ) lenfl[0] = 0.0; if( lenfl[1] < 0.0 ) lenfl[1] = 0.0; #endif // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); intpt = topol[k][0]; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; 
) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; #if 0 len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; #else len[k][0] = lenfl[0]; len[k][1] = lenfl[1]; #endif hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { fprintf( stderr, "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { fprintf( stderr, "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); // strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); // fprintf( fp, "by veryfastsupg_double_loadtree\n" ); fclose( fp ); #if 1 fprintf( stderr, "\n" ); free( hist ); free( (char *)ac ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); 
#endif } #if 0 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = 
%f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } #endif void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; static char *nametmp; FILE *fpout; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; double (*clusterfuncpt[1])(double,double); sueff1_double = 1 - SUEFF; sueff05_double = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); nametmp = AllocateCharVec( 31 ); } // for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 
0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; #else eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fpout = fopen( "infile.tree", "w" ); fprintf( fpout, "%s\n", treetmp ); // fprintf( fpout, "by veryfastsupg_double_outtree\n" ); fclose( fpout ); #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif } void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int tmpint; int eff1, eff0; static double *tmptmplen = NULL; static int **eff = NULL; static int *hist = NULL; static Achain *ac = NULL; int minscore; double minscoref; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateIntMtx( njob, njob ); hist = 
AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } #else intpt = topol[k][0]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; #endif len[k][0] = minscoref - tmptmplen[im]; len[k][1] = minscoref - tmptmplen[jm]; tmptmplen[im] = minscoref; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } 
void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) /* len$B$O!"(B oeff$B$,@0?t!#(Blen$B$b *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = (double)( minscore - tmptmplen[im] ); len[k][1] = (double)( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; #if 0 free( tmptmplen ); tmptmplen = AllocateIntVec( nseq ); #endif hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF ); } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; } void fastsupg( int nseq, double **oeff, int ***topol, 
double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; static Achain *ac; float minscore; int im = -1, jm = -1; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); ac = (Achain *)calloc( njob, sizeof( Achain ) ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; minscore /= 2.0; len[k][0] = (double)minscore - tmplen[im]; len[k][1] = (double)minscore - tmplen[jm]; tmplen[im] = (double)minscore; for( i=0; i 0 ); for( i=0; i-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); fprintf( stderr, "\n" ); #endif } fprintf( stderr, "\n" ); // FreeFloatMtx( eff ); // FreeCharMtx( pair ); // FreeFloatVec( tmplen ); // free( ac ); } void supg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float **floatptpt; float *floatpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] 
); printf( "\n" ); #endif } } void spg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else double **eff = NULL; char **pair = NULL; if( !eff ) { eff = AllocateDoubleMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); } #endif for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } double ipower( double x, int n ) /* n > 0 */ { double r; r = 1; while( n != 0 ) { if( n & 1 ) r *= x; x *= x; n >>= 1; } return( r ); } void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { int i, j, k, s1, s2; static double rootnode[M]; if( nseq-2 < 0 ) { fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq ); exit( 1 ); } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } } void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ { int i, j, k, s1, s2; int rootnode[M]; for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; 
for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (float)penalty; tmpscore += (float)amino_dis[ms1][ms2]; while( (ms2=(int)seq2[++k]) == (int)'-' ) tmpscore += 
(float)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } return( tmpscore ); } float score_calc1( char *seq1, char *seq2 ) /* method 1 */ { int k; float score = 0.0; int count = 0; int len = strlen( seq1 ); for( k=0; k 1 ) { if( utree == 0 ) { for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco1+i, "%c", ch+0x61 ); } sco1[len] = 0; for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); tmp = ( tmp - 400 * !scoremtx ) * 2; if( tmp < 0 ) tmp = 0; ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco2+i, "%c", ch+0x61 ); sco[i] = tmp; } sco2[len] = 0; for( i=WIN; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog while( str2 >= bk ) { *str2-- = *str1--; } } int isaligned( int nseq, char **seq ) { int i; int len = strlen( seq[0] ); for( i=1; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore / (double)c; #if DEBUG printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); printf( "tmpscore / c = %f\n", tmpscore/(double)c ); #endif } } fprintf( stderr, "raw score = %f\n", score ); score /= (double)nseq * ( nseq-1.0 ) / 2.0; score += 400.0; #if DEBUG printf( "score in mltaln9.c = %f\n", score ); #endif return( (double)score ); } void floatncpy( float *vec1, float *vec2, int len ) { while( len-- ) *vec1++ = *vec2++; } float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ { int i, j, k; int gb1, gb2, gc1, gc2; int cob; int nglen; int len = strlen( seq[0] ); float score; score = 0; nglen = 0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore; } } return( score ); } #define SEGMENTSIZE 150 int searchAnchors( int nseq, char **seq, Segment *seg ) { int i, j, k, kcyc; int status; double score; 
int value = 0; int len; int length; static double *stra = NULL; static int alloclen = 0; double cumscore; static double threshold; len = strlen( seq[0] ); if( alloclen < len ) { if( alloclen ) { FreeDoubleVec( stra ); } else { threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; } stra = AllocateDoubleVec( len ); alloclen = len; } for( i=0; iskipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; length = 0; /* modified at 01/09/11 */ for( j=0; j threshold ) fprintf( stderr, "YES\n" ); else fprintf( stderr, "NO\n" ); #endif if( score > threshold ) { if( !status ) { status = 1; seg->start = i; length = 0; cumscore = 0.0; } length++; cumscore += score; } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } length = 0; cumscore = 0.0; status = 0; value++; seg++; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } return( value ); } void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 ptr->importance = ptr->opt / ptr->overlapaa; ptr->fimportance = (float)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, nseq1; LocalHom 
*ptr; #if 1 #else int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt * 0.5; // tekitou ptr->fimportance = (float)ptr->importance; // fprintf( stderr, "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt ); #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } #if 1 #else free( nogaplen ); #endif } void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, pos, len; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_float ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( nlenmax ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) #if 1 importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; tmpptr->fimportance = (float)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[i]+j; 
tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B } #endif // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 fprintf( stderr, "before averaging:\n" ); for( i=0; inext ) { fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // fprintf( stderr, "average?\n" ); for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = tmpptr2->importance = imp; tmpptr1->fimportance = tmpptr2->fimportance = (float)imp; // fprintf( stderr, "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 fprintf( stderr, "after averaging:\n" ); for( i=0; inext ) { if( tmpptr->end1 ) fprintf( stderr, "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } #endif free( importance ); free( nogaplen ); free( ieff ); } #if 0 void weightimportance( int nseq, double **eff, LocalHom **localhom ) { int i, j, pos, len; static double *importance; double tmpdouble; LocalHom *tmpptr, *tmpptr1, *tmpptr2; if( importance == NULL ) importance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; 
istart1; pos<=tmpptr->end1; pos++ ) // importance[pos] += eff[i][j] * tmpptr->importance; importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0; fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance ); tmpptr = tmpptr->next; if( tmpptr == NULL ) break; } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posstart1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble; fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); tmpptr = tmpptr->next; } while( tmpptr ); } } #if 1 for( i=0; iimportance += tmpptr2->importance; tmpptr1->importance *= 0.5; tmpptr2->importance *= tmpptr1->importance; fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance ); tmpptr1 = tmpptr1->next; tmpptr2 = tmpptr2->next; fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 ); } } #endif } void weightimportance2( int nseq, double *eff, LocalHom **localhom ) { int i, j, pos, len; static double *wimportance; double tmpdouble; if( wimportance == NULL ) wimportance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; iwimportance = tmpptr->importance * eff1[i] * eff2[j]; tmpptr = tmpptr->next; } while( tmpptr ); } } } static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt ) { LocalHom *tmpptr; tmpptr = localhom; fprintf( stderr, "adding localhom\n" ); while( tmpptr->next ) tmpptr = tmpptr->next; fprintf( stderr, "allocating localhom\n" ); tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); fprintf( stderr, "done\n" ); tmpptr = tmpptr->next; tmpptr->start1 = start1; tmpptr->start2 = start2; tmpptr->end1 = end1; tmpptr->end2 = end2; tmpptr->opt = opt; fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = 
%d\n", start1, end1, start2, end2 ); } #if 0 #endif void extendlocalhom( int nseq, LocalHom **localhom ) { int i, j, k, pos0, pos1, pos2, st; int start1, start2, end1, end2; static int *tmpint1 = NULL; static int *tmpint2 = NULL; static int *tmpdouble1 = NULL; static int *tmpdouble2 = NULL; double opt; LocalHom *tmpptr; if( tmpint1 == NULL ) { tmpint1 = AllocateIntVec( nlenmax ); tmpint2 = AllocateIntVec( nlenmax ); tmpdouble1 = AllocateIntVec( nlenmax ); tmpdouble2 = AllocateIntVec( nlenmax ); } for( k=0; kstart1; pos1 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint1[pos0] = pos1++; tmpdouble1[pos0] = tmpptr->opt; pos0++; } } while( tmpptr = tmpptr->next ); for( j=i+1; jstart1; pos2 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint2[pos0] = pos2++; tmpdouble2[pos0++] = tmpptr->opt; } } while( tmpptr = tmpptr->next ); #if 0 fprintf( stderr, "i,j=%d,%d\n", i, j ); for( pos0=0; pos0= 0 && tmpint2[pos0] >= 0 ) { if( st == 0 ) { st = 1; start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 ) { addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } } if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 ) { if( st == 1 ) { st = 0; addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); } } } } } } } #endif static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) { // dokka machigatteru if( pt != lh ) // susumeru { pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); pt = pt->next; pt->next = NULL; lh->last = pt; } else // sonomamatsukau 
{ lh->last = pt; } lh->nokori++; // fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); pt->start1 = sti; pt->start2 = stj; pt->end1 = eni; pt->end2 = enj; pt->opt = opt; pt->extended = interm; pt->overlapaa = overlp; #if 0 fprintf( stderr, "i: %d-%d\n", sti, eni ); fprintf( stderr, "j: %d-%d\n", stj, enj ); fprintf( stderr, "opt=%f\n", opt ); fprintf( stderr, "overlp=%d\n", overlp ); #endif } void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) { int overlp, plim; int i, j, k; int pi, pj, pk, len; int status, sti, stj; int *ipt; int co; static int *ini = NULL; static int *inj = NULL; LocalHom *pt; sti = 0; // by D.Mathog, a guess stj = 0; // by D.Mathog, a guess if( ini == NULL ) { ini = AllocateIntVec( nlenmax+1 ); inj = AllocateIntVec( nlenmax+1 ); } for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; ipt = ini; co = nlenmax+1; while( co-- ) *ipt++ = -1; ipt = inj; co = nlenmax+1; while( co-- ) *ipt++ = -1; overlp = 0; { for( pt=localhom[i]+k; pt; pt=pt->next ) { // fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); if( pt->opt == -1 ) { fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pi = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = ini + pk; while( len-- ) *ipt++ = pi++; } } { for( pt=localhom[j]+k; pt; pt=pt->next ) { if( pt->opt == -1 ) { fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pj = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = inj + pk; while( len-- ) *ipt++ = pj++; } } #if 0 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; fprintf( stderr, " %d", inj[pk] ); } fprintf( stderr, "\n" ); fprintf( stderr, 
"i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; fprintf( stderr, " %d", ini[pk] ); } fprintf( stderr, "\n" ); #endif overlp = 0; plim = nlenmax+1; for( pk = 0; pk < plim; pk++ ) if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; status = 0; plim = nlenmax+1; for( pk=0; pknext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); pt = localhom[j][i].last; // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); } } if( !status ) // else deha arimasenn. { if( ini[pk] == -1 || inj[pk] == -1 ) continue; sti = ini[pk]; stj = inj[pk]; // fprintf( stderr, "start here!\n" ); status = 1; } } // if( status ) fprintf( stderr, "end here\n" ); // exit( 1 ); // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #if 0 for( pt=localhomtable[i]+j; pt; pt=pt->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #endif } } } int makelocal( char *s1, char *s2, int thr ) { int start, maxstart, maxend; char *pt1, *pt2; double score; double maxscore; pt1 = s1; pt2 = s2; maxend = 0; // by D.Mathog, a guess // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); maxscore = 0.0; score = 0.0; start = 0; maxstart = 0; while( *pt1 ) { // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); if( *pt1 == '-' || *pt2 == '-' ) { // fprintf( stderr, "penalty = %d\n", penalty ); score += penalty; 
while( *pt1 == '-' || *pt2 == '-' ) { pt1++; pt2++; } continue; } score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr ); // score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); if( score > maxscore ) { // fprintf( stderr, "score = %f\n", score ); maxscore = score; maxstart = start; // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start ); } if( score < 0.0 ) { // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart ); if( start == maxstart ) { maxend = pt1 - s1; // fprintf( stderr, "maxend = %d\n", maxend ); } score = 0.0; start = pt1 - s1; } } if( start == maxstart ) maxend = pt1 - s1 - 1; // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); s1[maxend+1] = 0; s2[maxend+1] = 0; return( maxstart ); } void resetlocalhom( int nseq, LocalHom **lh ) { int i, j; LocalHom *pt; for( i=0; inext ) pt->opt = 1.0; } } void gapireru( char *res, char *ori, char *gt ) { char g; while( (g = *gt++) ) { if( g == '-' ) { *res++ = *newgapstr; } else { *res++ = *ori++; } } *res = 0; } void getkyokaigap( char *g, char **s, int pos, int n ) { // char *bk = g; // while( n-- ) *g++ = '-'; while( n-- ) *g++ = (*s++)[pos]; // fprintf( stderr, "bk = %s\n", bk ); } void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) #if 0 { int i, j, gc, gb; float feff; for( i=0; i", i, gaplen, k, (*fpt)[k].freq ); (*fpt)[k].freq += feff; // fprintf( stderr, "%f\n", (*fpt)[k].freq ); gaplen = 0; } } fpt++; } } #if 1 for( j=0; jnext = ac; acori->pos = -1; ac[0].prev = acori; // for( i=0; i tmpmin ) { minscore = tmpmin; nearest = i; } } nearesto = nearest; minscoreo = minscore; // for( i=0; i minscore ) { // fprintf( stderr, "\n\n\nminscore = %f\n", minscore ); // fprintf( stderr, "distfromtip = %f\n", dep[i].distfromtip ); // fprintf( stderr, "INSERT HERE, %d-%d\n", nearest, norg ); // fprintf( stderr, "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); if( 
leaf2node[nearest] == -1 ) { // fprintf( stderr, "INSERTING to 0!!!\n" ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][0][0] = nearest; topolc[posinnew][0][1] = -1; addedlen = lenc[posinnew][0] = minscore / 2; } else { // fprintf( stderr, "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[leaf2node[nearest]][0] ) + countmem( topol[leaf2node[nearest]][1] ) + 1 ) * sizeof( int ) ) ); // fprintf( stderr, "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); intcpy( topolc[posinnew][0], topol[leaf2node[nearest]][0] ); intcat( topolc[posinnew][0], topol[leaf2node[nearest]][1] ); addedlen = lenc[posinnew][0] = minscore / 2 - len[leaf2node[nearest]][0]; } neighbor = lastmem( topolc[posinnew][0] ); if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { fprintf( stderr, "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " distance: %f\n", minscore ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // fprintf( stderr, "INSERTING to 1!!!\n" ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][1][0] = norg; topolc[posinnew][1][1] = -1; lenc[posinnew][1] = minscore / 2; repnorg = 
nearest; // fprintf( stderr, "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][0][j] ); // fprintf( stderr, "\n len=%f\n", lenc[i][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][1][j] ); // fprintf( stderr, "\n len=%f\n", lenc[i][1] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); posinnew++; } // fprintf( stderr, "minscore = %f\n", minscore ); // fprintf( stderr, "distfromtip = %f\n", dep[i].distfromtip ); // fprintf( stderr, "Modify matrix, %d-%d\n", nearest, norg ); eff0 = iscorec[mem0][norg-mem0]; eff1 = iscorec[mem1][norg-mem1]; iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 ); iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda acprev = ac[mem1].prev; acnext = ac[mem1].next; acprev->next = acnext; if( acnext != NULL ) acnext->prev = acprev; if( ( nearest == mem1 || nearest == mem0 ) ) { minscore = 9999.9; // for( j=0; j iscorec[j][norg-j] ) // { // minscore = iscorec[j][norg-j]; // nearest = j; // } // } // fprintf( stderr, "searching on modified ac " ); for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda { // fprintf( stderr, "." 
); j = acpt->pos; tmpmin = iscorec[j][norg-j]; if( minscore > tmpmin ) { minscore = tmpmin; nearest = j; } } // fprintf( stderr, "done\n" ); } // fprintf( stderr, "posinnew = %d\n", posinnew ); if( topol[i][0][0] == repnorg ) { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); intcat( topolc[posinnew][0], additionaltopol ); lenc[posinnew][0] = len[i][0] - addedlen; // gennmitsu niha chigau addedlen = 0.0; } else { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); lenc[posinnew][0] = len[i][0]; } if( topol[i][1][0] == repnorg ) { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); intcat( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = len[i][1] - addedlen; // gennmitsu niha chigau addedlen = 0.0; repnorg = topolc[posinnew][0][0]; // juuyou } else { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); lenc[posinnew][1] = len[i][1]; } // fprintf( stderr, "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d->%d", topolc[posinnew][0][j], i+1 ); // fprintf( stderr, "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d->%d", topolc[posinnew][1][j], -i-1 ); // fprintf( stderr, "\n len=%f\n", lenc[posinnew][1] ); for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i; // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); // // fprintf( stderr, "%s\n", treetmp ); 
posinnew++; } if( nstep ) { i--; } else { i = 0; topol[i][0] = calloc( 2, sizeof( int ) ); topol[i][1] = calloc( 1, sizeof( int ) ); topol[i][0][0] = 0; topol[i][0][1] = -1; topol[i][1][0] = -1; } if( repnorg == -1 ) { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); intcat( topolc[posinnew][0], topol[i][1] ); lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); intcpy( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = minscore / 2; // neighbor = lastmem( topolc[posinnew][0] ); neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { fprintf( stderr, "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // fprintf( stderr, "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][0][j] ); // fprintf( stderr, "\n len=%f", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][1][j] ); // fprintf( stderr, "\n len=%f\n", lenc[posinnew][1] ); } free( leaf2node ); free( additionaltopol ); 
free( ac ); free( acori ); if( treeout ) free( neighborlist ); return( neighbor ); } #if 0 int samemember( int *mem, int *cand ) { int i, j; #if 0 fprintf( stderr, "mem = " ); for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] ); fprintf( stderr, "\n" ); fprintf( stderr, "cand = " ); for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] ); fprintf( stderr, "\n" ); #endif for( i=0, j=0; mem[i]>-1; ) { if( mem[i++] != cand[j++] ) return( 0 ); } if( cand[j] == -1 ) { return( 1 ); } else { return( 0 ); } } #else int samemember( int *mem, int *cand ) { int i, j; int nm, nc; #if 0 fprintf( stderr, "mem = " ); for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] ); fprintf( stderr, "\n" ); fprintf( stderr, "cand = " ); for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] ); fprintf( stderr, "\n" ); #endif nm = 0; for( i=0; mem[i]>-1; i++ ) nm++; nc = 0; for( i=0; cand[i]>-1; i++ ) nc++; if( nm != nc ) return( 0 ); for( i=0; mem[i]>-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } if( mem[i] == -1 ) { return( 1 ); } else { return( 0 ); } } #endif int includemember( int *mem, int *cand ) // mem in cand { int i, j; #if 0 fprintf( stderr, "mem = " ); for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] ); fprintf( stderr, "\n" ); fprintf( stderr, "cand = " ); for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] ); fprintf( stderr, "\n" ); #endif for( i=0; mem[i]>-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } // fprintf( stderr, "INCLUDED! 
mem[0]=%d\n", mem[0] ); return( 1 ); } int overlapmember( int *mem1, int *mem2 ) { int i, j; for( i=0; mem1[i]>-1; i++ ) for( j=0; mem2[j]>-1; j++ ) if( mem1[i] == mem2[j] ) return( 1 ); return( 0 ); } void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; double fr; // for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'D': scoremtx = -1; break; case 'P': scoremtx = 0; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } } int main( int ac, char **av ) { int *nlen; static char **name, **seq; double score; extern double score_calc_for_score( int, char ** ); arguments( ac, av ); getnumlen( stdin ); rewind( stdin ); nlen = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+2 ); readData_pointer( stdin, name, nlen, seq ); if( !isaligned( njob, seq ) ) ErrorExit( "Not aligned." 
); constants( njob, seq ); score = score_calc_for_score( njob, seq ); if( scoremtx == 0 ) score += offset; fprintf( stdout, "score = %f\n", score ); if ( scoremtx == 0 ) fprintf( stdout, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( stdout, "Dayhoff( machigai ga aru )\n" ); else if( scoremtx == 2 ) fprintf( stdout, "M-Y\n" ); else if( scoremtx == -1 ) fprintf( stdout, "DNA 1:%d\n", kimuraR ); fprintf( stdout, "gap penalty = %+6.2f, %+6.2f, %+6.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); exit( 0 ); } mafft-7.123-without-extensions/core/multi2hat3s.c0000640000076500007650000002104112176060426021002 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 1 #define TSUYOSAFACTOR 100 static int nhomologs; static int seedoffset; void strip( char *s ) { char *pt = s; while( *++pt ) if( *pt == '\n' ) *pt = 0; } void arguments( int argc, char *argv[] ) { int c; seedoffset = 0; nhomologs = 1; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "seed = %s\n", inputfile ); --argc; goto nextoption; case 't': nhomologs = myatoi( *++argv ); fprintf( stderr, "nhomologs = %d\n", nhomologs ); --argc; goto nextoption; 
case 'o': seedoffset = myatoi( *++argv ); fprintf( stderr, "seedoffset = %d\n", seedoffset ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen ) { int i, j; FILE *hat3p; float pscore = 0.0; // by D.Mathog static double *effarr1 = NULL; static double *effarr2 = NULL; char *aseq; static char **pseq; LocalHom **localhomtable, *tmpptr; double tsuyosa; if( nhomologs < 1 ) nhomologs = 1; // tsuyosa=0.0 wo sakeru tsuyosa = (double)nhomologs * nhomologs * TSUYOSAFACTOR; fprintf( stderr, "tsuyosa = %f\n", tsuyosa ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; if( tmpptr->start1 == -1 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d k\n", i+seedoffset, j+seedoffset, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); } } } fprintf( stderr, "\n" ); fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. 
FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; 
static char **bseq; static double *eff; int i; char c; int alloclen; FILE *infp; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i_seed_%s\n%s\n", name[i]+1, bseq[i] ); // CHUUI!! for( i=0; i_seed_%s\n%s\n", name[i]+1, seq[i] ); pairalign( name, nlen, seq, eff, alloclen ); fprintf( trap_g, "done.\n" ); #if DEBUG fprintf( stderr, "closing trap_g\n" ); #endif fclose( trap_g ); #if IODEBUG fprintf( stderr, "OSHIMAI\n" ); #endif SHOWVERSION; return( 0 ); } mafft-7.123-without-extensions/core/mafft-homologs.tmpl0000640000076500007650000002007411406322151022273 0ustar katohkatoh#!/usr/bin/env ruby localdb = "sp" # database name from which homologues are collected # by locally installed blast. Leave this if you do # not use the '-l' option. mafftpath = "_BINDIR/mafft" # path of mafft. "/usr/local/bin/mafft" # if mafft is in your command path, "mafft" is ok. blastpath = "blastall" # path of blastall. # if blastall is in your command path, "blastall" is ok. # mafft-homologs.rb v. 2.1 aligns sequences together with homologues # automatically collected from SwissProt via NCBI BLAST. 
# # mafft > 5.58 is required # # Usage: # mafft-homologs.rb [options] input > output # Options: # -a # the number of collected sequences (default: 50) # -e # threshold value (default: 1e-10) # -o "xxx" options for mafft # (default: " --op 1.53 --ep 0.123 --maxiterate 1000") # -l locally carries out blast searches instead of NCBI blast # (requires locally installed blast and a database) # -f outputs collected homologues also (default: off) # -w entire sequences are subjected to BLAST search # (default: well-aligned region only) require 'getopts' require 'tempfile' # mktemp GC.disable temp_vf = Tempfile.new("_vf").path temp_if = Tempfile.new("_if").path temp_pf = Tempfile.new("_pf").path temp_af = Tempfile.new("_af").path temp_qf = Tempfile.new("_qf").path temp_bf = Tempfile.new("_bf").path temp_rid = Tempfile.new("_rid").path temp_res = Tempfile.new("_res").path system( mafftpath + " --help > #{temp_vf} 2>&1" ) pfp = File.open( "#{temp_vf}", 'r' ) while pfp.gets break if $_ =~ /MAFFT v/ end pfp.close if( $_ ) then mafftversion = sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s else mafftversion = "0" end if( mafftversion < "5.58" ) then puts "" puts "======================================================" puts "Install new mafft (v. 
>= 5.58)" puts "======================================================" puts "" exit end srand ( 0 ) def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").strip ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end nadd = 50 eval = 1e-10 local = 0 fullout = 0 entiresearch = 0 corewin = 50 corethr = 0.3 mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder " if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file" exit end if $OPT_c then corewin = $OPT_c.to_i end if $OPT_d then corethr = $OPT_d.to_f end if $OPT_w entiresearch = 1 end if $OPT_f fullout = 1 end if $OPT_s fullout = 0 end if $OPT_l local = 1 end if $OPT_e then eval = $OPT_e.to_f end if $OPT_a then nadd = $OPT_a.to_i end if $OPT_o then mafftopt += " " + $OPT_o + " " end system "cat " + ARGV.to_s + " > #{temp_if}" ar = mafftopt.split(" ") nar = ar.length for i in 0..(nar-1) if ar[i] == "--seed" then system "cat #{ar[i+1]} >> #{temp_if}" end end nseq = 0 ifp = File.open( "#{temp_if}", 'r' ) while ifp.gets nseq += 1 if $_ =~ /^>/ end ifp.close if nseq >= 100 then STDERR.puts "The number of input sequences must be <100." exit elsif nseq == 1 then system( "cp #{temp_if}" + " #{temp_pf}" ) else STDERR.puts "Performing preliminary alignment .. 
" if entiresearch == 1 then # system( mafftpath + " --maxiterate 1000 --localpair #{temp_if} > #{temp_pf}" ) system( mafftpath + " --maxiterate 0 --retree 2 #{temp_if} > #{temp_pf}" ) else system( mafftpath + " --maxiterate 1000 --localpair --core --coreext --corethr #{corethr.to_s} --corewin #{corewin.to_s} #{temp_if} > #{temp_pf}" ) end end pfp = File.open( "#{temp_pf}", 'r' ) inname = [] inseq = [] slen = [] act = [] nin = 0 nin = readfasta( pfp, inname, inseq ) for i in 0..(nin-1) slen.push( inseq[i].gsub(/-/,"").length ) act.push( 1 ) end pfp.close pfp = File.open( "#{temp_if}", 'r' ) orname = [] orseq = [] nin = 0 nin = readfasta( pfp, orname, orseq ) pfp.close allen = inseq[0].length for i in 0..(nin-2) for j in (i+1)..(nin-1) next if act[i] == 0 next if act[j] == 0 pid = 0.0 total = 0 for a in 0..(allen-1) next if inseq[i][a,1] == "-" || inseq[j][a,1] == "-" total += 1 pid += 1.0 if inseq[i][a,1] == inseq[j][a,1] end pid /= total # puts "#{i.to_s}, #{j.to_s}, #{pid.to_s}" if pid > 0.5 then if slen[i] < slen[j] act[i] = 0 else act[j] = 0 end end end end #p act afp = File.open( "#{temp_af}", 'w' ) STDERR.puts "Searching .. \n" ids = [] add = [] sco = [] for i in 0..(nin-1) inseq[i].gsub!(/-/,"") afp.puts ">" + orname[i] afp.puts orseq[i] # afp.puts ">" + inname[i] # afp.puts inseq[i] STDERR.puts "Query (#{i+1}/#{nin})\n" + inname[i] if act[i] == 0 then STDERR.puts "Skip.\n\n" next end if local == 0 then command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}" system command ridp = File.open( "#{temp_rid}", 'r' ) while ridp.gets break if $_ =~ / RID = (.*)/ end ridp.close rid = $1.strip STDERR.puts "Submitted to NCBI. rid = " + rid STDERR.printf "Waiting " while 1 STDERR.printf "." 
sleep 10 command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}" system command resp = File.open( "#{temp_res}", 'r' ) # resp.gets # if $_ =~ /WAITING/ then # resp.close # next # end while( resp.gets ) break if $_ =~ /QBlastInfoBegin/ end resp.gets if $_ =~ /WAITING/ then resp.close next else resp.close break end end else # puts "Not supported" # exit qfp = File.open( "#{temp_qf}", 'w' ) qfp.puts "> " qfp.puts inseq[i] qfp.close command = blastpath + " -p blastp -e #{eval} -b 1000 -m 7 -i #{temp_qf} -d #{localdb} > #{temp_res}" system command resp = File.open( "#{temp_res}", 'r' ) end STDERR.puts " Done.\n\n" resp = File.open( "#{temp_res}", 'r' ) while 1 while resp.gets break if $_ =~ /(.*)<\/Hit_id>/ || $_ =~ /()/ end id = $1 break if $_ =~ // # p id while resp.gets break if $_ =~ /(.*)<\/Hsp_bit-score>/ end score = $1.to_f # p score known = ids.index( id ) if known != nil then if sco[known] >= score then next else ids.delete_at( known ) add.delete_at( known ) sco.delete_at( known ) end end while resp.gets break if $_ =~ /(.*)<\/Hsp_hseq>/ end # break if $1 == nil target = $1.sub( /-/, "" ).sub( /U/, "X" ) # p target # STDERR.puts "adding 1 seq" ids.push( id ) sco.push( score ) add.push( target ) end resp.close end n = ids.length outnum = 0 while n > 0 && outnum < nadd m = rand( n ) afp.puts ">_addedbymaffte_" + ids[m] afp.puts add[m] ids.delete_at( m ) add.delete_at( m ) n -= 1 outnum += 1 end afp.close STDERR.puts "Performing alignment .. " system( mafftpath + mafftopt + " #{temp_af} > #{temp_bf}" ) STDERR.puts "done." 
bfp = File.open( "#{temp_bf}", 'r' ) outseq = [] outnam = [] readfasta( bfp, outnam, outseq ) bfp.close outseq2 = [] outnam2 = [] len = outseq.length for i in 0..(len-1) # p outnam[i] if fullout == 0 && outnam[i] =~ /_addedbymaffte_/ then next end outseq2.push( outseq[i] ) outnam2.push( outnam[i].sub( /_addedbymaffte_/, "_ho_" ) ) end nout = outseq2.length len = outseq[0].length p = len while p>0 p -= 1 allgap = 1 for j in 0..(nout-1) if outseq2[j][p,1] != "-" then allgap = 0 break end end if allgap == 1 then for j in 0..(nout-1) outseq2[j][p,1] = "" end end end for i in 0..(nout-1) puts ">" + outnam2[i] puts outseq2[i].gsub( /.{1,60}/, "\\0\n" ) end system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" ) mafft-7.123-without-extensions/core/makedirectionlist.c0000640000076500007650000004263612176060374022354 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define END_OF_VEC -1 int nadd; float thresholdtorev; int dodp; int addfragment; typedef struct _thread_arg { int iend; char **seq; char *tmpseq; int *res; int **spointt; short *table1; int iq; #ifdef enablemultithread int *jshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } thread_arg_t; void arguments( int argc, char *argv[] ) { int c; nthread = 1; inputfile = NULL; nadd = 0; dodp = 0; alg = 'a'; alg = 'm'; dorp = NOTSPECIFIED; fmodel = 0; // ppenalty = (int)( -2.0 * 1000 - 0.5 ); // ppenalty_ex = (int)( -0.1 * 1000 - 0.5 ); // poffset = (int)( 0.1 * 1000 - 0.5 ); ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = 2; pamN = 200; thresholdtorev = 0.1; addfragment = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'C': 
nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 't': thresholdtorev = atof( *++argv ); fprintf( stderr, "thresholdtorev = %f\n", thresholdtorev ); --argc; goto nextoption; case 'd': dodp = 1; break; case 'F': addfragment = 1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'S': alg = 'S'; break; case 'M': alg = 'M'; break; case 'm': alg = 'm'; break; case 'G': alg = 'G'; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } } static int maxl; static int tsize; void seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else // fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); ; } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = 
grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else // fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); ; } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } }

/*
 * Adds one count to table[code] for every code stored in pointt,
 * stopping at the END_OF_VEC terminator.
 */
void makecompositiontable_p( short *table, int *pointt )
{
	int code;

	for( ; ( code = *pointt ) != END_OF_VEC; pointt++ )
		table[code]++;
}

/*
 * Turns a list of group codes (n, terminated by END_OF_VEC) into a list
 * of base-4 codes, one per window of six consecutive entries, written to
 * pointt and terminated by END_OF_VEC.
 * A leading -1 in n is copied to pointt and nothing else is written.
 * Otherwise n must hold at least six codes: the first six are read
 * without looking for the terminator.
 */
void makepointtable_nuc( int *pointt, int *n )
{
	int *oldest;
	int window;
	int k;

	if( *n == -1 )
	{
		*pointt = -1;
		return;
	}

	oldest = n;
	window = 0;
	for( k=0; k<6; k++ )		/* n[0]*1024 + n[1]*256 + ... + n[5] */
		window = window * 4 + *n++;
	*pointt++ = window;

	while( *n != END_OF_VEC )
	{
		window -= *oldest++ * 1024;	/* drop the entry leaving the window */
		window *= 4;
		window += *n++;			/* append the entry entering it */
		*pointt++ = window;
	}
	*pointt = END_OF_VEC;
}

/*
 * Same as makepointtable_nuc() but with base-6 codes
 * (weights 7776, 1296, 216, 36, 6, 1 for the six entries of a window).
 */
void makepointtable( int *pointt, int *n )
{
	int *oldest;
	int window;
	int k;

	if( *n == -1 )
	{
		*pointt = -1;
		return;
	}

	oldest = n;
	window = 0;
	for( k=0; k<6; k++ )		/* n[0]*7776 + n[1]*1296 + ... + n[5] */
		window = window * 6 + *n++;
	*pointt++ = window;

	while( *n != END_OF_VEC )
	{
		window -= *oldest++ * 7776;	/* drop the entry leaving the window */
		window *= 6;
		window += *n++;			/* append the entry entering it */
		*pointt++ = window;
	}
	*pointt = END_OF_VEC;
}

static int commonsextet_p2( short *table, int *pointt ) { int value = 0; short tmp; int point; short *memo; int *ct; int *cp; if( *pointt == -1 ) return( 0 ); memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // chuui!! 
if( !ct ) ErrorExit( "Cannot allocate memo\n" ); cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; free( memo ); free( ct ); return( value ); } static void *directionthread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int iend = targ->iend; char **seq = targ->seq; char *tmpseq = targ->tmpseq; int *res = targ->res; int **spointt = targ->spointt; short *table1 = targ->table1; int iq = targ->iq; #ifdef enablemultithread int thread_no = targ->thread_no; int *jshare = targ->jshare; #endif int j; char **mseq1, **mseq2; if( dodp ) // nakuserukamo { mseq1 = AllocateCharMtx( 1, 0 ); mseq2 = AllocateCharMtx( 1, 0 ); } j = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex_counter ); j = *jshare; if( j == iend ) { fprintf( stderr, "\r %d / %d (thread %d) \r", iq, njob, thread_no ); pthread_mutex_unlock( targ->mutex_counter ); break; } ++(*jshare); pthread_mutex_unlock( targ->mutex_counter ); } else #endif { j++; if( j == iend ) { fprintf( stderr, "\r %d / %d \r", iq, njob ); break; } } if( dodp ) { // strcpy( mseq1[0], tmpseq ); // strcpy( mseq2[0], seq[j] ); mseq1[0] = tmpseq; mseq2[0] = seq[j]; // res[j] = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, 0 ); res[j] = L__align11_noalign( mseq1, mseq2 ); } else res[j] = commonsextet_p2( table1, spointt[j] ); } if( dodp ) // nakuserukamo { free( mseq1 ); free( mseq2 ); // G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11_noalign( NULL, NULL ); } // else // if( nthread ) // inthread == 0 no toki free suru to, error. 
nazeda // commonsextet_p( NULL, NULL ); return( NULL ); } int main( int argc, char *argv[] ) { static int *nlen; static int *nogaplen; static char **name, **seq; int i, j, istart, iend; FILE *infp; // FILE *adfp; char c; int *grpseq; char *tmpseq, *revseq; int **pointt, **pointt_rev, **spointt; float res_forward, res_reverse, res_max; int ires, mres, mres2; int *res; static short *table1, *table1_rev; static char **mseq1f, **mseq1r, **mseq2; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( alg == 'a' ) { if( nlenmax < 10000 ) alg = 'G'; else alg = 'S'; } seq = AllocateCharMtx( njob, nlenmax*1+1 ); #if 0 Read( name, nlen, seq ); readData( infp, name, nlen, seq ); #else name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); nogaplen = AllocateIntVec( njob ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); if( dorp != 'd' ) { fprintf( stderr, "Not necessary!\n" ); for( i=0; i thresholdtorev ) // tekitou { // fprintf( stderr, "REVERSE!!!\n" ); sreverse( seq[i], mseq2[0] ); strcpy( tmpseq, name[i] ); strcpy( name[i], "_R_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; } else { strcpy( seq[i], mseq2[0] ); strcpy( tmpseq, name[i] ); strcpy( name[i], "_F_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; } } FreeCharMtx( mseq1f ); FreeCharMtx( mseq1r ); FreeCharMtx( mseq2 ); free( tmpseq ); } else if( alg == 'm' ) { if( dodp ) // nakuserukamo { mseq1f = AllocateCharMtx( 1, nlenmax+1); mseq1r = AllocateCharMtx( 1, nlenmax+1 ); mseq2 = AllocateCharMtx( 1, nlenmax+1 ); } else { spointt = AllocateIntMtx( njob, 0 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); pointt_rev = AllocateIntMtx( njob, nlenmax+1 ); } tmpseq = AllocateCharVec( MAX( nlenmax, B ) +1 ); revseq = AllocateCharVec( nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); 
res = AllocateIntVec( njob ); if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); // iranai maxl = 0; for( i=0; i maxl ) maxl = nogaplen[i]; } if( nadd ) iend = njob - nadd; else iend = 1; for( i=0; imres2 ) { if( ires>mres ) { mres2 = mres; mres = ires; } else mres2 = ires; } } res_forward = (float)( mres + mres2 ) / 2; #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex_counter; thread_arg_t *targ; int *jsharept; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex_counter, NULL ); jsharept = calloc( 1, sizeof(int) ); *jsharept = 0; for( j=0; jmres2 ) { if( ires>mres ) { mres2 = mres; mres = ires; } else mres2 = ires; } } res_reverse = (float)( mres + mres2 ) / 2; // fprintf( stdout, "\n" ); // fprintf( stdout, "score_for(%d,%d) = %f\n", 0, i, res_forward ); // fprintf( stdout, "score_rev(%d,%d) = %f\n", 0, i, res_reverse ); // fflush( stdout ); res_max = MAX(res_reverse,res_forward); if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou { strcpy( seq[i], revseq ); strcpy( tmpseq, name[i] ); strcpy( name[i], "_R_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; if( !dodp ) spointt[i] = pointt_rev[i]; } else { strcpy( tmpseq, name[i] ); strcpy( name[i], "_F_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; if( !dodp ) spointt[i] = pointt[i]; } if( !dodp ) { free( table1 ); free( table1_rev ); } } free( grpseq ); free( tmpseq ); free( revseq ); free( res ); if( dodp ) { FreeCharMtx( mseq1f ); FreeCharMtx( mseq1r ); FreeCharMtx( mseq2 ); } else { FreeIntMtx( pointt ); FreeIntMtx( pointt_rev ); free( spointt ); } } else { fprintf( stderr, "Unknown alg %c\n", alg ); exit( 1 ); } // writeData_pointer( stdout, njob, name, nlen, seq ); for( i=0; i%s\n", name[i] ); // fprintf( stdout, "%s\n", seq[i] ); fprintf( stdout, "%s\n", name[i] ); } fprintf( stderr, "\n" ); SHOWVERSION; return( 0 ); } 
mafft-7.123-without-extensions/core/fft.c0000640000076500007650000000736212116333276017414 0ustar katohkatoh#include "mltaln.h" #include "mtxutl.h" /* from "C gengo niyoru saishin algorithm jiten" ISBN4-87408-414-1 Haruhiko Okumura */ /* make_sintbl: fills the table sintbl for transform size n (fft() sizes it at n + n/4 floats). Entries below n/4 come from a recurrence seeded with sin(PI/n); the rest are produced by mirroring (sintbl[n/2 - i] = sintbl[i]) and negation (sintbl[i + n/2] = -sintbl[i]). NOTE(review): presumably sintbl[i] = sin(2*PI*i/n) -- confirm. */ static void make_sintbl(int n, float sintbl[]) { int i, n2, n4, n8; double c, s, dc, ds, t; n2 = n / 2; n4 = n / 4; n8 = n / 8; t = sin(PI / n); dc = 2 * t * t; ds = sqrt(dc * (2 - dc)); t = 2 * dc; c = sintbl[n4] = 1; s = sintbl[0] = 0; for (i = 1; i < n8; i++) { c -= dc; dc += t * c; s += ds; ds -= t * s; sintbl[i] = s; sintbl[n4 - i] = c; } if (n8 != 0) sintbl[n8] = sqrt(0.5); for (i = 0; i < n4; i++) sintbl[n2 - i] = sintbl[i]; for (i = 0; i < n2 + n4; i++) sintbl[i + n2] = - sintbl[i]; } /* make_bitrev: fills bitrev[0 .. n-1]; fft() swaps x[i] with x[bitrev[i]] whenever i < bitrev[i]. NOTE(review): looks like the bit-reversal permutation for a power-of-two n -- confirm. */ static void make_bitrev(int n, int bitrev[]) { int i, j, k, n2; n2 = n / 2; i = j = 0; for ( ; ; ) { bitrev[i] = j; if (++i >= n) break; k = n2; while (k <= j) { j -= k; k /= 2; } j += k; } } /* fft: transforms x[0 .. |n|-1] in place. A negative n selects the inverse direction; the forward direction divides every element by n at the end. A nonzero freeflag only frees the cached tables and returns 0. The tables are rebuilt whenever n differs from the previous call. Returns 1 when the tables cannot be allocated, otherwise 0. */ int fft(int n, Fukusosuu *x, int freeflag) { static TLS int last_n = 0; /* n seen by the previous call */ static TLS int *bitrev = NULL; /* cached table, filled by make_bitrev() */ static TLS float *sintbl = NULL; /* cached table, filled by make_sintbl() */ int i, j, k, ik, h, d, k2, n4, inverse; float t, s, c, dR, dI; if (freeflag) { if (bitrev) free(bitrev); bitrev = NULL; if (sintbl) free(sintbl); sintbl = NULL; return( 0 ); } /* a negative n selects the inverse direction; continue with |n| */ if (n < 0) { n = -n; inverse = 1; /* inverse direction requested */ } else inverse = 0; n4 = n / 4; if (n != last_n || n == 0) { last_n = n; #if 0 if (sintbl != NULL) { free(sintbl); sintbl = NULL; } if (bitrev != NULL) { free(bitrev); bitrev = NULL; } if (n == 0) return 0; /* */ sintbl = (float *)malloc((n + n4) * sizeof(float)); bitrev = (int *)malloc(n * sizeof(int)); #else /* by T. 
Nishiyama */ sintbl = realloc(sintbl, (n + n4) * sizeof(float)); bitrev = realloc(bitrev, n * sizeof(int)); #endif if (sintbl == NULL || bitrev == NULL) { fprintf(stderr, "\n"); return 1; } make_sintbl(n, sintbl); make_bitrev(n, bitrev); } for (i = 0; i < n; i++) { /* */ j = bitrev[i]; if (i < j) { t = x[i].R; x[i].R = x[j].R; x[j].R = t; t = x[i].I; x[i].I = x[j].I; x[j].I = t; } } for (k = 1; k < n; k = k2) { /* */ #if 0 fprintf( stderr, "%d / %d\n", k, n ); #endif h = 0; k2 = k + k; d = n / k2; for (j = 0; j < k; j++) { #if 0 if( j % 1 == 0 ) fprintf( stderr, "%d / %d\r", j, k ); #endif c = sintbl[h + n4]; if (inverse) s = - sintbl[h]; else s = sintbl[h]; for (i = j; i < n; i += k2) { #if 0 if( k>=4194000 ) fprintf( stderr, "in loop %d - %d < %d, k2=%d\r", j, i, n, k2 ); #endif ik = i + k; dR = s * x[ik].I + c * x[ik].R; dI = c * x[ik].I - s * x[ik].R; x[ik].R = x[i].R - dR; x[i].R += dR; x[ik].I = x[i].I - dI; x[i].I += dI; } h += d; } } if (! inverse) /* n */ for (i = 0; i < n; i++) { x[i].R /= n; x[i].I /= n; } return 0; /* */ } mafft-7.123-without-extensions/core/dndfast4.c0000640000076500007650000001216010510056751020330 0ustar katohkatoh#include "mltaln.h" #include #include #define DEBUG 0 #define TEST 0 int howmanyx( char *s ) { int val = 0; if( scoremtx == -1 ) { do { if( !strchr( "atgcuATGCU", *s ) ) val++; } while( *++s ); } else { do { if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; } while( *++s ); } return( val ); } void arguments( int argc, char *argv[] ) { int c; disopt = 0; while( --argc > 0 && (*++argv)[0] == '-' ) while ( c = *++argv[0] ) switch( c ) { case 'i': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *hat2p; char **seq; char **seq1; static char name[M][B]; static char name1[M][B]; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; char 
b[B]; double max; char com[B]; int opt[M]; int res; char *home; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); #if 0 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = NULL; #endif #if DEBUG if( home ) fprintf( stderr, "home = %s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d\0", home, pid ); sprintf( datafile, "%s/tmp/data-%d\0", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d\0", home, pid ); sprintf( hat2file, "hat2-%d\0", pid ); arguments( argc, argv ); #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( stdin ); #endif rewind( stdin ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); #if 0 FRead( stdin, name, nlen, seq ); #else readData( stdin, name, nlen, seq ); #endif if( scoremtx == -1 ) ktuple = 6; else ktuple = 1; for( i=0; i %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "fasta3 -Q -h -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); ReadFasta3( hat2p, mtx[i], njob-i, name1 ); if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; i -1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); } static float Atracking( float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc ) { int i, j, k, l, iin, jin, ifi, jfi, lgth1, lgth2; // char gap[] = "-"; char *gap; float wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if DEBUG for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( 
lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float x; static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *currentw; static TLS float *previousw; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; #if DEBUG fprintf( stderr, "eff in SA+++align\n" ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( currentw ); FreeFloatVec( previousw ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.1*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.1*lgth2), orlgth2 ) + 100; fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); currentw = AllocateFloatVec( ll2+2 ); previousw = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); fprintf( stderr, "succeeded\n" ); orlgth1 = ll1; orlgth2 = ll2; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); commonIP = AllocateIntMtx( ll1+10, ll2+10 ); fprintf( stderr, "succeeded\n\n" ); commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; cpmx_calc( seq1, cpmx1, eff1, strlen( seq1[0] ), icyc ); cpmx_calc( seq2, cpmx2, eff2, strlen( seq2[0] ), jcyc ); match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 ); match_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 ); if( outgap == 1 ) { for( i=1; i wm ) { wm = x; ijp[i][j] = -( j - mpi ); } g = penalty * 0.5; x = previousw[j-1] + g; if( mi <= x ) { mi = x; mpi = j-1; } g = penalty * 0.5; x = m[j] + g; if( x > wm ) { wm = x; ijp[i][j] = +( i - mp[j] ); } g = penalty * 0.5; x = previousw[j-1] + g; if( m[j] <= x ) { m[j] = x; mp[j] = i-1; } currentw[j] += wm; } lastverticalw[i] = currentw[lgth2-1]; } /* fprintf( stderr, "\n" ); for( i=0; i" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i .\" Generator: DocBook XSL Stylesheets v1.72.0 .\" Date: 2007-08-14 .\" Manual: Mafft Manual .\" Source: mafft-homologs 2.1 .\" .TH 
"MAFFT\-HOMOLOGS" "1" "2007\-06\-09" "mafft\-homologs 2.1" "Mafft Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" .RS 0 mafft\-homologs \- aligns sequences together with homologues automatically collected from SwissProt via NCBI BLAST .RE .SH "SYNOPSIS" .RS 0 \fBmafft\-homologs\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] .RE .SH "DESCRIPTION" .RS 0 The accuracy of an alignment of a few distantly related sequences is considerably improved when they are aligned together with their close homologs. The reason for the improvement is probably the same as that for PSI\-BLAST. That is, the positions of highly conserved residues, the positions with many gaps, and other additional information are provided by the close homologs. According to Katoh et al. (2005), the improvement from adding close homologs is 10% or so, which is comparable to the improvement from incorporating structural information of a pair of sequences. Mafft\-homologs in a mafft server works like this: .sp .RS 4 \h'-04' 1.\h'+02'Collect a number (50 by default) of close homologs (E=1e\-10 by default) of the input sequences. .RE .sp .RS 4 \h'-04' 2.\h'+02'Align the input sequences and homologs all together using the L\-INS\-i strategy. .RE .sp .RS 4 \h'-04' 3.\h'+02'Remove the homologs. .RE .RE .SH "OPTIONS" .RS 0 .PP \fB\-a\fR \fI\fIn\fR\fR .RS 4 The number of collected sequences (default: 50). .RE .PP \fB\-e\fR \fI\fIn\fR\fR .RS 4 E\-value threshold (default: 1e\-10). .RE .PP \fB\-o\fR \fI\fIxxx\fR\fR .RS 4 Options for mafft (default: " \-\-op 1.53 \-\-ep 0.123 \-\-maxiterate 1000 \-\-localpair \-\-reorder"). .RE .PP \fB\-l\fR .RS 4 Locally carries out BLAST searches instead of NCBI BLAST (requires locally installed BLAST and a database). .RE .PP \fB\-f\fR .RS 4 Outputs collected homologues also (default: off). 
.RE .PP \fB\-w\fR .RS 4 Entire sequences are subjected to the BLAST search (default: well\-aligned region only). .RE .RE .SH "REQUIREMENTS" .RS 0 .PP MAFFT version > 5.58. .PP Either of the following: .RS 4 .PP lynx (when a remote BLAST server is used) .PP BLAST and a protein sequence database (when local BLAST is used) .RE .RE .SH "REFERENCES" .RS 0 .PP Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment. .RE .SH "SEE ALSO" .RS 0 .PP \fBmafft\fR(1) .RE .SH "AUTHORS" .RS 0 .PP \fBKazutaka Katoh\fR <\&katoh_at_bioreg.kyushu\-u.ac.jp.\&> .sp -1n .IP "" 4 Wrote Mafft. .PP \fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy.org\&> .sp -1n .IP "" 4 Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. .RE .SH "COPYRIGHT" .RS 0 Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) .br Copyright \(co 2007 Charles Plessy (this manpage) .br .PP Mafft and its manpage are offered under the following conditions: .PP Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: .sp .RS 4 \h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. .RE .sp .RS 4 \h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. .RE .sp .RS 4 \h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. .RE .PP THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .br .RE mafft-7.123-without-extensions/core/pairlocalalign.c0000640000076500007650000022200112176060536021605 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define NODIST -9999 static char *whereispairalign; static char *laraparams; static char foldalignopt[1000]; static int stdout_align; static int stdout_dist; static int store_localhom; static int store_dist; static int nadd; static int laste; static int lastm; static int lastsubopt; static int lastonce; typedef struct _lastres { int score; int start1; int start2; char *aln1; char *aln2; } Lastres; typedef struct _reg { int start; int end; } Reg; typedef struct _aln { int nreg; Reg *reg1; Reg *reg2; } Aln; typedef struct _lastresx { int score; int naln; Aln *aln; } Lastresx; #ifdef enablemultithread typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _thread_arg { int thread_no; int njob; Jobtable *jobpospt; char **name; char **seq; char **dseq; int *thereisxineachseq; LocalHom **localhomtable; double **distancemtx; double *selfscore; char ***bpp; Lastresx **lastresx; int alloclen; pthread_mutex_t *mutex_counter; pthread_mutex_t *mutex_stdout; } thread_arg_t; #endif typedef struct _lastcallthread_arg { int nq, nd; char **dseq; char **qseq; Lastresx **lastresx; #ifdef enablemultithread int thread_no; int *kshare; pthread_mutex_t *mutex; #endif } lastcallthread_arg_t; static void t2u( char *seq ) { while( *seq ) { if ( *seq == 'A' ) *seq = 'a'; else if( *seq == 'a' ) *seq = 
'a'; else if( *seq == 'T' ) *seq = 'u'; else if( *seq == 't' ) *seq = 'u'; else if( *seq == 'U' ) *seq = 'u'; else if( *seq == 'u' ) *seq = 'u'; else if( *seq == 'G' ) *seq = 'g'; else if( *seq == 'g' ) *seq = 'g'; else if( *seq == 'C' ) *seq = 'c'; else if( *seq == 'c' ) *seq = 'c'; else *seq = 'n'; seq++; } } /* removex: copies m into d while dropping every 'X' and 'x'; d is NUL-terminated. Returns the number of characters dropped. */ static int removex( char *d, char *m ) { int val = 0; while( *m != 0 ) { if( *m == 'X' || *m == 'x' ) { m++; val++; } else { *d++ = *m++; } } *d = 0; return( val ); } /* putlocalhom_last: turns every region of every alignment in lastresx into a LocalHom record. The first record is written into *localhompt itself; each later one is calloc'ed and chained through ->next. A region is scored by summing n_dis[amino_n[a]][amino_n[b]] over its end1 - start1 + 1 positions of s1 and s2. With divpairscore set, overlapaa and opt are computed per region; otherwise every record from the alignment's first region to the end of the list receives the alignment-wide overlap and score. Returns without touching the list when naln is 0. NOTE(review): the calloc result is used without a NULL check. */ static void putlocalhom_last( char *s1, char *s2, LocalHom *localhompt, Lastresx *lastresx ) { char *pt1, *pt2; int naln, nreg; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; LocalHom *tmppt2; LocalHom *localhompt0; Reg *rpt1, *rpt2; Aln *apt; int nlocalhom = 0; int len; // fprintf( stderr, "s1=%s\n", s1 ); // fprintf( stderr, "s2=%s\n", s2 ); naln = lastresx->naln; apt = lastresx->aln; if( naln == 0 ) return; while( naln-- ) { rpt1 = apt->reg1; rpt2 = apt->reg2; nreg = apt->nreg; isumscore = 0; sumoverlap = 0; while( nreg-- ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = rpt1->start; tmppt->start2 = rpt2->start; tmppt->end1 = rpt1->end; tmppt->end2 = rpt2->end; if( rpt1 == apt->reg1 ) localhompt0 = tmppt; // ? 
// fprintf( stderr, "in putlocalhom, reg1: %d-%d (nreg=%d)\n", rpt1->start, rpt1->end, lastresx->nreg ); // fprintf( stderr, "in putlocalhom, reg2: %d-%d (nreg=%d)\n", rpt2->start, rpt2->end, lastresx->nreg ); len = tmppt->end1 - tmppt->start1 + 1; // fprintf( stderr, "tmppt->start1=%d\n", tmppt->start1 ); // fprintf( stderr, "tmppt->start2=%d\n", tmppt->start2 ); // fprintf( stderr, "s1+tmppt->start1=%*.*s\n", len, len, s1+tmppt->start1 ); // fprintf( stderr, "s2+tmppt->start2=%*.*s\n", len, len, s2+tmppt->start2 ); pt1 = s1 + tmppt->start1; pt2 = s2 + tmppt->start2; iscore = 0; while( len-- ) { iscore += n_dis[(int)amino_n[(int)*pt1++]][(int)amino_n[(int)*pt2++]]; // - offset may be unnecessary // fprintf( stderr, "len=%d, %c-%c, iscore(0) = %d\n", len, *(pt1-1), *(pt2-1), iscore ); } if( divpairscore ) { tmppt->overlapaa = tmppt->end2-tmppt->start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += tmppt->end2-tmppt->start2+1; } rpt1++; rpt2++; } #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif if( !divpairscore ) { for( tmppt2=localhompt0; tmppt2; tmppt2=tmppt2->next ) { tmppt2->overlapaa = sumoverlap; tmppt2->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } apt++; } } /* countcomma: returns the number of ',' characters in s. */ static int countcomma( char *s ) { int v = 0; while( *s ) if( *s++ == ',' ) v++; return( v ); } static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen ) { static FILE *fp = NULL; float value; char *aln1; char *aln2; int of1tmp, of2tmp; if( fp == NULL ) { fp = fopen( "_foldalignout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _foldalignout\n" ); exit( 1 ); } } aln1 = calloc( alloclen, sizeof( char ) ); aln2 = calloc( alloclen, sizeof( char ) ); readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, 
m1, m2, &of1tmp, &of2tmp, alloclen ); if( strstr( foldalignopt, "-global") ) { fprintf( stderr, "Calling G__align11\n" ); value = G__align11( mseq1, mseq2, alloclen, outgap, outgap ); *of1pt = 0; *of2pt = 0; } else { fprintf( stderr, "Calling L__align11\n" ); value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt ); } // value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame if( aln1[0] == 0 ) { fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 ); } else { strcpy( *mseq1, aln1 ); strcpy( *mseq2, aln2 ); *of1pt = of1tmp; *of2pt = of2tmp; } // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara. // fclose( fp ); // saigo dake yatta houga yoi. // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); free( aln1 ); free( aln2 ); return( value ); } static void block2reg( char *block, Reg *reg1, Reg *reg2, int start1, int start2 ) { Reg *rpt1, *rpt2; char *tpt, *npt; int pos1, pos2; int len, glen1, glen2; pos1 = start1; pos2 = start2; rpt1 = reg1; rpt2 = reg2; while( block ) { block++; // fprintf( stderr, "block = %s\n", block ); tpt = strchr( block, ':' ); npt = strchr( block, ',' ); if( !tpt || tpt > npt ) { len = atoi( block ); reg1->start = pos1; reg2->start = pos2; pos1 += len - 1; pos2 += len - 1; reg1->end = pos1; reg2->end = pos2; // fprintf( stderr, "in loop reg1: %d-%d\n", reg1->start, reg1->end ); // fprintf( stderr, "in loop reg2: %d-%d\n", reg2->start, reg2->end ); reg1++; reg2++; } else { sscanf( block, "%d:%d", &glen1, &glen2 ); pos1 += glen1 + 1; pos2 += glen2 + 1; } block = npt; } reg1->start = reg1->end = reg2->start = reg2->end = -1; while( rpt1->start != -1 ) { // fprintf( stderr, "reg1: %d-%d\n", rpt1->start, rpt1->end ); // fprintf( stderr, "reg2: %d-%d\n", rpt2->start, rpt2->end ); rpt1++; rpt2++; } // *apt1 = *apt2 = 0; // fprintf( stderr, "aln1 = %s\n", aln1 ); // fprintf( 
stderr, "aln2 = %s\n", aln2 ); } static void readlastresx_singleq( FILE *fp, int n1, int nameq, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; if( name2 != nameq ) { fprintf( stderr, "BUG!!!\n" ); exit( 1 ); } // if( lastresx[name1][name2].score ) continue; // dame!!!! prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 1 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 1 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 1 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 1 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! 
add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void readlastresx_group( FILE *fp, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #endif #endif static void readlastresx( FILE *fp, int n1, int n2, Lastresx **lastresx, char **seq1, char **seq2 ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void *lastcallthread_group( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; int qstart, qend, shou, amari; char kd[1000]; if( nthread ) { shou = nq / nthread; amari = nq - shou * nthread; fprintf( stderr, "shou: %d, amari: %d\n", shou, amari ); qstart = thread_no * shou; if( thread_no - 1 < amari ) qstart += thread_no; else qstart += amari; qend = qstart + shou - 1; if( thread_no < amari ) qend += 1; fprintf( stderr, "%d: %d-%d\n", thread_no, qstart, qend ); } k = -1; while( 1 ) { if( nthread ) { if( qstart > qend ) break; if( k == thread_no ) break; fprintf( stderr, "\n%d-%d / %d (thread %d) \n", qstart, qend, nq, thread_no ); k = thread_no; } else { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { fprintf( stderr, "Not supported\n" ); exit( 1 ); } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } for( i=qstart; i<=qend; i++ ) fprintf( lfp, ">%d\n%s\n", i, qseq[i] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, 
k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_group( lfp, lastresx ); fclose( lfp ); } return( NULL ); } #endif #endif static void *lastcallthread( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; char kd[1000]; k = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex ); k = *kshare; if( k == nq ) { pthread_mutex_unlock( targ->mutex ); break; } fprintf( stderr, "\r%d / %d (thread %d) \r", k, nq, thread_no ); ++(*kshare); pthread_mutex_unlock( targ->mutex ); } else #endif { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { klim = MIN( k, njob-nadd ); // klim = k; // dochira demo yoi if( klim == k ) { sprintf( command, "_db%dd", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _db." 
); exit( 1 ); } for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); // sprintf( command, "md5sum _db%dd > /dev/tty", k ); // system( command ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _db%dd _db%dd", whereispairalign, k, k ); else sprintf( command, "%s/lastdb -p _db%dd _db%dd", whereispairalign, k, k ); system( command ); sprintf( kd, "%d", k ); } else // calllast_fast de tsukutta nowo riyou { kd[0] = 0; // fprintf( stderr, "klim=%d, njob=%d, nadd=%d, skip!\n", klim, njob, nadd ); } } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } fprintf( lfp, ">%d\n%s\n", k, qseq[k] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_singleq( lfp, nd, k, lastresx ); fclose( lfp ); } return( NULL ); } static void calllast_fast( int nd, char **dseq, int nq, char **qseq, Lastresx **lastresx ) { int i, j; FILE *lfp; char command[1000]; lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } if( dorp == 'd' ) { fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; 
j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } else { fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } fclose( lfp ); // if( alg == 'r' ) // if 'R' -> lastcallthread, kokonoha nadd>0 no toki nomi shiyou { sprintf( command, "_dbd" ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _dbd" ); exit( 1 ); } if( alg == 'R' ) j = njob-nadd; else j = nd; for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _dbd _dbd", whereispairalign ); else sprintf( command, "%s/lastdb -p _dbd _dbd", whereispairalign ); system( command ); } #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; lastcallthread_arg_t *targ; int *ksharept; targ = (lastcallthread_arg_t *)calloc( nthread, sizeof( lastcallthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); ksharept = calloc( 1, sizeof(int) ); *ksharept = 0; pthread_mutex_init( &mutex, NULL ); for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) { sprintf( command, "%s/lastdb _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); #if 0 sprintf( command, "lastex -s 2 -a %d -b %d -p _scoringmatrixforlast -E 10000 _db.prj _db.prj > _lastex", -penalty, -penalty_ex ); system( command ); lfp = fopen( "_lastex", "r" ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); laste = atoi( 
command ); fclose( lfp ); fprintf( stderr, "laste = %d\n", laste ); sleep( 10 ); #else // laste = 5000; #endif } else { sprintf( command, "%s/lastdb -p _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); // fprintf( stderr, "Not written yet\n" ); } lfp = fopen( "_q", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _q" ); exit( 1 ); } for( i=0; i%d\n%s\n", i, qseq[i] ); } fclose( lfp ); msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from calllast_once, msize=%d\n", msize ); sprintf( command, "%s/lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", whereispairalign, msize, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", 1, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -e 40 -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", -penalty, -penalty_ex ); res = system( command ); if( res ) { fprintf( stderr, "LAST aborted\n" ); exit( 1 ); } lfp = fopen( "_lastres", "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres" ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); fprintf( stderr, "Reading lastres\n" ); readlastresx( lfp, nd, nq, lastresx, dseq, qseq ); fclose( lfp ); } static void callfoldalign( int nseq, char **mseq ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, 
"Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; // for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static float recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; // int res; static char com[10000]; float value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); t2u( ori1 ); t2u( ori2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. 
return( value ); } static float calldafs_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; float value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_dafsinorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsinorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_dafsinorg > %s/_dafsin", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "_dafssh%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/dafs --mafft-in _bpp _dafsin > _dafsout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); sprintf( com, "tr -d '\\r' < _dafssh%s > _dafssh%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _dafssh%s.unix 2>_dum%s", dirname, dirname ); res = system( com ); if( res ) { fprintf( stderr, "Error in dafs\n" ); exit( 1 ); } sprintf( com, "%s/_dafsout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsout\n", dirname ); exit( 1 ); } myfgets( com, 999, fp ); // nagai kanousei ga arunode fgets( 
com, 999, fp ); myfgets( com, 999, fp ); // nagai kanousei ga arunode fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static float callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; float value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_mxscarnainorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnainorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_mxscarnainorg > %s/_mxscarnain", dirname, dirname ); system( com ); // for cygwin, wakaran #if 0 sprintf( com, "cd %s; %s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", dirname, whereispairalign ); #else 
sprintf( com, "_mxscarnash%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); //sleep( 10000 ); sprintf( com, "tr -d '\\r' < _mxscarnash%s > _mxscarnash%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _mxscarnash%s.unix 2>_dum%s", dirname, dirname ); #endif res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } sprintf( com, "%s/_mxscarnaout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnaout\n", dirname ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static void readhat4( FILE *fp, char ***bpp ) { char oneline[1000]; int bppsize; int onechar; // double prob; // int posi, posj; bppsize = 0; // fprintf( stderr, "reading hat4\n" ); onechar = getc(fp); // fprintf( stderr, "onechar = %c\n", onechar ); if( onechar != '>' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } ungetc( onechar, fp ); fgets( oneline, 999, fp ); while( 1 ) { onechar = getc(fp); ungetc( onechar, fp ); if( onechar == '>' || onechar == EOF ) { // fprintf( stderr, "Next\n" ); *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = NULL; break; } fgets( oneline, 999, fp ); // fprintf( stderr, "oneline=%s\n", oneline ); // sscanf( oneline, "%d %d %f", &posi, &posj, &prob ); // fprintf( stderr, "%d %d -> %f\n", posi, posj, prob ); *bpp = 
realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = calloc( 100, sizeof( char ) ); strcpy( (*bpp)[bppsize], oneline ); bppsize++; } } static void preparebpp( int nseq, char ***bpp ) { FILE *fp; int i; fp = fopen( "hat4", "r" ); if( !fp ) { fprintf( stderr, "Cannot open hat4\n" ); exit( 1 ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; #if 0 case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #else case 'l': if( atof( *++argv ) < 0.00001 ) store_localhom = 0; --argc; goto nextoption; #endif case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto 
nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'w': lastm = myatoi( *++argv ); fprintf( stderr, "lastm = %d\n", lastm ); --argc; goto nextoption; case 'e': laste = myatoi( *++argv ); fprintf( stderr, "laste = %d\n", laste ); --argc; goto nextoption; case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame. break; case 'c': stdout_dist = 1; break; case 'n': stdout_align = 1; break; case 'x': store_localhom = 0; store_dist = 0; break; #if 1 case 'a': fmodel = 1; break; #endif #if 0 case 'r': fmodel = -1; break; #endif case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; case 'a': alg = 'a'; break; case 'S': alg = 'S'; break; #endif case 'Q': lastonce = 1; break; case 'S': lastsubopt = 1; break; case 't': alg = 't'; store_localhom = 0; break; case 'L': alg = 'L'; break; case 'Y': alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L; break; case 's': alg = 's'; break; case 'G': alg = 'G'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'r': alg = 'r'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> R, last break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'o': // foldalignopt = *++argv; strcat( foldalignopt, " " ); strcat( foldalignopt, *++argv ); fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); --argc; goto nextoption; #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; #endif default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } #if enablemultithread static void *athread( void *arg ) // alg='R', alg='r' -> tsukawarenai. { thread_arg_t *targ = (thread_arg_t *)arg; int i, ilim, j, jst; int off1, off2, dum1, dum2, thereisx; int intdum; double bunbo; float pscore = 0.0; // by D.Mathog double *effarr1; double *effarr2; char **mseq1, **mseq2, **distseq1, **distseq2, **dumseq1, **dumseq2; char **aseq; // thread_arg int thread_no = targ->thread_no; int njob = targ->njob; Jobtable *jobpospt = targ->jobpospt; char **name = targ->name; char **seq = targ->seq; char **dseq = targ->dseq; int *thereisxineachseq = targ->thereisxineachseq; LocalHom **localhomtable = targ->localhomtable; double **distancemtx = targ->distancemtx; double *selfscore = targ->selfscore; char ***bpp = targ->bpp; Lastresx **lastresx = targ->lastresx; int alloclen = targ->alloclen; // fprintf( stderr, "thread %d start!\n", thread_no ); effarr1 = AllocateDoubleVec( 1 ); effarr2 = AllocateDoubleVec( 1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); if( alg == 'N' ) { dumseq1 = AllocateCharMtx( 1, alloclen+10 ); dumseq2 = AllocateCharMtx( 1, alloclen+10 ); } distseq1 = AllocateCharMtx( 1, 0 ); distseq2 = AllocateCharMtx( 1, 
0 ); aseq = AllocateCharMtx( 2, alloclen+10 ); if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd; else ilim = njob - 1; while( 1 ) { pthread_mutex_lock( targ->mutex_counter ); j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; if( alg == 'Y' || alg == 'r' ) jst = njob - nadd; else jst = i + 1; j = jst; if( i == ilim ) { // fprintf( stderr, "thread %d end!\n", thread_no ); pthread_mutex_unlock( targ->mutex_counter ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); G__align11( NULL, NULL, 0, 0, 0 ); // 20130603 G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11( NULL, NULL, 0, NULL, NULL ); L__align11_noalign( NULL, NULL ); genL__align11( NULL, NULL, 0, NULL, NULL ); free( effarr1 ); free( effarr2 ); free( mseq1 ); free( mseq2 ); if( alg == 'N' ) { FreeCharMtx( dumseq1 ); FreeCharMtx( dumseq2 ); } free( distseq1 ); free( distseq2 ); FreeCharMtx( aseq ); return( NULL ); } } jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex_counter ); if( j == i+1 || j % 100 == 0 ) { fprintf( stderr, "% 5d / %d (by thread %3d) \r", i, njob-nadd, thread_no ); // fprintf( stderr, "% 5d - %5d / %d (thread %d)\n", i, j, njob, thread_no ); } if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 ) { if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = 3.0; else distancemtx[i][j] = 3.0; } if( stdout_dist) { pthread_mutex_lock( targ->mutex_stdout ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 3.0 ); pthread_mutex_unlock( targ->mutex_stdout ); } continue; } strcpy( aseq[0], seq[i] ); strcpy( aseq[1], seq[j] ); // clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 ); // clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 ); // fprintf( stderr, "Skipping conjuction..\n" ); effarr1[0] = 1.0; effarr2[0] = 1.0; mseq1[0] = aseq[0]; mseq2[0] = aseq[1]; 
thereisx = thereisxineachseq[i] + thereisxineachseq[j]; // strcpy( distseq1[0], dseq[i] ); // nen no tame // strcpy( distseq2[0], dseq[j] ); // nen no tame distseq1[0] = dseq[i]; distseq2[0] = dseq[j]; // fprintf( stderr, "mseq1 = %s\n", mseq1[0] ); // fprintf( stderr, "mseq2 = %s\n", mseq2[0] ); #if 0 fprintf( stderr, "group1 = %.66s", indication1 ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); fprintf( stderr, "\n" ); #endif // for( l=0; l 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd) );// ????? else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); } if( (bunbo=MIN( selfscore[i], selfscore[j] )) == 0.0 ) pscore = 2.0; else if( bunbo < pscore ) // mondai ari pscore = 0.0; else pscore = ( 1.0 - pscore / bunbo ) * 2.0; } else { pscore = 2.0; } #if 1 // mutex if( stdout_align ) { pthread_mutex_lock( targ->mutex_stdout ); if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } pthread_mutex_unlock( targ->mutex_stdout ); } if( stdout_dist ) { pthread_mutex_lock( targ->mutex_stdout ); if( j 
== i+1 ) fprintf( stdout, "%d %d d=%.3f\n", i+1, i+1, 0.0 ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); pthread_mutex_unlock( targ->mutex_stdout ); } #endif // mutex if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j] = pscore; } } } #endif static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **dseq, int *thereisxineachseq, char **mseq1, char **mseq2, int alloclen, Lastresx **lastresx ) { int i, j, ilim, jst, jj; int off1, off2, dum1, dum2, thereisx; float pscore = 0.0; // by D.Mathog FILE *hat2p, *hat3p; double **distancemtx; double *selfscore; double *effarr1; double *effarr2; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable = NULL, *tmpptr; int intdum; double bunbo; char ***bpp = NULL; // mxscarna no toki dake char **distseq1, **distseq2; char **dumseq1, **dumseq2; if( store_localhom ) { if( alg == 'Y' || alg == 'r' ) { ilim = njob - nadd; jst = nadd; } else { ilim = njob; jst = njob; } localhomtable = (LocalHom **)calloc( ilim, sizeof( LocalHom *) ); for( i=0; i 0 ) // alg=='r' || alg=='R' -> nthread:=0 (sukoshi ue) { Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex_counter; pthread_mutex_t mutex_stdout; thread_arg_t *targ; if( alg == 'Y' || alg == 'r' ) jobpos.j = njob - nadd - 1; else jobpos.j = 0; jobpos.i = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex_counter, NULL ); pthread_mutex_init( &mutex_stdout, NULL ); for( i=0; i 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) // x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), 
lastresx[i]+j-(njob-nadd) );// ????? else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); } if( (bunbo=MIN( selfscore[i], selfscore[j] )) == 0.0 ) pscore = 2.0; else if( bunbo < pscore ) // mondai ari pscore = 0.0; else pscore = ( 1.0 - pscore / bunbo ) * 2.0; } else { pscore = 2.0; } if( stdout_align ) { if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } } if( stdout_dist ) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); if( store_dist) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j] = pscore; } } } } if( store_dist ) { hat2p = fopen( hat2file, "w" ); if( !hat2p ) ErrorExit( "Cannot open hat2." ); if( alg == 'Y' || alg == 'r' ) WriteHat2_part_pointer( hat2p, njob, nadd, name, distancemtx ); else WriteHat2_pointer( hat2p, njob, name, distancemtx ); fclose( hat2p ); } hat3p = fopen( "hat3", "w" ); if( !hat3p ) ErrorExit( "Cannot open hat3." 
); if( store_localhom ) { fprintf( stderr, "\n\n##### writing hat3\n" ); if( alg == 'Y' || alg == 'r' ) ilim = njob-nadd; else ilim = njob-1; for( i=0; inext ) { // fprintf( stderr, "j=%d, jj=%d\n", j, jj ); if( tmpptr->opt == -1.0 ) continue; // tmptmptmptmptmp // if( alg == 'B' || alg == 'T' ) // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); // else fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2+1, tmpptr->end2+1 ); // zettai dame!!!! } } } #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif if( alg == 'Y' || alg == 'r' ) FreeLocalHomTable_part( localhomtable, (njob-nadd), nadd ); else FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } fclose( hat3p ); if( alg == 's' ) { char **ptpt; for( i=0; i M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } if( ( alg == 'r' || alg == 'R' ) && dorp == 'p' ) { fprintf( stderr, "Not yet supported\n" ); exit( 1 ); } alloclen = nlenmax*2; seq = AllocateCharMtx( njob, alloclen+10 ); aseq = AllocateCharMtx( 2, alloclen+10 ); bseq = AllocateCharMtx( njob, alloclen+10 ); dseq = AllocateCharMtx( njob, alloclen+10 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); name = AllocateCharMtx( njob, B ); nlen = AllocateIntVec( njob ); thereisxineachseq = AllocateIntVec( njob ); if( alg == 'R' ) { lastresx = calloc( njob+1, sizeof( Lastresx * ) ); for( i=0; i 1 ) { fprintf( stderr, "\nThe order of distances is not identical to that in the input file, because of the parallel calculation. 
Reorder them by yourself, using sort -n -k 2 | sort -n -k 1 -s\n" ); } if( stdout_align && nthread > 1 ) { fprintf( stderr, "\nThe order of pairwise alignments is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself.\n" ); } #if 1 if( lastresx ) { for( i=0; lastresx[i]; i++ ) { for( j=0; lastresx[i][j].naln!=-1; j++ ) { for( k=0; k value ) value = cand; return( value ); } void calcNaiseki( Fukusosuu *value, Fukusosuu *x, Fukusosuu *y ) { value->R = x->R * y->R + x->I * y->I; value->I = -x->R * y->I + x->I * y->R; } Fukusosuu *AllocateFukusosuuVec( int l1 ) { Fukusosuu *value; value = (Fukusosuu *)calloc( l1, sizeof( Fukusosuu ) ); if( !value ) { fprintf( stderr, "Cannot allocate %d FukusosuuVec\n", l1 ); return( NULL ); } return( value ); } Fukusosuu **AllocateFukusosuuMtx( int l1, int l2 ) { Fukusosuu **value; int j; // fprintf( stderr, "allocating %d x %d FukusosuuMtx\n", l1, l2 ); value = (Fukusosuu **)calloc( l1+1, sizeof( Fukusosuu * ) ); if( !value ) { fprintf( stderr, "Cannot allocate %d x %d FukusosuuVecMtx\n", l1, l2 ); exit( 1 ); } for( j=0; j max ) { ikouho = i; max = tmp; } } #if 0 if( max < 0.15 ) { break; } #endif #if 0 fprintf( stderr, "Kouho No.%d, pos=%d, score=%f, lag=%d\n", j, ikouho, soukan[ikouho], ikouho-nlen4 ); #endif soukan[ikouho] = -9999.9; kouho[j] = ( ikouho - nlen4 ); } return( j ); } void zurasu2( int lag, int clus1, int clus2, char **seq1, char **seq2, char **aseq1, char **aseq2 ) { int i; #if 0 fprintf( stderr, "### lag = %d\n", lag ); #endif if( lag > 0 ) { for( i=0; i 0 ) { for( i=0; i=0; j-- ) { if( prf1[j] ) { hat1[pre1] = j; pre1 = j; } if( prf2[j] ) { hat2[pre2] = j; pre2 = j; } } hat1[pre1] = -1; hat2[pre2] = -1; /* make site score */ stra[i] = 0.0; for( k=hat1[nalphabets]; k!=-1; k=hat1[k] ) for( j=hat2[nalphabets]; j!=-1; j=hat2[j] ) // stra[i] += n_dis[k][j] * prf1[k] * prf2[j]; stra[i] += n_disFFT[k][j] * prf1[k] * prf2[j]; stra[i] /= totaleff; } (seg+0)->skipForeward = 0; 
(seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; for( j=0; j threshold ) { #if 0 seg->start = i; seg->end = i; seg->center = ( seg->start + seg->end + fftWinSize ) / 2 ; seg->score = score; status = 0; value++; #else if( !status ) { status = 1; starttmp = i; length = 0; cumscore = 0.0; } length++; cumscore += score; #endif } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { if( length > fftWinSize ) { seg->start = starttmp; seg->end = i; seg->center = ( seg->start + seg->end + fftWinSize ) / 2 ; seg->score = cumscore; #if 0 fprintf( stderr, "%d-%d length = %d, score = %f, value = %d\n", seg->start, seg->end, length, cumscore, value ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } value++; seg++; } length = 0; cumscore = 0.0; status = 0; starttmp = i; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status && length > fftWinSize ) { seg->end = i; seg->start = starttmp; seg->center = ( starttmp + i + fftWinSize ) / 2 ; seg->score = cumscore; #if 0 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } #if TMPTMPTMP exit( 0 ); #endif // fprintf( stderr, "returning %d\n", value ); return( value ); } static int permit( Segment *seg1, Segment *seg2 ) { return( 0 ); if( seg1->end >= seg2->start ) return( 0 ); if( seg1->pair->end >= seg2->pair->start ) return( 0 ); else return( 1 ); } void blockAlign2( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ) { int i, j, k, shift, cur1, cur2, count, klim; static TLS int crossscoresize = 0; static TLS int *result1 = NULL; static TLS int *result2 = NULL; static TLS int *ocut1 = NULL; static TLS int *ocut2 = NULL; double maximum; static TLS double **crossscore = NULL; static TLS int **track = NULL; static TLS double maxj, maxi; static TLS int pointj, pointi; if( cut1 == NULL) { if( result1 ) { if( 
result1 ) free( result1 ); result1 = NULL; if( result2 ) free( result2 ); result2 = NULL; if( ocut1 ) free( ocut1 ); ocut1 = NULL; if( ocut2 ) free( ocut2 ); ocut2 = NULL; if( track ) FreeIntMtx( track ); track = NULL; if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = NULL; } return; } if( result1 == NULL ) { result1 = AllocateIntVec( MAXSEG ); result2 = AllocateIntVec( MAXSEG ); ocut1 = AllocateIntVec( MAXSEG ); ocut2 = AllocateIntVec( MAXSEG ); } if( crossscoresize < *ncut+2 ) { crossscoresize = *ncut+2; if( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\n", crossscoresize ); if( track ) FreeIntMtx( track ); if( crossscore ) FreeDoubleMtx( crossscore ); track = AllocateIntMtx( crossscoresize, crossscoresize ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } #if 0 for( i=0; i<*ncut-2; i++ ) fprintf( stderr, "%d.start = %d, score = %f\n", i, seg1[i]->start, seg1[i]->score ); for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%#4.0f ", ocrossscore[i][j] ); fprintf( stderr, "\n" ); } #endif for( i=0; i<*ncut; i++ ) for( j=0; j<*ncut; j++ ) /* mudadanaa */ crossscore[i][j] = ocrossscore[i][j]; for( i=0; i<*ncut; i++ ) { ocut1[i] = cut1[i]; ocut2[i] = cut2[i]; } for( i=1; i<*ncut; i++ ) { #if 0 fprintf( stderr, "### i=%d/%d\n", i,*ncut ); #endif for( j=1; j<*ncut; j++ ) { pointi = 0; maxi = 0.0; klim = j-2; for( k=0; k maxj ) { pointi = k; maxi = crossscore[i-1][k]; } } pointj = 0; maxj = 0.0; klim = i-2; for( k=0; k maxj ) { pointj = k; maxj = crossscore[k][j-1]; } } maxi += penalty; maxj += penalty; maximum = crossscore[i-1][j-1]; track[i][j] = 0; if( maximum < maxi ) { maximum = maxi ; track[i][j] = j - pointi; } if( maximum < maxj ) { maximum = maxj ; track[i][j] = pointj - i; } crossscore[i][j] += maximum; } } #if 0 for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%3d ", 
track[i][j] ); fprintf( stderr, "\n" ); } #endif result1[MAXSEG-1] = *ncut-1; result2[MAXSEG-1] = *ncut-1; for( i=MAXSEG-1; i>=1; i-- ) { cur1 = result1[i]; cur2 = result2[i]; if( cur1 == 0 || cur2 == 0 ) break; shift = track[cur1][cur2]; if( shift == 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - 1; continue; } else if( shift > 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - shift; } else if( shift < 0 ) { result1[i-1] = cur1 + shift; result2[i-1] = cur2 - 1; } } count = 0; for( j=i; j ocrossscore[result1[j-1]][result2[j-1]] ) count--; cut1[count] = ocut1[result1[j]]; cut2[count] = ocut2[result2[j]]; count++; } *ncut = count; #if 0 for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); #endif } void blockAlign3( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ) // memory complexity = O(n^3), time complexity = O(n^2) { int i, j, shift, cur1, cur2, count; static TLS int crossscoresize = 0; static TLS int jumpposi, *jumppos; static TLS double jumpscorei, *jumpscore; static TLS int *result1 = NULL; static TLS int *result2 = NULL; static TLS int *ocut1 = NULL; static TLS int *ocut2 = NULL; double maximum; static TLS double **crossscore = NULL; static TLS int **track = NULL; if( result1 == NULL ) { result1 = AllocateIntVec( MAXSEG ); result2 = AllocateIntVec( MAXSEG ); ocut1 = AllocateIntVec( MAXSEG ); ocut2 = AllocateIntVec( MAXSEG ); } if( crossscoresize < *ncut+2 ) { crossscoresize = *ncut+2; if( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\n", crossscoresize ); if( track ) FreeIntMtx( track ); if( crossscore ) FreeDoubleMtx( crossscore ); if( jumppos ) FreeIntVec( jumppos ); if( jumpscore ) FreeDoubleVec( jumpscore ); track = AllocateIntMtx( crossscoresize, crossscoresize ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); jumppos = AllocateIntVec( crossscoresize ); jumpscore = AllocateDoubleVec( crossscoresize ); } #if 0 for( i=0; 
i<*ncut-2; i++ ) fprintf( stderr, "%d.start = %d, score = %f\n", i, seg1[i]->start, seg1[i]->score ); for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%#4.0f ", ocrossscore[i][j] ); fprintf( stderr, "\n" ); } #endif for( i=0; i<*ncut; i++ ) for( j=0; j<*ncut; j++ ) /* mudadanaa */ crossscore[i][j] = ocrossscore[i][j]; for( i=0; i<*ncut; i++ ) { ocut1[i] = cut1[i]; ocut2[i] = cut2[i]; } for( j=0; j<*ncut; j++ ) { jumpscore[j] = -999.999; jumppos[j] = -1; } for( i=1; i<*ncut; i++ ) { jumpscorei = -999.999; jumpposi = -1; for( j=1; j<*ncut; j++ ) { #if 1 fprintf( stderr, "in blockalign3, ### i=%d, j=%d\n", i, j ); #endif #if 0 for( k=0; k maxj ) { pointi = k; maxi = crossscore[i-1][k]; } } pointj = 0; maxj = 0.0; for( k=0; k maxj ) { pointj = k; maxj = crossscore[k][j-1]; } } maxi += penalty; maxj += penalty; #endif maximum = crossscore[i-1][j-1]; track[i][j] = 0; if( maximum < jumpscorei && permit( seg1[jumpposi], seg1[i] ) ) { maximum = jumpscorei; track[i][j] = j - jumpposi; } if( maximum < jumpscore[j] && permit( seg2[jumppos[j]], seg2[j] ) ) { maximum = jumpscore[j]; track[i][j] = jumpscore[j] - i; } crossscore[i][j] += maximum; if( jumpscorei < crossscore[i-1][j] ) { jumpscorei = crossscore[i-1][j]; jumpposi = j; } if( jumpscore[j] < crossscore[i][j-1] ) { jumpscore[j] = crossscore[i][j-1]; jumppos[j] = i; } } } #if 0 for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%3d ", track[i][j] ); fprintf( stderr, "\n" ); } #endif result1[MAXSEG-1] = *ncut-1; result2[MAXSEG-1] = *ncut-1; for( i=MAXSEG-1; i>=1; i-- ) { cur1 = result1[i]; cur2 = result2[i]; if( cur1 == 0 || cur2 == 0 ) break; shift = track[cur1][cur2]; if( shift == 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - 1; continue; } else if( shift > 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - shift; } else if( shift < 0 ) { result1[i-1] = cur1 + shift; 
result2[i-1] = cur2 - 1; } } count = 0; for( j=i; j ocrossscore[result1[j-1]][result2[j-1]] ) count--; cut1[count] = ocut1[result1[j]]; cut2[count] = ocut2[result2[j]]; count++; } *ncut = count; #if 0 for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); #endif } mafft-7.123-without-extensions/core/mafft.tmpl0000640000076500007650000022005312227117263020455 0ustar katohkatoh#! /bin/sh er=0; myself=`dirname "$0"`/`basename "$0"`; export myself version="v7.123b (2013/10/15)"; export version LANG=C; export LANG os=`uname` progname=`basename "$0"` if [ `echo $os | grep -i cygwin` ]; then os="cygwin" elif [ `echo $os | grep -i mingw` ]; then os="mingw" elif [ `echo $os | grep -i darwin` ]; then os="darwin" elif [ `echo $os | grep -i sunos` ]; then os="sunos" elif [ `echo $os | grep -i linux` ]; then os="linux" else os="unix" fi export os if [ "$MAFFT_BINARIES" ]; then prefix="$MAFFT_BINARIES" else prefix=_LIBDIR fi export prefix if [ $# -gt 0 ]; then if [ "$1" = "--man" ]; then man "$prefix/mafft.1" exit 0; fi fi if [ -x "$prefix/version" ]; then versionbin=`"$prefix/version" | awk '{print $1}'` # for cygwin else versionbin="0.000" fi if ! expr "$version" : v"$versionbin" > /dev/null ; then echo "" 1>&2 echo "v$versionbin != $version" 1>&2 echo "" 1>&2 echo "There is a problem in the configuration of your shell." 1>&2 echo "Check the MAFFT_BINARIES environmental variable by" 1>&2 echo "$ echo \$MAFFT_BINARIES" 1>&2 echo "" 1>&2 echo "This variable must be *unset*, unless you have installed MAFFT" 1>&2 echo "with a special configuration. To unset this variable, type" 1>&2 echo "$ unset MAFFT_BINARIES" 1>&2 echo "or" 1>&2 echo "% unsetenv MAFFT_BINARIES" 1>&2 echo "Then retry" 1>&2 echo "$ mafft input > output" 1>&2 echo "" 1>&2 echo "To keep this change permanently, edit setting files" 1>&2 echo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2 echo "to delete the MAFFT_BINARIES line." 
1>&2 echo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2 echo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 1>&2 echo "" 1>&2 echo "Please send a problem report to kazutaka.katoh@aist.go.jp," 1>&2 echo "if this problem remains." 1>&2 echo "" 1>&2 exit 1 er=1 fi defaultiterate=0 defaultcycle=2 defaultgop="1.53" #defaultaof="0.123" defaultaof="0.000" defaultlaof="0.100" defaultlgop="-2.00" defaultfft=1 defaultrough=0 defaultdistance="ktuples" #defaultdistance="local" defaultweighti="2.7" defaultweightr="0.0" defaultweightm="1.0" defaultdafs=0 defaultmccaskill=0 defaultcontrafold=0 defaultalgopt=" " defaultalgoptit=" " defaultsbstmodel=" -b 62 " defaultfmodel=" " defaultkappa=" " if [ $progname = "xinsi" -o $progname = "mafft-xinsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="scarna" defaultweighti="3.2" defaultweightr="8.0" defaultweightm="2.0" defaultmccaskill=1 defaultcontrafold=0 defaultdafs=0 defaultalgopt=" -A " defaultalgoptit=" -AB " ## chui defaultaof="0.0" defaultsbstmodel=" -b 62 " defaultkappa=" " defaultfmodel=" " # 2013/06/18 elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="global" defaultweighti="3.2" defaultweightr="8.0" defaultweightm="2.0" defaultmccaskill=1 defaultcontrafold=0 defaultdafs=0 defaultalgopt=" -A " defaultalgoptit=" -AB " ## chui defaultaof="0.0" defaultsbstmodel=" -b 62 " defaultkappa=" " defaultfmodel=" " # 2013/06/18 elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then defaultfft=0 defaultcycle=1 defaultiterate=1000 defaultdistance="local" elif [ $progname = "ginsi" -o $progname = "mafft-ginsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="global" elif [ $progname = "einsi" -o $progname = "mafft-einsi" ]; then defaultfft=0 defaultcycle=1 defaultiterate=1000 defaultdistance="localgenaf" elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then 
defaultfft=1 defaultcycle=2 defaultdistance="ktuples" elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then defaultfft=1 defaultcycle=2 defaultiterate=2 defaultdistance="ktuples" elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then defaultfft=0 defaultcycle=2 defaultdistance="ktuples" elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then defaultfft=0 defaultcycle=2 defaultiterate=2 defaultdistance="ktuples" fi outputfile="" namelength=-1 anysymbol=0 parallelizationstrategy="BAATARI2" kappa=$defaultkappa sbstmodel=$defaultsbstmodel fmodel=$defaultfmodel gop=$defaultgop aof=$defaultaof cycle=$defaultcycle iterate=$defaultiterate fft=$defaultfft rough=$defaultrough distance=$defaultdistance forcefft=0 memopt=" " weightopt=" " GGOP="-6.00" LGOP="-6.00" LEXP="-0.000" GEXP="-0.000" lgop=$defaultlgop lexp="-0.100" laof=$defaultlaof pggop="-2.00" pgexp="-0.10" pgaof="0.10" rgop="-1.530" rgep="-0.000" seqtype=" " weighti=$defaultweighti weightr=$defaultweightr weightm=$defaultweightm rnaalifold=0 dafs=$defaultdafs mccaskill=$defaultmccaskill contrafold=$defaultcontrafold progressfile="/dev/stderr" debug=0 sw=0 algopt=$defaultalgopt algoptit=$defaultalgoptit #algspecified=0 pairspecified=0 scorecalcopt=" " coreout=0 corethr="0.5" corewin="100" coreext=" " outputformat="pir" f2clext="-N" outorder="input" seed="x" seedtable="x" auto=0 groupsize=-1 partsize=50 partdist="ktuples" partorderopt=" -x " treeout=0 distout=0 treein=0 topin=0 treeinopt=" " seedfiles="/dev/null" seedtablefile="/dev/null" pdblist="/dev/null" ownlist="/dev/null" strdir="$PWD" aamatrix="/dev/null" treeinfile="/dev/null" rnascoremtx=" " laraparams="/dev/null" foldalignopt=" " treealg=" -X " scoreoutarg=" " numthreads=0 numthreadsit=-1 randomseed=0 addfile="/dev/null" addarg0=" " fragment=0 legacygapopt=" " mergetable="/dev/null" mergearg=" " seedoffset=0 outnum=" " last_e=5000 last_m=3 last_subopt=" " last_once=" " adjustdirection=0 tuplesize=6 termgapopt=" -O " 
similarityoffset="0.0" if [ $# -gt 0 ]; then while [ $# -gt 1 ]; do if [ "$1" = "--auto" ]; then auto=1 elif [ "$1" = "--anysymbol" ]; then anysymbol=1 elif [ "$1" = "--preservecase" ]; then anysymbol=1 elif [ "$1" = "--clustalout" ]; then outputformat="clustal" elif [ "$1" = "--phylipout" ]; then outputformat="phylip" elif [ "$1" = "--reorder" ]; then outorder="aligned" partorderopt=" " elif [ "$1" = "--inputorder" ]; then outorder="input" partorderopt=" -x " elif [ "$1" = "--unweight" ]; then weightopt=" -u " elif [ "$1" = "--termgappenalty" ]; then termgapopt=" " elif [ "$1" = "--alga" ]; then algopt=" " algoptit=" " # algspecified=1 elif [ "$1" = "--algq" ]; then algopt=" -Q " algoptit=" " # algspecified=1 elif [ "$1" = "--namelength" ]; then shift namelength=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the length of name in clustal format output!" 1>&2 exit fi elif [ "$1" = "--groupsize" ]; then shift groupsize=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify groupsize!" 1>&2 exit fi elif [ "$1" = "--partsize" ]; then shift partsize=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify partsize!" 
1>&2 exit fi elif [ "$1" = "--parttree" ]; then distance="parttree" partdist="ktuples" elif [ "$1" = "--dpparttree" ]; then distance="parttree" partdist="localalign" elif [ "$1" = "--fastaparttree" ]; then distance="parttree" partdist="fasta" elif [ "$1" = "--treeout" ]; then treeout=1 elif [ "$1" = "--distout" ]; then distout=1 elif [ "$1" = "--fastswpair" ]; then distance="fasta" pairspecified=1 sw=1 elif [ "$1" = "--fastapair" ]; then distance="fasta" pairspecified=1 sw=0 elif [ "$1" = "--averagelinkage" ]; then treealg=" -E " elif [ "$1" = "--minimumlinkage" ]; then treealg=" -q " elif [ "$1" = "--noscore" ]; then scorecalcopt=" -Z " elif [ "$1" = "--6mermultipair" ]; then distance="ktuplesmulti" tuplesize=6 pairspecified=1 elif [ "$1" = "--10mermultipair" ]; then distance="ktuplesmulti" tuplesize=10 pairspecified=1 elif [ "$1" = "--6merpair" ]; then distance="ktuples" tuplesize=6 pairspecified=1 elif [ "$1" = "--10merpair" ]; then distance="ktuples" tuplesize=10 pairspecified=1 elif [ "$1" = "--blastpair" ]; then distance="blast" pairspecified=1 elif [ "$1" = "--lastmultipair" ]; then distance="lastmulti" pairspecified=1 elif [ "$1" = "--globalpair" ]; then distance="global" pairspecified=1 elif [ "$1" = "--localpair" ]; then distance="local" pairspecified=1 elif [ "$1" = "--lastpair" ]; then distance="last" pairspecified=1 elif [ "$1" = "--multipair" ]; then distance="multi" pairspecified=1 elif [ "$1" = "--hybridpair" ]; then distance="hybrid" pairspecified=1 elif [ "$1" = "--scarnapair" ]; then distance="scarna" pairspecified=1 elif [ "$1" = "--dafspair" ]; then distance="dafs" pairspecified=1 elif [ "$1" = "--larapair" ]; then distance="lara" pairspecified=1 elif [ "$1" = "--slarapair" ]; then distance="slara" pairspecified=1 elif [ "$1" = "--foldalignpair" ]; then distance="foldalignlocal" pairspecified=1 elif [ "$1" = "--foldalignlocalpair" ]; then distance="foldalignlocal" pairspecified=1 elif [ "$1" = "--foldalignglobalpair" ]; then 
distance="foldalignglobal" pairspecified=1 elif [ "$1" = "--globalgenafpair" ]; then distance="globalgenaf" pairspecified=1 elif [ "$1" = "--localgenafpair" ]; then distance="localgenaf" pairspecified=1 elif [ "$1" = "--genafpair" ]; then distance="localgenaf" pairspecified=1 elif [ "$1" = "--memsave" ]; then memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai) elif [ "$1" = "--nomemsave" ]; then memopt=" -N " elif [ "$1" = "--nuc" ]; then seqtype=" -D " elif [ "$1" = "--amino" ]; then seqtype=" -P " elif [ "$1" = "--fft" ]; then fft=1 forcefft=1 elif [ "$1" = "--nofft" ]; then fft=0 elif [ "$1" = "--quiet" ]; then if [ $os = "mingw" ]; then progressfile="nul" else progressfile="/dev/null" fi elif [ "$1" = "--debug" ]; then debug=1 elif [ "$1" = "--coreext" ]; then coreext=" -c " elif [ "$1" = "--core" ]; then coreout=1 elif [ "$1" = "--adjustdirection" ]; then adjustdirection=1 elif [ "$1" = "--adjustdirectionaccurately" ]; then adjustdirection=2 elif [ "$1" = "--progress" ]; then shift progressfile="$1" if ! ( expr "$progressfile" : "/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then echo "Specify a progress file name with the absolute path!" 1>&2 exit fi elif [ "$1" = "--out" ]; then shift outputfile="$1" elif [ "$1" = "--thread" ]; then shift if ! expr "$1" : "[0-9\-]" > /dev/null ; then echo "Specify the number of threads!" 1>&2 exit fi numthreads=`expr "$1" - 0` elif [ "$1" = "--threadit" ]; then shift if ! expr "$1" : "[0-9\-]" > /dev/null ; then echo "Specify the number of threads for the iterative step!" 
1>&2 exit fi numthreadsit=`expr "$1" - 0` elif [ "$1" = "--last_subopt" ]; then last_subopt="-S" elif [ "$1" = "--last_once" ]; then last_once="-Q" elif [ "$1" = "--last_m" ]; then shift last_m=`expr "$1" - 0` elif [ "$1" = "--last_e" ]; then shift last_e=`expr "$1" - 0` elif [ "$1" = "--randomseed" ]; then shift randomseed=`expr "$1" - 0` elif [ "$1" = "--bestfirst" ]; then parallelizationstrategy="BESTFIRST" elif [ "$1" = "--adhoc0" ]; then parallelizationstrategy="BAATARI0" elif [ "$1" = "--adhoc1" ]; then parallelizationstrategy="BAATARI1" elif [ "$1" = "--adhoc2" ]; then parallelizationstrategy="BAATARI2" elif [ "$1" = "--simplehillclimbing" ]; then parallelizationstrategy="BAATARI2" elif [ "$1" = "--scoreout" ]; then scoreoutarg="-S -B" elif [ "$1" = "--outnum" ]; then scoreoutarg="-n" elif [ "$1" = "--legacygappenalty" ]; then legacygapopt="-L" elif [ "$1" = "--merge" ]; then shift mergetable="$1" if [ ! -e "$mergetable" ]; then echo "Cannot open $mergetable" 1>&2 exit fi elif [ "$1" = "--addprofile" ]; then shift addarg0="-I" addfile="$1" elif [ "$1" = "--add" ]; then shift addarg0="-K -I" addfile="$1" elif [ "$1" = "--addfragments" ]; then shift addarg0="-K -I" addfile="$1" fragment=1 elif [ "$1" = "--addfull" ]; then shift addarg0="-K -I" addfile="$1" fragment=-1 elif [ "$1" = "--maxiterate" ]; then shift iterate=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the number of iterations!" 1>&2 exit fi elif [ "$1" = "--retree" ]; then shift cycle=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the number of tree rebuilding!" 1>&2 exit fi elif [ "$1" = "--text" ]; then sbstmodel=" -b -2 -a " f2clext="-E" seqtype="-P" fft=0 elif [ "$1" = "--aamatrix" ]; then shift sbstmodel=" -b -1 " aamatrix="$1" if [ ! -e "$aamatrix" ]; then echo "Cannot open $aamatrix" 1>&2 exit fi elif [ "$1" = "--treein" ]; then shift treeinopt=" -U " treein=1 treeinfile="$1" if [ ! 
-e "$treeinfile" ]; then echo "Cannot open $treeinfile" 1>&2 exit fi elif [ "$1" = "--topin" ]; then shift treeinopt=" -V " treein=1 treeinfile="$1" echo "The --topin option has been disabled." 1>&2 echo "There was a bug in version < 6.530." 1>&2 echo "This bug has not yet been fixed." 1>&2 exit 1 elif [ "$1" = "--kappa" ]; then shift kappa=" -k $1 " if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify kappa value!" 1>&2 exit fi elif [ "$1" = "--fmodel" ]; then fmodel=" -a " elif [ "$1" = "--jtt" ]; then shift sbstmodel=" -j $1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify pam value!" 1>&2 exit fi elif [ "$1" = "--kimura" ]; then shift sbstmodel=" -j $1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify pam value!" 1>&2 exit fi elif [ "$1" = "--tm" ]; then shift sbstmodel=" -m $1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify pam value!" 1>&2 exit fi elif [ "$1" = "--bl" ]; then shift sbstmodel=" -b $1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "blosum $1?" 1>&2 exit fi elif [ "$1" = "--weighti" ]; then shift weighti="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weighti value!" 1>&2 exit fi elif [ "$1" = "--weightr" ]; then shift weightr="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weightr value!" 1>&2 exit fi elif [ "$1" = "--weightm" ]; then shift weightm="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weightm value!" 1>&2 exit fi elif [ "$1" = "--rnaalifold" ]; then rnaalifold=1 elif [ "$1" = "--mccaskill" ]; then mccaskill=1 contrafold=0 dafs=0 elif [ "$1" = "--contrafold" ]; then mccaskill=0 contrafold=1 dafs=0 elif [ "$1" = "--dafs" ]; then mccaskill=0 contrafold=0 dafs=1 elif [ "$1" = "--ribosum" ]; then rnascoremtx=" -s " elif [ "$1" = "--op" ]; then shift gop="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify op!" 1>&2 exit fi elif [ "$1" = "--ep" ]; then shift # aof="$1" tmpval="$1" aof=`awk "BEGIN{ print -1.0 * $tmpval}"` if ! 
expr "$1" : "[0-9\-]" > /dev/null ; then echo "Specify ep!" 1>&2 exit fi elif [ "$1" = "--rop" ]; then shift rgop="$1" # Atode check elif [ "$1" = "--rep" ]; then shift rgep="$1" elif [ "$1" = "--lop" ]; then shift lgop="$1" elif [ "$1" = "--LOP" ]; then shift LGOP="$1" elif [ "$1" = "--lep" ]; then shift laof="$1" elif [ "$1" = "--lexp" ]; then shift lexp="$1" elif [ "$1" = "--LEXP" ]; then shift LEXP="$1" elif [ "$1" = "--GEXP" ]; then shift GEXP="$1" elif [ "$1" = "--GOP" ]; then shift GGOP="$1" elif [ "$1" = "--gop" ]; then shift pggop="$1" elif [ "$1" = "--gep" ]; then shift pgaof="$1" elif [ "$1" = "--gexp" ]; then shift pgexp="$1" elif [ "$1" = "--laraparams" ]; then shift laraparams="$1" elif [ "$1" = "--corethr" ]; then shift corethr="$1" elif [ "$1" = "--corewin" ]; then shift corewin="$1" elif [ "$1" = "--strdir" ]; then shift strdir="$1" elif [ "$1" = "--pdbidlist" ]; then shift pdblist="$1" elif [ "$1" = "--pdbfilelist" ]; then shift ownlist="$1" elif [ "$1" = "--seedtable" ]; then shift seedtable="y" seedtablefile="$1" elif [ "$1" = "--seed" ]; then shift seed="m" seedfiles="$seedfiles $1" elif [ $progname = "fftns" -o $progname = "nwns" ]; then if [ "$1" -gt 0 ]; then cycle=`expr "$1" - 0` fi elif [ "$1" = "--similaritylevel" ]; then shift similarityoffset="$1" else echo "Unknown option: $1" 1>&2 er=1; fi shift done; echo "" 1>"$progressfile" # TMPFILE=/tmp/$progname.$$ TMPFILE=`mktemp -dt $progname.XXXXXXXXXX` if [ $? -ne 0 ]; then echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2 TMPFILE=`mktemp -d /tmp/$progname.XXXXXXXXXX` fi umask 077 # mkdir $TMPFILE || er=1 if [ $debug -eq 1 ]; then trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 else trap "rm -rf $TMPFILE" 0 fi if [ $# -eq 1 ]; then if [ -r "$1" -o "$1" = - ]; then if [ -r "$addfile" ]; then printf ''; else echo "$0": Cannot open "$addfile". 
1>&2 exit 1; fi cat "$1" | tr "\r" "\n" > $TMPFILE/infile echo "" >> $TMPFILE/infile cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> $TMPFILE/infile cat "$addfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_addfile cat "$aamatrix" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_aamtx cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_subalignmentstable cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_guidetree cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_seedtablefile cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_lara.params cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/pdblist cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/ownlist # echo $seedfiles infilename="$1" seedfilesintmp="/dev/null" seednseq="0" set $seedfiles > /dev/null while [ $# -gt 1 ]; do shift if [ -r "$1" ]; then cat "$1" | tr "\r" "\n" > $TMPFILE/seed$# else echo "$0": Cannot open "$1". 1>&2 exit 1; fi seednseq=$seednseq" "`grep -c '^[>|=]' $TMPFILE/seed$#` seedfilesintmp=$seedfilesintmp" "seed$# done # ls $TMPFILE # echo $seedfilesintmp # echo $seednseq else echo "$0": Cannot open "$1". 
1>&2 er=1 # exit 1; fi else echo '$#'"=$#" 1>&2 er=1 fi if [ $numthreads -lt 0 ]; then if [ $os = "linux" ]; then nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l` ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'` nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l` if [ $nlogicalcore -eq 0 ]; then echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile" exit 1 fi if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then numthreads=`expr $ncoresinacpu '*' $nphysicalcpu` # if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading # numthreads=`expr $numthreads '+' 1` # fi else numthreads=$nlogicalcore fi elif [ $os = "darwin" ]; then numthreads=`sysctl -n hw.physicalcpu` if [ -z $numthreads ]; then echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile" exit 1 fi # nlogicalcore=`sysctl -n hw.logicalcpu` # if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading # numthreads=`expr $numthreads '+' 1` # fi elif [ $os = "mingw" -o $os = "cygwin" ]; then numthreads=0 else echo "Cannot count the number of physical cores." 1>>"$progressfile" exit 1 fi echo $os 1>>"$progressfile" echo $numthreads 1>>"$progressfile" fi if [ $numthreadsit -lt 0 ]; then if [ $numthreads -lt 7 ]; then numthreadsit=$numthreads else numthreadsit=6 fi fi if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then echo 'Impossible' 1>&2; exit 1; fi if [ "$addarg0" != " " ]; then iterate=0 # 2013/03/23 "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize 2>>"$progressfile" nadd=`awk '{print $1}' $TMPFILE/addsize` if [ $nadd -eq "0" ]; then echo Check $addfile 1>&2 exit 1; fi if [ $seed != "x" -o $seedtable != "x" ]; then echo 'Impossible' 1>&2; echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 
1>&2
		exit 1;
	fi
else
	nadd="0"
fi

# Auto mode ($auto = 1): pick the distance method, the number of refinement
# iterations and the number of guide-tree cycles from the input size.
# countlen writes its result to $TMPFILE/size; field 1 is stored in nseq and
# field 3 in nlen.
if [ $auto -eq 1 ]; then
	"$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size 2>>"$progressfile"
	nseq=`awk '{print $1}' $TMPFILE/size`
	nlen=`awk '{print $3}' $TMPFILE/size`
	if [ $nlen -lt 2000 -a $nseq -lt 100 ]; then
		distance="local"
		iterate=1000
	elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
		distance="ktuples"
		iterate=2
		cycle=2
	elif [ $nseq -lt 10000 ]; then
		distance="ktuples"
		iterate=0
		cycle=2
	elif [ $nseq -lt 30000 ]; then
		distance="ktuples"
		iterate=0
		cycle=1
	elif [ $nlen -lt 10000 ]; then
		distance="parttree"
		partdist="localalign"
		algopt=" "
		algoptit=" "
#		algspecified=1
		cycle=1
	else
		distance="parttree"
		partdist="ktuples"
		algopt=" "
		algoptit=" "
#		algspecified=1
		cycle=1
	fi
	# When adding fragments, the choice above is overridden according to the
	# number of (existing sequence) x (added sequence) pairs.
	if [ $fragment -ne 0 ]; then
		norg=`expr $nseq '-' $nadd`
		npair=`expr $norg '*' $nadd`
		echo "nadd = " $nadd 1>>"$progressfile"
		echo "npair = " $npair 1>>"$progressfile"
# check length!
#
		if [ $npair -gt 10000000 ]; then # to be reconsidered later
			distance="ktuples"
			echo "use ktuples! size=$tuplesize" 1>>"$progressfile"
		elif [ $npair -gt 3000000 ]; then # to be reconsidered later
			distance="multi"
			weighti="0.0"
			echo "use multipair, weighti=0.0!" 1>>"$progressfile"
		else
			distance="multi"
			echo "use multipair, weighti=$weighti!" 1>>"$progressfile"
		fi
		pairspecified=1
	fi
fi

# Reject a similarity offset outside the open interval (-1.0, +1.0).
if [ `awk "BEGIN {print( $similarityoffset <= -1.0 || $similarityoffset >= 1.0 )}"` -gt 0 ]; then
	printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile"
	# Explicit failure status: a bare `exit` would return printf's status (0),
	# so callers could not tell that the parameter was rejected.
	exit 1;
fi
# Shift the three offset parameters by the similarity offset.
aof=`awk "BEGIN{print 0.0 + $similarityoffset + $aof}"`
laof=`awk "BEGIN{print 0.0 + $similarityoffset + $laof}"`
pgaof=`awk "BEGIN{print 0.0 + $similarityoffset + $pgaof}"`

# Upper bound on the iteration count, depending on the parallelization strategy.
if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then
	iteratelimit=254
else
	iteratelimit=16
fi
if [ $iterate -gt $iteratelimit ]; then #??
iterate=$iteratelimit fi if [ $rnaalifold -eq 1 ]; then rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx " # rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R " rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R " elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then rnaopt=" -o $rgop -c $weightm -r $weightr " # rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx " rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx " else rnaopt=" " rnaoptit=" -F " fi # if [ $algspecified -eq 0 ]; then # if [ $distance = "parttree" ]; then # algopt=" -Q " # algoptit=" " # else # algopt=" " # algoptit=" " # fi # fi model="$sbstmodel $kappa $fmodel" if [ $er -eq 1 ]; then echo "------------------------------------------------------------------------------" 1>&2 echo " MAFFT" $version 1>&2 # echo "" 1>&2 # echo " Input format: fasta" 1>&2 # echo "" 1>&2 # echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2 echo " http://mafft.cbrc.jp/alignment/software/" 1>&2 echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2 # echo "------------------------------------------------------------------------------" 1>&2 # echo " % mafft in > out" 1>&2 echo "------------------------------------------------------------------------------" 1>&2 # echo "" 1>&2 echo "High speed:" 1>&2 echo " % mafft in > out" 1>&2 echo " % mafft --retree 1 in > out (fast)" 1>&2 echo "" 1>&2 echo "High accuracy (for <~200 sequences x <~2,000 aa/nt):" 1>&2 echo " % mafft --maxiterate 1000 --localpair in > out (% linsi in > out is also ok)" 1>&2 echo " % mafft --maxiterate 1000 --genafpair in > out (% einsi in > out)" 1>&2 echo " % mafft --maxiterate 1000 --globalpair in > out (% ginsi in > out)" 1>&2 echo "" 1>&2 echo "If unsure which option to use:" 1>&2 echo " % mafft --auto in > out" 1>&2 echo "" 1>&2 # echo "Other options:" 1>&2 echo "--op # : Gap opening penalty, default: 1.53" 1>&2 echo "--ep # : Offset (works like gap extension 
penalty), default: 0.0" 1>&2 echo "--maxiterate # : Maximum number of iterative refinement, default: 0" 1>&2 echo "--clustalout : Output: clustal format, default: fasta" 1>&2 echo "--reorder : Outorder: aligned, default: input order" 1>&2 echo "--quiet : Do not report progress" 1>&2 echo "--thread # : Number of threads (if unsure, --thread -1)" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2 # echo " most accurate in many cases, assumes only one alignable domain" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --genafpair in > out (E-INS-i)" 1>&2 # echo " works well if many unalignable residues exist between alignable domains" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --globalpair in > out (G-INS-i)" 1>&2 # echo " suitable for globally alignable sequences " 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 in > out (FFT-NS-i)" 1>&2 # echo " accurate and slow, iterative refinement method " 1>&2 # echo "" 1>&2 # echo "If the input sequences are long (~1,000,000nt)," 1>&2 # echo " % mafft --retree 1 --memsave --fft in > out (FFT-NS-1-memsave, new in v5.8)" 1>&2 # echo "" 1>&2 # echo "If many (~5,000) sequences are to be aligned," 1>&2 # echo "" 1>&2 # echo " % mafft --retree 1 [--memsave] --nofft in > out (NW-NS-1, new in v5.8)" 1>&2 # echo "" 1>&2 # echo " --localpair : All pairwise local alignment information is included" 1>&2 # echo " to the objective function, default: off" 1>&2 # echo " --globalpair : All pairwise global alignment information is included" 1>&2 # echo " to the objective function, default: off" 1>&2 # echo " --op # : Gap opening penalty, default: $defaultgop " 1>&2 # echo " --ep # : Offset (works like gap extension penalty), default: $defaultaof " 1>&2 # echo " --bl #, --jtt # : Scoring matrix, default: BLOSUM62" 1>&2 # echo " Alternatives are BLOSUM (--bl) 30, 45, 62, 80, " 1>&2 # echo " or JTT (--jtt) # PAM. 
" 1>&2 # echo " --nuc or --amino : Sequence type, default: auto" 1>&2 # echo " --retree # : The number of tree building in progressive method " 1>&2 # echo " (see the paper for detail), default: $defaultcycle " 1>&2 # echo " --maxiterate # : Maximum number of iterative refinement, default: $defaultiterate " 1>&2 # if [ $defaultfft -eq 1 ]; then # echo " --fft or --nofft: FFT is enabled or disabled, default: enabled" 1>&2 # else # echo " --fft or --nofft: FFT is enabled or disabled, default: disabled" 1>&2 # fi # echo " --memsave: Memory saving mode" 1>&2 # echo " (for long genomic sequences), default: off" 1>&2 # echo " --clustalout : Output: clustal format, default: fasta" 1>&2 # echo " --reorder : Outorder: aligned, default: input order" 1>&2 # echo " --quiet : Do not report progress" 1>&2 # echo "-----------------------------------------------------------------------------" 1>&2 exit 1; fi if [ $sw -eq 1 ]; then swopt=" -A " else swopt=" " fi if [ $distance = "fasta" -o $partdist = "fasta" ]; then if [ ! "$FASTA_4_MAFFT" ]; then FASTA_4_MAFFT=`which fasta34` fi if [ ! -x "$FASTA_4_MAFFT" ]; then echo "" 1>&2 echo "== Install FASTA ========================================================" 1>&2 echo "This option requires the fasta34 program (FASTA version x.xx or higher)" 1>&2 echo "installed in your PATH. If you have the fasta34 program but have renamed" 1>&2 echo "(like /usr/local/bin/myfasta), set the FASTA_4_MAFFT environment variable" 1>&2 echo "to point your fasta34 (like setenv FASTA_4_MAFFT /usr/local/bin/myfasta)." 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "last" -o $distance = "lastmulti" ]; then if [ ! -x "$prefix/lastal" -o ! -x "$prefix/lastdb" ]; then echo "" 1>&2 echo "== Install LAST ============================================================" 1>&2 echo "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." 
1>&2 echo "http://last.cbrc.jp/" 1>&2 echo "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " 1>&2 echo "============================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "lara" -o $distance = "slara" ]; then if [ ! -x "$prefix/mafft_lara" ]; then echo "" 1>&2 echo "== Install LaRA =========================================================" 1>&2 echo "This option requires LaRA (Bauer et al. http://www.planet-lisa.net/)." 1>&2 echo "The executable have to be renamed to 'mafft_lara' and installed into " 1>&2 echo "the $prefix directory. " 1>&2 echo "A configuration file of LaRA also have to be given" 1>&2 echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi if [ ! -s "$laraparams" ]; then echo "" 1>&2 echo "== Configure LaRA =======================================================" 1>&2 echo "A configuration file of LaRA have to be given" 1>&2 echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then if [ ! -x "$prefix/foldalign210" ]; then echo "" 1>&2 echo "== Install FOLDALIGN ====================================================" 1>&2 echo "This option requires FOLDALIGN (Havgaard et al. http://foldalign.ku.dk/)." 1>&2 echo "The executable have to be renamed to 'foldalign210' and installed into " 1>&2 echo "the $prefix directory. " 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then if [ ! 
-x "$prefix/mxscarnamod" ]; then echo "" 1>&2 echo "== Install MXSCARNA ======================================================" 1>&2 echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2 echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2 echo "which contains the modified version of MXSCARNA." 1>&2 echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2 echo "==========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "dafs" -o $dafs -eq 1 ]; then if [ ! -x "$prefix/dafs" ]; then echo "" 1>&2 echo "== Install DAFS===========================================================" 1>&2 echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2 echo "http://www.ncrna.org/ " 1>&2 echo "==========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $contrafold -eq 1 ]; then if [ ! -x "$prefix/contrafold" ]; then echo "" 1>&2 echo "== Install CONTRAfold ===================================================" 1>&2 echo "This option requires CONTRAfold" 1>&2 echo "(Do et al. http://contra.stanford.edu/contrafold/)." 1>&2 echo "The executable 'contrafold' have to be installed into " 1>&2 echo "the $prefix directory. 
" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi #old # if [ $treeout -eq 1 ]; then # parttreeoutopt="-t" # if [ $cycle -eq 0 ]; then # treeoutopt="-t -T" # groupsize=1 # iterate=0 # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # distance="distonly" # fi # else # treeoutopt="-t" # fi # else # parttreeoutopt=" " # if [ $cycle -eq 0 ]; then # treeoutopt="-t -T" # iterate=0 # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # distance="distonly" # fi # else # treeoutopt=" " # fi # fi #new if [ $cycle -eq 0 ]; then treeoutopt="-t -T" iterate=0 # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance if [ $distance = "global" ]; then distance="distonly" fi if [ $treeout -eq 1 ]; then parttreeoutopt="-t" groupsize=1 else parttreeoutopt=" " fi if [ $distout -eq 1 ]; then distoutopt="-y -T" fi else if [ $treeout -eq 1 ]; then parttreeoutopt="-t" treeoutopt="-t" else parttreeoutopt=" " treeoutopt=" " fi if [ $distout -eq 1 ]; then distoutopt="-y" fi fi # formatcheck=`grep -c '^[[:blank:]]\+>' $TMPFILE/infile | head -1 ` if [ $formatcheck -gt 0 ]; then echo "The first character of a description line must be " 1>&2 echo "the greater-than (>) symbol, not a blank." 1>&2 echo "Please check the format around the following line(s):" 1>&2 grep -n '^[[:blank:]]\+>' $TMPFILE/infile 1>&2 exit 1 fi nseq=`grep -c '^[>|=]' $TMPFILE/infile | head -1 ` if [ $nseq -eq 2 ]; then cycle=1 fi if [ $cycle -gt 3 ]; then cycle=3 fi if [ $nseq -gt 6000 -a $iterate -gt 1 ]; then echo "Too many sequences to perform iterative refinement!" 1>&2 echo "Please use a progressive method." 
1>&2 exit 1 fi if [ $distance = "lastmulti" -o $distance = "multi" ]; then if [ $fragment -eq 0 ]; then echo 'Specify --addfragments too' 1>&2 exit 1 fi fi if [ $fragment -ne 0 ]; then if [ $pairspecified -eq 0 ]; then distance="multi" fi if [ $distance != "multi" -a $distance != "hybrid" -a $distance != "lastmulti" -a $distance != "local" -a $distance != "last" -a $distance != "ktuples" -a $distance != "ktuplesmulti" ]; then echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' 1>&2 exit 1 fi fi if [ "$memopt" = " -M -B " -a "$distance" != "ktuples" ]; then echo "Impossible" 1>&2 exit 1 fi if [ $distance = "parttree" ]; then if [ $seed != "x" -o $seedtable != "x" ]; then echo "Impossible" 1>&2 exit 1 fi if [ $iterate -gt 1 ]; then echo "Impossible" 1>&2 exit 1 fi if [ $outorder = "aligned" ]; then outorder="input" fi outorder="input" # partorder ga kiku if [ $partdist = "localalign" ]; then splitopt=" -U " # -U -l -> fast cycle=1 elif [ $partdist = "fasta" ]; then splitopt=" -S " cycle=1 else splitopt=" " fi fi if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" \) ]; then localparam=" " weighti="0.0" elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" \) ]; then if [ $cycle -lt 2 ]; then cycle=2 # disttbfast ha seed hi-taiou fi if [ $iterate -lt 2 ]; then echo "############################################################################" 1>&2 echo "# Warning:" 1>&2 echo "# Progressive alignment method is incompatible with the --seed option." 1>&2 echo "# Automatically switched to the iterative refinement method." 1>&2 echo "# " 1>&2 echo "# Also consider using the '--add' option, which is compatible with" 1>&2 echo "# the progressive method and FASTER than the '--seed' option." 
1>&2 echo "# Usage is:" 1>&2 echo "# % mafft --add newSequences existingAlignment > output" 1>&2 echo "############################################################################" 1>&2 iterate=2 fi localparam="-l "$weighti elif [ $distance = "parttree" ]; then localparam=" " weighti="0.0" if [ $groupsize -gt -1 ]; then cycle=1 fi else localparam=" -l "$weighti if [ $cycle -gt 1 ]; then # 09/01/08 cycle=1 fi fi if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then aof="0.000" fi # if [ $nseq -gt 5000 ]; then # fft=0 # fi if [ $forcefft -eq 1 ]; then param_fft=" -G " fft=1 elif [ $fft -eq 1 ]; then param_fft=" -F " else param_fft=" " fi if [ $seed != "x" -a $seedtable != "x" ]; then echo 'Use either one of seedtable and seed. Not both.' 1>&2 exit 1 fi if [ $f2clext = "-E" -a $anysymbol -gt 0 ]; then echo '' 1>&2 echo 'At present, the combination of --text and ( --anysymbol or --preservecase ) is impossible.' 1>&2 echo '' 1>&2 exit 1 fi if [ $treein -eq 1 ]; then # if [ $iterate -gt 0 ]; then # echo 'Not supported yet.' 1>&2 # exit 1 # fi cycle=1 fi if [ $nadd -gt "0" ]; then if [ $fragment -eq 1 ]; then addarg="$addarg0 $nadd -g -0.01" else addarg="$addarg0 $nadd" fi cycle=1 iterate=0 # treealg=" -q " ## 2012/01/24 ## removed 2012/02/06 fi if [ $adjustdirection -gt 0 -a $seed != "x" ]; then echo '' 1>&2 echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 1>&2 echo '' 1>&2 exit 1 fi if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then if [ $distance = "ktuples" ]; then echo 'Not supported.' 1>&2 echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2 echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2 exit 1 fi if [ $f2clext = "-E" ]; then echo '' 1>&2 echo 'For RNA alignment, the --text mode is impossible.' 1>&2 echo '' 1>&2 exit 1 fi fi # cycle ga atode henkou sareru node koko de strategy no namae wo kimeru. 
# From here: build the strategy label shown to the user.
# The label has the form <prefix>[I]NS-<suffix>:
#   prefix - derived from the RNA options, $distance, $sw and $fft
#   I      - appended when $weighti is greater than 0.0
#   suffix - "i" for iterative refinement, a PartTree variant, "fragment",
#            "full", or the number of guide-tree cycles
if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
	# An RNA structure option is active.
	if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
		strategy="X-"
	elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
		strategy="Q-"
	else
		# The previous form of the test above ended in a bare
		# `-o "globalgenaf"`, a non-empty string that is always true, so every
		# remaining distance was labelled "Q-". That result is kept here
		# explicitly so that $strategy is never left unset.
		strategy="Q-"
	fi
elif [ $distance = "fasta" -a $sw -eq 0 ]; then
	strategy="F-"
elif [ $distance = "fasta" -a $sw -eq 1 ]; then
	strategy="H-"
elif [ $distance = "blast" ]; then
	strategy="B-"
elif [ $distance = "global" -o $distance = "distonly" ]; then
	strategy="G-"
elif [ $distance = "local" ]; then
	strategy="L-"
elif [ $distance = "last" ]; then
	strategy="Last-"
elif [ $distance = "hybrid" ]; then
	strategy="Hybrid-"
elif [ $distance = "multi" ]; then
	strategy="Multi-"
elif [ $distance = "lastmulti" ]; then
	strategy="LastMulti-"
elif [ $distance = "localgenaf" ]; then
	strategy="E-"
elif [ $distance = "globalgenaf" ]; then
	strategy="K-"
elif [ $fft -eq 1 ]; then
	strategy="FFT-"
else
	strategy="NW-"
fi

# awk prints 1 when the comparison holds, 0 otherwise.
# if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then
if [ `awk "BEGIN {print($weighti>0.0)}"` -gt 0 ]; then
	strategy=$strategy"I"
fi
strategy=$strategy"NS-"

if [ $iterate -gt 0 ]; then
	strategy=$strategy"i"
elif [ $distance = "parttree" ]; then
	if [ $partdist = "fasta" ]; then
		strategy=$strategy"FastaPartTree-"$cycle
	elif [ $partdist = "localalign" ]; then
		strategy=$strategy"DPPartTree-"$cycle
	else
		strategy=$strategy"PartTree-"$cycle
	fi
elif [ $fragment -eq 1 ]; then
	strategy=$strategy"fragment"
elif [ $fragment -eq -1 ]; then
	strategy=$strategy"full"
else
	strategy=$strategy$cycle
fi

# Placeholder description, used when nothing more specific is assigned later.
explanation='?'
performance='Not tested.'
if [ $strategy = "F-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' performance='Most accurate, but very slow' elif [ $strategy = "L-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' performance='Probably most accurate, very slow' elif [ $strategy = "E-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment with generalized affine gap costs (Altschul 1998)' performance='Suitable for sequences with long unalignable regions, very slow' elif [ $strategy = "G-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with GLOBAL pairwise alignment information' performance='Suitable for sequences of similar lengths, very slow' elif [ $strategy = "X-INS-i" ]; then explanation='RNA secondary structure information is taken into account.' performance='For short RNA sequences only, extremely slow' elif [ $strategy = "F-INS-1" ]; then explanation='Progressive method incorporating LOCAL pairwise alignment information' elif [ $strategy = "L-INS-1" ]; then explanation='Progressive method incorporating LOCAL pairwise alignment information' elif [ $strategy = "G-INS-1" ]; then explanation='Progressive method incorporating GLOBAL pairwise alignment information' elif [ $strategy = "FFT-NS-i" -o $strategy = "NW-NS-i" ]; then explanation='Iterative refinement method (max. 
'$iterate' iterations)' if [ $iterate -gt 2 ]; then performance='Accurate but slow' else performance='Standard' fi elif [ $strategy = "FFT-NS-2" -o $strategy = "NW-NS-2" ]; then explanation='Progressive method (guide trees were built '$cycle' times.)' performance='Fast but rough' elif [ $strategy = "FFT-NS-1" -o $strategy = "NW-NS-1" ]; then explanation='Progressive method (rough guide tree was used.)' performance='Very fast but very rough' fi if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then outputopt=" -c $strategy -r $TMPFILE/order $f2clext " elif [ $outputformat = "clustal" -a $outorder = "input" ]; then outputopt=" -c $strategy $f2clext " elif [ $outputformat = "phylip" -a $outorder = "aligned" ]; then outputopt=" -y -r $TMPFILE/order " elif [ $outputformat = "phylip" -a $outorder = "input" ]; then outputopt=" -y " elif [ $outputformat = "pir" -a $outorder = "aligned" ]; then outputopt=" -f -r $TMPFILE/order " else outputopt="null" fi # kokomade ( cd $TMPFILE; cat /dev/null > pre echo "nseq = " $nseq 1>>"$progressfile" echo "distance = " $distance 1>>"$progressfile" echo "iterate = " $iterate 1>>"$progressfile" echo "cycle = " $cycle 1>>"$progressfile" if [ $anysymbol -eq 1 ]; then mv infile orig "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1 fi if [ $adjustdirection -gt 0 ]; then if [ $fragment -ne 0 ]; then fragarg="-F" else fragarg=" " fi if [ $adjustdirection -eq 1 ]; then "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.01 > _direction elif [ $adjustdirection -eq 2 ]; then "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.01 -d > _direction fi "$prefix/setdirection" -d _direction -i infile > infiled mv infiled infile if [ $anysymbol -eq 1 ]; then "$prefix/setdirection" -d _direction -i orig > origd mv origd orig fi fi if [ $seed != "x" ]; then mv infile infile2 if [ $anysymbol -eq 1 ]; then mv orig orig2 cat /dev/null > orig fi cat /dev/null > infile cat 
/dev/null > hat3.seed seedoffset=0 # echo "seednseq="$seednseq # echo "seedoffset="$seedoffset set $seednseq >> "$progressfile" # echo $# while [ $# -gt 1 ] do shift # echo "num="$# if [ $anysymbol -eq 1 ]; then cat seed$# >> orig "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1 mv clean seed$# fi "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1 cat hat3 >> hat3.seed # echo "$1" seedoffset=`expr $seedoffset + $1` # echo "$1" # echo "seedoffset="$seedoffset done; # echo "seedoffset="$seedoffset if [ $anysymbol -eq 1 ]; then "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi cat orig2 >> orig else cat infile2 >> infile fi elif [ $seedtable != "x" ]; then cat _seedtablefile > hat3.seed elif [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then mv infile infile2 if [ $anysymbol -eq 1 ]; then mv orig orig2 cat /dev/null > orig fi cat /dev/null > infile echo "strdir = " 1>>"$progressfile" echo $strdir 1>>"$progressfile" echo "Calling DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/)" 1>>"$progressfile" perl "$prefix/mafftash_premafft.pl" -p pdblist -o ownlist -d "$strdir" 2>>"dasherr" dashres="$?" cat dasherr 1>>"$progressfile" echo "Done." 
1>>"$progressfile" if [ $dashres = "1" ]; then echo "Error in DASH" 1>>"$progressfile" exit 1; fi seedoffset=`grep -c '^[>|=]' instr | head -1 ` echo "# of structures = " 1>>"$progressfile" echo $seedoffset 1>>"$progressfile" mv hat3 hat3.seed if [ $anysymbol -eq 1 ]; then cat instr >> orig "$prefix/replaceu" $seqtype -i instr -o 0 > clean 2>>"$progressfile" || exit 1 mv clean infile "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi cat orig2 >> orig else cat instr > infile cat infile2 >> infile fi else cat /dev/null > hat3.seed fi # cat hat3.seed if [ $mergetable != "/dev/null" ]; then if [ $nadd -gt "0" ]; then echo "Impossible" 1>&2 exit 1 fi # if [ $seed != "x" -o $seedtable != "x" ]; then # echo "This version does not support the combination of merge and seed." 1>&2 # exit 1 # fi # iterate=0 # 2013/04/16 mergearg="-H $seedoffset" fi if [ $mccaskill -eq 1 ]; then "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 elif [ $dafs -eq 1 ]; then "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 elif [ $contrafold -eq 1 ]; then "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 fi if [ $distance = "fasta" ]; then "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "blast" ]; then "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof 
$param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "foldalignlocal" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "foldalignglobal" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "slara" ]; then "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "lara" ]; then "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -B -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h 
$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "scarna" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "dafs" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "global" ]; then "$prefix/pairlocalalign" $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -A < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "local" ]; then if [ $fragment -ne 0 ]; then "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/addsingle" $legacygapopt -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg 
$scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/pairlocalalign" $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null 2>>"$progressfile" || exit 1 # addarg wo watasanai cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $termgapopt $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi elif [ $distance = "globalgenaf" ]; then "$prefix/pairlocalalign" $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -O $GGOP -E $GEXP -K < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "localgenaf" ]; then "$prefix/pairlocalalign" $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $termgapopt $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "last" ]; then if [ $fragment -ne 0 ]; then "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/addsingle" $legacygapopt -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt 
$treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 # addarg wo watasanai cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" $legacygapopt $mergearg $termgapopt $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi elif [ $distance = "lastmulti" ]; then "$prefix/dndpre" -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 mv hat2 hat2i "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hat2 hat2n mv hatx hat3 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $legacygapopt -d -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi elif [ $distance = "multi" ]; then "$prefix/dndpre" -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 mv hat2 hat2i "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hat2 hat2n mv hatx hat3 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $legacygapopt -d -O $outnum $addarg -C $numthreads $rnaopt $weightopt 
$treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi elif [ $distance = "hybrid" ]; then "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/disttbfast" $legacygapopt -W $tuplesize $termgapopt $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $legacygapopt -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/tbfast" $legacygapopt $mergearg $termgapopt $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi elif [ $distance = "distonly" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1 "$prefix/tbfast" $legacygapopt $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "parttree" ]; then "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h $aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>>"$progressfile" || exit 1 mv hat3.seed hat3 elif [ $distance = 
"ktuplesmulti" ]; then # "$prefix/dndpre" -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 # mv hat2 hat2i # "$prefix/disttbfast" $legacygapopt -W $tuplesize $termgapopt $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 # mv hat2 hat2n if [ $fragment -ne 0 ]; then "$prefix/addsingle" $legacygapopt -d -W $tuplesize -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 # "$prefix/addsingle" $legacygapopt -d -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi else if [ $fragment -ne 0 ]; then "$prefix/addsingle" $legacygapopt -W $tuplesize -O $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/disttbfast" $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>>"$progressfile" || exit 1 mv hat3.seed hat3 fi fi while [ $cycle -gt 1 ] do if [ $distance = "parttree" ]; then mv pre infile "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h $aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>>"$progressfile" || exit 1 else 
"$prefix/tbfast" $legacygapopt $mergearg $termgapopt $outnum -C $numthreads $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1 fi cycle=`expr $cycle - 1` done if [ $iterate -gt 0 ]; then if [ $distance = "ktuples" ]; then "$prefix/dndpre" -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1 fi "$prefix/dvtditr" $legacygapopt $mergearg -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1 fi if [ $coreout -eq 1 ]; then "$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2 mv pre2 pre elif [ $anysymbol -eq 1 ]; then "$prefix/restoreu" -a pre -i orig > restored || exit 1 mv restored pre fi echo '' 1>>"$progressfile" if [ $mccaskill -eq 1 ]; then echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>>"$progressfile" echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile" echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile" echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile" echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile" echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile" echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" echo "" 1>>"$progressfile" elif [ $contrafold -eq 1 ]; then echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile" echo "and then incorporated in the iterative alignment process (4)." 
1>>"$progressfile" echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile" echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" echo "" 1>>"$progressfile" fi if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then echo "Input structures are decomposed into structural domains using" 1>>"$progressfile" echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile" echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile" echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile" fi if [ $pdblist != "/dev/null" ]; then echo "Pre-computed alignments stored in " 1>>"$progressfile" echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile" fi if [ $distance = "fasta" -o $partdist = "fasta" ]; then echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile" echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile" fi if [ $distance = "blast" ]; then echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile" echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile" fi if [ $distance = "last" -o $distance = "lastmulti" ]; then echo "Pairwise alignments were computed by LAST" 1>>"$progressfile" echo "http://last.cbrc.jp/" 1>>"$progressfile" echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile" fi if [ $distance = "scarna" ]; then echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile" echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile" fi if [ $distance = "dafs" ]; then echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile" echo "(Sato et al., 2012,,,,)." 1>>"$progressfile" fi if [ $distance = "lara" -o $distance = "slara" ]; then echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile" echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 
1>>"$progressfile" fi if [ $distance = "foldalignlocal" ]; then echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile" echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" fi if [ $distance = "foldalignglobal" ]; then echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile" echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" fi printf "\n" 1>>"$progressfile" echo 'Strategy:' 1>>"$progressfile" printf ' '$strategy 1>>"$progressfile" echo ' ('$performance')' 1>>"$progressfile" echo ' '$explanation 1>>"$progressfile" echo '' 1>>"$progressfile" echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile" echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile" echo "" 1>>"$progressfile" echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile" echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile" echo "To disable this change, add the --legacygappenalty option." 1>>"$progressfile" # echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile" # echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 
1>>"$progressfile" echo '' 1>>"$progressfile" if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then cat dasherr >>"$progressfile" fi ) if [ "$outputfile" = "" ]; then if [ "$outputopt" = "null" ]; then cat < $TMPFILE/pre || exit 1 else "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre 2>>/dev/null || exit 1 fi else if [ "$outputopt" = "null" ]; then cat < $TMPFILE/pre > "$outputfile" || exit 1 else "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" 2>>/dev/null || exit 1 fi fi if [ $treeout -eq 1 ]; then cp $TMPFILE/infile.tree "$infilename.tree" fi if [ -s $TMPFILE/GuideTree ]; then # --merge no toki dake cp $TMPFILE/GuideTree . fi if [ $distout -eq 1 ]; then cp $TMPFILE/hat2 "$infilename.hat2" fi exit 0; fi prog="awk" tmpawk=`which nawk 2>/dev/null | awk '{print $1}'` if [ -x "$tmpawk" ]; then prog="$tmpawk" fi tmpawk=`which gawk 2>/dev/null | awk '{print $1}'` if [ -x "$tmpawk" ]; then prog="$tmpawk" fi #echo "prog="$prog 1>&2 umask 077 ( $prog ' BEGIN { prefix = ENVIRON["prefix"]; version = ENVIRON["version"]; myself = ENVIRON["myself"]; while( 1 ) { options = "" printf( "\n" ) > "/dev/tty"; printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; printf( " MAFFT %s\n", version ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; printf( " Copyright (c) 2013 Kazutaka Katoh\n" ) > "/dev/tty"; printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty"; printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty"; printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Input file? 
(fasta format)\n@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ) if( res == 0 || NF == 0 ) continue; infile = sprintf( "%s", $0 ); res = getline < infile; close( infile ); if( res == -1 ) { printf( "%s: No such file.\n\n", infile ) > "/dev/tty"; printf( "Filename extension (eg., .txt) must be typed, if any.\n\n" ) > "/dev/tty"; } else if( res == 0 ) printf( "%s: Empty.\n", infile ) > "/dev/tty"; else { printf( "OK. infile = %s\n\n", infile ) > "/dev/tty"; break; } } nseq = 0; while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Output file?\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 || NF == 0 ) continue; else { outfile = sprintf( "%s", $0 ); printf( "OK. outfile = %s\n\n", outfile ) > "/dev/tty"; break; } } while( 1 ) { outargs = ""; printf( "\n" ) > "/dev/tty"; printf( "Output format?\n" ) > "/dev/tty"; printf( " 1. Clustal format / Sorted\n" ) > "/dev/tty"; printf( " 2. Clustal format / Input order\n" ) > "/dev/tty"; printf( " 3. Fasta format / Sorted\n" ) > "/dev/tty"; printf( " 4. Fasta format / Input order\n" ) > "/dev/tty"; printf( " 5. Phylip format / Sorted\n" ) > "/dev/tty"; printf( " 6. Phylip format / Input order\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); # printf( "res=%d, NF=%d\n", res, NF ); resnum = 0 + $1; # printf( "resnum=%d\n", resnum ); if( resnum < 1 || 6 < resnum ) continue; else { if( resnum == 1 ) outargs = "--clustalout --reorder"; else if( resnum == 2 ) outargs = "--clustalout --inputorder"; else if( resnum == 3 ) outargs = "--reorder"; else if( resnum == 4 ) outargs = "--inputorder"; else if( resnum == 5 ) outargs = "--phylipout --reorder"; else if( resnum == 6 ) outargs = "--phylipout --inputorder"; else continue; printf( "OK. arguments = %s\n\n", outargs ) > "/dev/tty"; break; } } while( 1 ) { arguments = ""; printf( "\n" ) > "/dev/tty"; printf( "Strategy?\n" ) > "/dev/tty"; printf( " 1. 
--auto\n" ) > "/dev/tty"; printf( " 2. FFT-NS-1 (fast)\n" ) > "/dev/tty"; printf( " 3. FFT-NS-2 (default)\n" ) > "/dev/tty"; printf( " 4. G-INS-i (accurate)\n" ) > "/dev/tty"; printf( " 5. L-INS-i (accurate)\n" ) > "/dev/tty"; printf( " 6. E-INS-i (accurate)\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); # printf( "res=%d, NF=%d\n", res, NF ); resnum = 0 + $1; # printf( "resnum=%d\n", resnum ); if( resnum < 1 || 6 < resnum ) continue; else { if( resnum == 1 ) arguments = "--auto"; else if( resnum == 2 ) arguments = "--retree 1"; else if( resnum == 3 ) arguments = "--retree 2"; else if( resnum == 4 ) arguments = "--globalpair --maxiterate 16"; else if( resnum == 5 ) arguments = "--localpair --maxiterate 16"; else if( resnum == 6 ) arguments = "--genafpair --maxiterate 16"; else arguments = sprintf( "%s", $0 ); printf( "OK. arguments = %s %s\n\n", arguments, outargs ) > "/dev/tty"; break; } } while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Additional arguments? (--ep #, --op #, --kappa #, etc)\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 || NF == 0 ) { break; } else { addargs = sprintf( "%s", $0 ); printf( "OK. arguments = %s %s %s\n\n", addargs, arguments, outargs ) > "/dev/tty"; break; } } arguments = sprintf( "%s %s %s", addargs, arguments, outargs ); print "" command = sprintf( "\"%s\" %s \"%s\" > \"%s\"", myself, arguments, infile, outfile ); gsub( /\\/, "/", command ); printf( "command=\n%s\n", command ) > "/dev/tty"; while( 1 ) { go = 0; printf( "OK?\n" ) > "/dev/tty"; printf( "@ [Y] " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 ) continue; else if( NF == 0 || $0 ~ /^[Yy]/ ) { go=1; break; } else break; } if( go ) break; printf( "\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; } system( command ); command = sprintf( "less \"%s\"", outfile ); system( command ); printf( "Press Enter to exit." 
) > "/dev/tty"; res = getline < "/dev/tty"; } ' ) exit 0; mafft-7.123-without-extensions/core/univscript.tmpl0000640000076500007650000000244311753415240021566 0ustar katohkatohprogs="_PROGS" for prog in $progs; do printf $prog" " done make clean make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m32 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread" for prog in $progs; do mv $prog $prog.intel32 done make clean make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m64 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread" for prog in $progs; do mv $prog $prog.intel64 done make clean make CC="gcc-4.0" CFLAGS="-arch ppc64 -m64 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD="" for prog in $progs; do mv $prog $prog.ppc64 done make clean make CC="gcc-4.0" CFLAGS="-arch ppc -m32 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD="" for prog in $progs; do mv $prog $prog.ppc32 done set $progs for prog in $progs; do # lipo -create $prog.icc $prog.ppc32 $prog.ppc64 -output $prog lipo -create $prog.intel64 $prog.intel32 $prog.ppc32 $prog.ppc64 -output $prog cp $prog ../binaries done mafft-7.123-without-extensions/core/partSalignmm.c0000640000076500007650000007421112226127150021262 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 1 #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 #if 0 static void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len ) { int i, j, gc, gb; float feff; for( i=0; i impmtx=%f\n", i1, j1, impmtx[i1][j1] ); return( impmtx[i1][j1] ); #if 0 if( i1 == l1 || j1 == l2 ) return( 
0.0 ); return( impmtx[i1+start1][j1+start2] ); #endif } static void part_imp_match_out_vead_gapmap( float *imp, int i1, int lgth2, int start2, int *gapmap2 ) { #if FASTMACHCALC float *pt = imp; int *gapmappt = gapmap2; while( lgth2-- ) *pt++ += impmtx[i1][start2+*gapmappt++]; #else int j; for( j=0; jstart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = (int)( pt - seq1[i] ) - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = (int)( pt - seq1[i] ) - 1; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = (int)( pt - seq1[i] ) - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = (int)( pt - seq2[j] ) - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = (int)( pt - seq2[j] ) - 1; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = (int)( pt - seq2[j] ) - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { 
if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; // impmtx[k1][k2] += tmpptr->fimportance * effij; if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->fimportance * effij_kozo; else impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } #if 0 fprintf( stderr, "impmtx = \n" ); for( k2=0; k2start1 ); fprintf( stderr, "end1 = %d\n", localhom[i][j]->end1 ); fprintf( stderr, "j = %d, seq2 = %s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start1 ) break; } start1 = pt - seq1[i] - 1; while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, localhom[i][j].end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->end1 ) break; } end1 = pt - seq1[i] - 1; pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start2 ) break; } start2 = pt - seq2[j] - 1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->end2 ) break; } end2 = pt - seq2[j] - 1; // fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); k1 = start1; k2 = start2; fprintf( stderr, "step 0\n" ); while( k1 <= end1 
&& k2 <= end2 ) { #if 0 if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j].wimportance * eff1[i] * eff2[j]; k1++; k2++; #else if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; #endif } dif = ( end1 - start1 ) - ( end2 - start2 ); fprintf( stderr, "dif = %d\n", dif ); if( dif > 0 ) { do { fprintf( stderr, "dif = %d\n", dif ); k1 = start1; k2 = start2 - dif; while( k1 <= end1 && k2 <= end2 ) { if( 0 <= k2 && start2 <= k2 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif-- ); } else { do { k1 = start1 + dif; k2 = start2; while( k1 <= end1 ) { if( k1 >= 0 && k1 >= start1 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif++ ); } } } #if 0 fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2 ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; // char gap[] = "-"; char *gap; float wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = 
iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float *ogcp1; static TLS float *ogcp2; static TLS float *fgcp1; static TLS float *fgcp2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS float *gapfreq1; static TLS float *gapfreq2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif float *fgcp2pt; float *ogcp2pt; float fgcp1va; float ogcp1va; float *gf2pt; float *gf2ptpre; float gf1va; float gf1vapre; float headgapfreq1; float headgapfreq2; if( seq1 == NULL ) { if( orlgth1 ) { // fprintf( stderr, "## Freeing local arrays in A__align\n" ); orlgth1 = 0; orlgth2 = 0; part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0 ); free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( 
ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } else { // fprintf( stderr, "## Not allocated\n" ); } return( 0.0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 1 // if( lgth1 == 0 ) fprintf( stderr, "WARNING: lgth1=0 in partA__align\n" ); // if( lgth2 == 0 ) fprintf( stderr, "WARNING: lgth2=0 in partA__align\n" ); if( lgth1 == 0 && lgth2 == 0 ) return( 0.0 ); if( lgth1 == 0 ) { for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc ); cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc ); if( sgap1 ) { new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 ); new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 ); new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 ); new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 ); outgapcount( &headgapfreq1, icyc, sgap1, eff1 ); outgapcount( &headgapfreq2, jcyc, sgap2, eff2 ); outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 ); outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 ); } else { st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 ); st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 ); st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 ); st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 ); headgapfreq1 = 0.0; headgapfreq2 = 0.0; gapfreq1[lgth1] = 0.0; gapfreq2[lgth2] = 0.0; } if( legacygapcost == 0 ) { gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 ); gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 ); for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { for( i=1; i", wm ); #endif // g = mi + *fgcp2pt * gapfreq1[i]; if( (g = mi + *fgcp2pt * gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); } // g = *prept + *ogcp2pt * gapfreq1[i-1]; if( (g = *prept + *ogcp2pt * gf1vapre) >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // g = *mjpt + fgcp1va * gapfreq2[j]; if( (g = *mjpt + fgcp1va * *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } // g = *prept + ogcp1va * gapfreq2[j-1]; if( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; 
curpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'd': directionfile = *++argv; fprintf( stderr, "directionfile = %s\n", directionfile ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *difp; int nlenmin; char **name; char **seq; char *tmpseq; char line[100]; int *nlen; int i; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( directionfile ) { difp = fopen( directionfile, "r" ); if( !difp ) { fprintf( stderr, "Cannot open %s\n", directionfile ); exit( 1 ); } } else { fprintf( stderr, "Give directionfile!\n" ); } dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); tmpseq = AllocateCharVec( MAX( B, nlenmax )+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); for( i=0; i%s\n", name[i]+1 ); fprintf( stdout, "%s\n", seq[i] ); } free( nlen ); FreeCharMtx( seq ); FreeCharMtx( name ); free( tmpseq ); return( 0 ); } mafft-7.123-without-extensions/core/partQalignmm.c0000640000076500007650000007456312224730722021275 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 1 #define DEBUG 0 #define XXXXXXX 0 
#define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 static int impalloclen = 0; static float **impmtx = NULL; float part_imp_match_out_scQ( int i1, int j1 ) { // fprintf( stderr, "impalloclen = %d\n", impalloclen ); // fprintf( stderr, "i1,j1=%d,%d -> impmtx=%f\n", i1, j1, impmtx[i1][j1] ); return( impmtx[i1][j1] ); #if 0 if( i1 == l1 || j1 == l2 ) return( 0.0 ); return( impmtx[i1+start1][j1+start2] ); #endif } static void part_imp_match_out_vead_gapmapQ( float *imp, int i1, int lgth2, int start2, int *gapmap2 ) { #if FASTMACHCALC float *pt = imp; int *gapmappt = gapmap2; while( lgth2-- ) *pt++ += impmtx[i1][start2+*gapmappt++]; #else int j; for( j=0; jstart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = (int)( pt - seq1[i] ) - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = (int)( pt - seq1[i] ) - 1; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = (int)( pt - seq1[i] ) - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = (int)( pt - seq2[j] ) - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = (int)( pt - seq2[j] ) - 1; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = (int)( pt - seq2[j] ) - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, 
seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } #if 0 fprintf( stderr, "impmtx = \n" ); for( k2=0; k2start1 ); fprintf( stderr, "end1 = %d\n", localhom[i][j]->end1 ); fprintf( stderr, "j = %d, seq2 = %s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start1 ) break; } start1 = pt - seq1[i] - 1; while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, localhom[i][j].end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->end1 ) break; } end1 = pt - seq1[i] - 1; pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == localhom[i][j]->start2 ) break; } start2 = pt - seq2[j] - 1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == 
localhom[i][j]->end2 ) break; } end2 = pt - seq2[j] - 1; // fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); k1 = start1; k2 = start2; fprintf( stderr, "step 0\n" ); while( k1 <= end1 && k2 <= end2 ) { #if 0 if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j].wimportance * eff1[i] * eff2[j]; k1++; k2++; #else if( !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] += localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; #endif } dif = ( end1 - start1 ) - ( end2 - start2 ); fprintf( stderr, "dif = %d\n", dif ); if( dif > 0 ) { do { fprintf( stderr, "dif = %d\n", dif ); k1 = start1; k2 = start2 - dif; while( k1 <= end1 && k2 <= end2 ) { if( 0 <= k2 && start2 <= k2 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif-- ); } else { do { k1 = start1 + dif; k2 = start2; while( k1 <= end1 ) { if( k1 >= 0 && k1 >= start1 && !nocount1[k1] && !nocount2[k2] ) impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j]; k1++; k2++; } } while( dif++ ); } } } #if 0 fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2 ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; // char gap[] = "-"; char *gap; float wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( 
i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static float mi, *m; static int **ijp; static int mpi, *mp; static float *w1, *w2; static float *match; static float *initverticalw; /* kufuu sureba iranai */ static float *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static char **mseq; static float *digf1; static float *digf2; static float *diaf1; static float *diaf2; static float *gapz1; static float *gapz2; static float *gapf1; static float *gapf2; static float *ogcp1g; static float *ogcp2g; static float *fgcp1g; static float *fgcp2g; static float *og_h_dg_n1_p; static float *og_h_dg_n2_p; static float *fg_h_dg_n1_p; static float *fg_h_dg_n2_p; static float *og_t_fg_h_dg_n1_p; static float *og_t_fg_h_dg_n2_p; static float *fg_t_og_h_dg_n1_p; static float *fg_t_og_h_dg_n2_p; static float *gapz_n1; static float *gapz_n2; static float **cpmx1; static float **cpmx2; static int **intwork; static float **floatwork; static int orlgth1 = 0, orlgth2 = 0; float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif float tmppenal; float *fg_t_og_h_dg_n2_p_pt; 
float *og_t_fg_h_dg_n2_p_pt; float *og_h_dg_n2_p_pt; float *fg_h_dg_n2_p_pt; float *gapz_n2_pt0; float *gapz_n2_pt1; float *fgcp2pt; float *ogcp2pt; float fg_t_og_h_dg_n1_p_va; float og_t_fg_h_dg_n1_p_va; float og_h_dg_n1_p_va; float fg_h_dg_n1_p_va; float gapz_n1_va0; float gapz_n1_va1; float fgcp1va; float ogcp1va; #if 0 fprintf( stderr, "eff in SA+++align\n" ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatVec( og_h_dg_n1_p ); FreeFloatVec( og_h_dg_n2_p ); FreeFloatVec( fg_h_dg_n1_p ); FreeFloatVec( fg_h_dg_n2_p ); FreeFloatVec( og_t_fg_h_dg_n1_p ); FreeFloatVec( og_t_fg_h_dg_n2_p ); FreeFloatVec( fg_t_og_h_dg_n1_p ); FreeFloatVec( fg_t_og_h_dg_n2_p ); FreeFloatVec( gapz_n1 ); FreeFloatVec( gapz_n2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); og_t_fg_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); og_t_fg_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); fg_t_og_h_dg_n1_p = AllocateFloatVec( ll1 + 2 ); fg_t_og_h_dg_n2_p = AllocateFloatVec( ll2 + 2 ); gapz_n1 = AllocateFloatVec( ll1+2 ); gapz_n2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc ); cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc ); if( sgap1 ) { new_OpeningGapCount_zure( ogcp1g, icyc, seq1, eff1, lgth1, sgap1, egap1 ); new_OpeningGapCount_zure( ogcp2g, jcyc, seq2, eff2, lgth2, sgap2, egap2 ); new_FinalGapCount_zure( fgcp1g, icyc, seq1, eff1, lgth1, sgap1, egap1 ); new_FinalGapCount_zure( fgcp2g, jcyc, seq2, eff2, lgth2, sgap2, egap2 ); getdigapfreq_part( digf1, icyc, seq1, eff1, lgth1, sgap1, egap1 ); getdigapfreq_part( digf2, jcyc, seq2, eff2, lgth2, sgap2, egap2 ); getdiaminofreq_part( diaf1, icyc, seq1, eff1, lgth1, sgap1, egap1 ); getdiaminofreq_part( diaf2, jcyc, seq2, eff2, lgth2, sgap2, egap2 ); getgapfreq( gapf1, icyc, seq1, eff1, lgth1 ); getgapfreq( gapf2, jcyc, seq2, eff2, lgth2 ); getgapfreq_zure_part( gapz1, icyc, seq1, eff1, lgth1, sgap1 ); getgapfreq_zure_part( gapz2, jcyc, seq2, eff2, lgth2, sgap1 ); } else { st_OpeningGapCount( ogcp1g, icyc, seq1, eff1, lgth1 ); st_OpeningGapCount( ogcp2g, jcyc, seq2, eff2, lgth2 ); st_FinalGapCount_zure( fgcp1g, icyc, seq1, eff1, lgth1 ); st_FinalGapCount_zure( fgcp2g, jcyc, seq2, eff2, lgth2 ); getdigapfreq_st( digf1, icyc, seq1, eff1, lgth1 ); getdigapfreq_st( digf2, jcyc, seq2, eff2, lgth2 ); getdiaminofreq_x( diaf1, icyc, seq1, eff1, lgth1 ); getdiaminofreq_x( diaf2, jcyc, seq2, eff2, lgth2 ); getgapfreq( gapf1, icyc, seq1, eff1, lgth1 ); getgapfreq( gapf2, jcyc, seq2, eff2, lgth2 ); getgapfreq_zure( gapz1, icyc, seq1, eff1, lgth1 ); getgapfreq_zure( gapz2, jcyc, seq2, eff2, lgth2 ); } #if 1 lastj = lgth2+2; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { g = 0.0; g += ogcp1g[0] * og_h_dg_n2_p[0]; g += ogcp2g[0] * og_h_dg_n1_p[0]; g += fgcp1g[0] * 
fg_h_dg_n2_p[0]; g += fgcp2g[0] * fg_h_dg_n1_p[0]; initverticalw[0] += g; currentw[0] += g; for( i=1; i", wm ); #endif tmppenal = gapz_n1_va1 * *fg_t_og_h_dg_n2_p_pt; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+tmppenal) > wm ) { wm = g; *ijppt = -( j - mpi ); } tmppenal = gapz_n1_va0 * *og_t_fg_h_dg_n2_p_pt; if( (g=*prept+tmppenal) >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif tmppenal = *gapz_n2_pt1 * fg_t_og_h_dg_n1_p_va; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt+tmppenal) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } tmppenal = *gapz_n2_pt0 * og_t_fg_h_dg_n1_p_va; if( (g=*prept+tmppenal) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fg_t_og_h_dg_n2_p_pt++; og_t_fg_h_dg_n2_p_pt++; og_h_dg_n2_p_pt++; fg_h_dg_n2_p_pt++; gapz_n2_pt0++; gapz_n2_pt1++; fgcp2pt++; ogcp2pt++; } lastverticalw[i] = currentw[lgth2-1]; } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i nlenmax ) nlenmax = strlen( aseq[0] ); /* s = ( int )( rnd() * locnjob ); s++; if( s == locnjob ) s = 0; ou = 0; */ if( ou == 0 ) { ou = 1; s = sss[0]; /* sss[0]++; if( sss[0] == locnjob ) { sss[0] = 0; cyc[0]++; } */ sss[0]--; if( sss[0] == -1 ) { sss[0] = locnjob-1; cyc[0]++; } } else { ou = 0; s = sss[1]; sss[1]++; if( sss[1] == locnjob ) { sss[1] = 0; cyc[1]++; } } fprintf( trap_g, "%d ", weight ); /* for( i=0, count=0; i alloclen ) { if( resultlen > nlenmax0*3 || resultlen > N ) { fprintf(stderr, "Error in main1\n"); exit( 1 ); } FreeTmpSeqs( mseq2, mseq1 ); alloclen = strlen( result[0] ) * 2.0; fprintf( stderr, "\n\ntrying to allocate TmpSeqs\n\n" ); AllocateTmpSeqs( &mseq2, &mseq1, alloclen ); } for( i=0; i=s; i-- ) strcpy( mseq2[i+1], mseq2[i] ); strcpy( mseq2[s], mseq1 
); if( checkC ) { tmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr ); fprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore ); fprintf( stderr, "pick up %d, after ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore1 ); if( tmpscore1 < tmpscore ) { fprintf( stderr, "\7" ); fprintf( trap_g, ">>>>>>>n\n" ); } if( fabs( wm - tmpscore1 ) / wm > 0.001 ) { fprintf( stderr, "\7sorry\n" ); exit( 1 ); } } identity = !strcmp( mseq2[s], aseq[s] ); if( s == locnjob - 1 ) ss = 0; else ss=s+1; identity *= !strcmp( mseq2[ss], aseq[ss] ); if( !identity ) { tmpscore = score_calc0( mseq2, locnjob, eff, s ); } else tmpscore = tscore; if( disp ) { fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); display( mseq2, locnjob ); } fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); fprintf( stderr, "score = %f mscore = %f ", tmpscore, mscore ); fprintf( trap_g, "%#4d %#4d / the rest ", l+1, s+1 ); fprintf( trap_g, "score = %f mscore = %f ", tmpscore, mscore ); if( identity ) { fprintf( stderr, "( identical )\n" ); fprintf( trap_g, "( identical )\n" ); sai[s] = 2; } else if( tmpscore > mscore - cut ) { fprintf( stderr, "accepted\n" ); fprintf( trap_g, "accepted\n" ); for( i=0; i mscore ) { for( i=0; i %f\n", mscore ); strcpy( sai, sai1 ); /* kokoka ? 
*/ #if 0 if( !tbitr && !tbweight ) { prep = fopen( "best", "w" ); Write( prep, locnjob, name, nlen, bseq ); fclose( prep ); } #endif } } else { if( tmpscore == tscore ) { fprintf( stderr, "occational coincidence \n" ); fprintf( trap_g, "occational coincidence\n" ); } else { fprintf( stderr, "rejected\n" ); fprintf( trap_g, "rejected\n" ); } for( i=0; i0; i-- ) { if( tmpscore == his[ou][s][i] ) { shindou = 1; break; } } fprintf( stderr, "\n" ); if( shindou == 1 ) { returnvalue = -1; fprintf( trap_g, "oscillating\n" ); break; } } if( l == MAXITERATION ) returnvalue = -2; FreeDoubleCub( his ); return( returnvalue ); } mafft-7.123-without-extensions/core/mccaskillwrap.c0000640000076500007650000002475412176060411021467 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 static char *whereismccaskillmea; #ifdef enablemultithread typedef struct _thread_arg { int thread_no; int njob; int *jobpospt; int **gapmap; char **nogap; int nlenmax; RNApair ***pairprob; pthread_mutex_t *mutex; } thread_arg_t; #endif void outmccaskill( FILE *fp, RNApair **pairprob, int length ) { int i; RNApair *pt; for( i=0; ibestpos!=-1; pt++ ) { if( pt->bestpos > i ) fprintf( fp, "%d %d %50.40f\n", i, pt->bestpos, pt->bestscore ); } } #if 1 static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length ) { char gett[1000]; int *pairnum; int i; int left, right; float prob; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i' ) continue; sscanf( gett, "%d %d %f", &left, &right, &prob ); if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou //fprintf( stderr, "gett = %s\n", gett ); if( left != right && prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); 
pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); } } free( pairnum ); } #endif #ifdef enablemultithread static void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int thread_no = targ->thread_no; int njob = targ->njob; int *jobpospt = targ->jobpospt; int **gapmap = targ->gapmap; char **nogap = targ->nogap; int nlenmax = targ->nlenmax; RNApair ***pairprob = targ->pairprob; int i, res; FILE *infp; char *com; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); while( 1 ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob ) { pthread_mutex_unlock( targ->mutex ); // return( NULL ); break; } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); commongappick_record( 1, nogap+i, gapmap[i] ); if( strlen( nogap[i] ) == 0 ) continue; sprintf( dirname, "_%d", i ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no ); sprintf( com, "%s/_mccaskillinorg", dirname ); infp = fopen( com, "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname ); system( com ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea ); else sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } 
sprintf( com, "%s/_mccaskillout", dirname ); infp = fopen( com, "r" ); readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); // nanosleep( 100000 ); sleep( 1 ); system( com ); } } free( dirname ); free( com ); return( NULL ); } #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; inputfile = NULL; dorp = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; whereismccaskillmea = NULL; alg = 's'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'd': whereismccaskillmea = *++argv; fprintf( stderr, "whereismccaskillmea = %s\n", whereismccaskillmea ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 's': alg = 's'; // use scarna; default break; case 'G': alg = 'G'; // use dafs, instead of scarna break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static char com[10000]; static int *nlen; int left, right; int res; static char **name, **seq, **nogap; static int **gapmap; static int *order; int i, j; FILE *infp; RNApair ***pairprob; RNApair **alnpairprob; RNApair *pairprobpt; RNApair *pt; int *alnpairnum; float prob; int adpos; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !whereismccaskillmea ) whereismccaskillmea = ""; getnumlen( infp ); rewind( infp ); if( dorp != 'd' ) { fprintf( stderr, "nuc only\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, 
nlenmax*2+1 ); nogap = AllocateCharMtx( njob, nlenmax*2+1 ); gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); alnpairnum = AllocateIntVec( nlenmax ); for( i=0; i 0 ) { int jobpos; pthread_t *handle; pthread_mutex_t mutex; thread_arg_t *targ; jobpos = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } else #endif { for( i=0; i%d\n", i ); continue; } infp = fopen( "_mccaskillinorg", "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea ); else sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea ); res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } infp = fopen( "_mccaskillout", "r" ); readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); fprintf( stdout, ">%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } for( i=0; ibestpos!=-1; pairprobpt++ ) { left = gapmap[i][j]; right = gapmap[i][pairprobpt->bestpos]; prob = pairprobpt->bestscore; for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) if( pt->bestpos == right ) break; if( pt->bestpos == -1 ) { alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); adpos = alnpairnum[left]; alnpairnum[left]++; alnpairprob[left][adpos].bestscore = 0.0; alnpairprob[left][adpos].bestpos = right; 
alnpairprob[left][adpos+1].bestscore = -1.0; alnpairprob[left][adpos+1].bestpos = -1; pt = alnpairprob[left]+adpos; } else adpos = pt-alnpairprob[left]; pt->bestscore += prob; if( pt->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); } } for( i=0; ibestpos!=-1; pairprobpt++ ) { pairprobpt->bestscore /= (float)njob; left = i; right = pairprobpt->bestpos; prob = pairprobpt->bestscore; fprintf( stdout, "%d-%d, %f\n", left, right, prob ); } return( 0 ); #endif } mafft-7.123-without-extensions/core/functions.h0000640000076500007650000007606512225635370020661 0ustar katohkatohextern int intlen( int *num ); extern char seqcheck( char **seq ); extern void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx ); extern void exitall( char arr[] ); extern void display( char **seq, int nseq ); extern void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ); extern void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ); extern void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ); extern double score_calc5( char **seq, int s, double **eff, int ex ); extern double score_calc4( char **seq, int s, double **eff, int ex ); extern void upg2( int nseq, double **eff, int ***topol, double **len ); //extern void veryfastsupg_float_realloc_nobk_halfmtx( int njob, float **mtx, int ***topol, float **len ); //extern void veryfastsupg_float_realloc_nobk( int njob, float **mtx, int ***topol, float **len ); extern void veryfastsupg_int_realloc_nobk( int njob, int **mtx, int ***topol, double **len ); extern void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ); extern void veryfastsupg_double( int nseq, double **oeff, int ***topol, double **len ); extern void 
veryfastsupg_double_loadtree( int nseq, double **oeff, int ***topol, double **len, char **name ); //extern void veryfastsupg_double_loadtop( int nseq, double **oeff, int ***topol, double **len ); extern void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ); extern void fastsupg( int nseq, double **oeff, int ***topol, double **len ); extern void supg( int nseq, double **oeff, int ***topol, double **len ); extern void spg( int nseq, double **oeff, int ***topol, double **len ); extern double ipower( double x, int n ); extern void countnode( int nseq, int ***topol, double **node ); extern void countnode_int( int nseq, int ***topol, int **node ); extern void counteff_simple( int nseq, int ***topol, double **len, double *node ); extern void counteff_simple_float( int nseq, int ***topol, float **len, double *node ); extern void counteff_simple_float_nostatic( int nseq, int ***topol, float **len, double *node ); extern void counteff( int nseq, int ***topol, double **len, double **node ); extern float score_calc1( char *seq1, char *seq2 ); extern float score_calcp( char *seq1, char *seq2, int len ); extern float substitution_nid( char *seq1, char *seq2 ); extern float substitution_score( char *seq1, char *seq2 ); extern float substitution_hosei( char *seq1, char *seq2 ); extern float substitution( char *seq1, char *seq2 ); extern void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff ); extern float bscore_calc( char **seq, int s, double **eff ); extern void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax ); extern void FreeTmpSeqs( char **mseq2, char *mseq1 ); extern void gappick_samestring( char *aseq ); extern void gappick0( char *aseq, char *seq ); extern void gappick( int nseq, int s, char **aseq, char **mseq2, double **eff, double *effarr ); extern void commongappick_record( int nseq, char **seq, int *map ); extern void commongappick( int nseq, char **seq ); extern double score_calc0( char **seq, int 
s, double **eff, int ex ); extern void strins( char *str1, char *str2 ); extern int isaligned( int nseq, char **seq ); extern double score_calc_for_score( int nseq, char **seq ); extern void floatncpy( float *vec1, float *vec2, int len ); extern float score_calc_a( char **seq, int s, double **eff ); extern float score_calc_s( char **seq, int s, double **eff ); extern double score_calc_for_score_s( int s, char **seq ); extern double SSPscore( int s, char **seq ); extern double DSPscore( int s, char **seq ); extern int searchAnchors( int nseq, char **seq, Segment *seg ); extern char *progName( char *str ); extern void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ); extern void dontcalcimportance_lastone( int nseq, double *eff, char **seq, LocalHom **localhom ); extern void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ); extern void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ); extern void weightimportance2( int nseq, double *eff, LocalHom **localhom ); extern void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom ); extern void extendlocalhom( int nseq, LocalHom **localhom ); extern void extendlocalhom2( int nseq, LocalHom **localhom, double **mtx ); extern int makelocal( char *s1, char *s2, int thr ); extern void mdfymtx( char **pair, int s1, double **partialmtx, double **mtx ); extern float score_calc( char **seq, int s ); extern void cpmx_calc( char **seq, float **cpmx, double *eff, int lgth, int clus ); extern void cpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus ); extern void MScpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus ); extern void mseqcat( char **seq1, char **seq2, double **eff, double *effarr1, double *effarr2, char name1[M][B], char name2[M][B], int clus1, int clus2 ); extern void strnbcat( char *s1, char *s2, int m ); extern int conjuctionforgaln( int s0, int s1, 
char **seq, char **aseq, double *peff, double *eff, char **name, char **aname, char *d ); extern int fastconjuction( int *memlist, char **seq, char **aseq, double *peff, double *eff, char name[M][B], char aname[M][B], char *d ); extern int fastconjuction_noname_kozo( int *memlist, char **seq, char **aseq, double *peff, double *eff, double *peff_kozo, double *eff_kozo, char *d ); extern int fastconjuction_noname( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d ); extern int fastconjuction_noweight( int *memlist, char **seq, char **aseq, double *peff, char *d ); extern int conjuctionfortbfast_old( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char *d ); extern int conjuction( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char **name, char **aname, char *d ); extern void floatdelete( float **cpmx, int d, int len ); extern void chardelete( char *seq, int d ); extern int RootBranchNode( int nseq, int ***topol, int step, int branch ); extern void BranchLeafNode( int nseq, int ***topol, int *node, int step, int branch ); extern void RootLeafNode( int nseq, int ***topol, int *node ); extern void nodeFromABranch( int nseq, int *result, int **pairwisenode, int ***topol, double **len, int step, int num ); //extern void OneClusterAndTheOther( int locnjob, char **pair, int *s1, int *s2, int ***topol, int step, int branch ); extern void OneClusterAndTheOther_fast( int locnjob, int *memlist1, int *memlist2, int *s1, int *s2, char *pairbuf, int ***topol, int step, int branch ); extern void makeEffMtx( int nseq, double **mtx, double *vec ); extern void node_eff( int nseq, double *eff, int *node ); extern int shrinklocalhom( char **pair, int s1, int s2, LocalHom **localhom, LocalHom ***localhomshrink ); extern int msshrinklocalhom_fast( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink ); extern int fastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom 
***localhomshrink ); extern int fastshrinklocalhom_one( int *mem1, int *mem2, int norg, LocalHom **localhom, LocalHom ***localhomshrink ); extern int msfastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink ); extern int TreeDependentIteration( int locnjob, char **name, int nlen[M], char **aseq, char **bseq, int ***topol, double **len, int **skipthisbranch, int alloclen, LocalHom **localhomtable, RNApair ***single, int nkozo, char *kozoarivec ); extern void checkMinusLength( int nseq, double **len ); extern void negativeMember2( int *mem, int *query, int locnseq ); extern int *negativeMember( int *query, int locnseq ); extern int IntExistsInVec( int query, int *vector ); extern NodeInCub searchParent( int top, int ***topol, int Start, int End ); extern void stopolInit( int n, Node *stopol ); extern void treeCnv( Node *stopol, int locnseq, int ***topol, double **len, double **bw ); extern int isLeaf( Node node ); extern double syntheticLength( Node *ob, Node *oppositeNode ); extern double calcW( Node *ob, Node *op ); extern void calcBranchWeight( double **bw, int locnseq, Node *stopol, int ***topol, double **len ); extern void branchWeightToPairWeight( int locnseq, int ***topol, double **pw, double **bw ); extern void weightFromABranch_rec( double *result, Node *ob, Node *op ); extern void weightFromABranch( int nseq, double *result, Node *stopol, int ***topol, int step, int LorR ); extern void keika( char *str, int current, int all ); extern double maxItch( double *soukan, int size ); extern void calcNaiseki( Fukusosuu *value, Fukusosuu *x, Fukusosuu *y ); extern Fukusosuu *AllocateFukusosuuVec( int l1 ); extern Fukusosuu **AllocateFukusosuuMtx( int l1, int l2 ); extern Fukusosuu ***AllocateFukusosuuCub( int l1, int l2, int l3 ); extern void FreeFukusosuuVec( Fukusosuu *vec ); extern void FreeFukusosuuMtx( Fukusosuu **mtx ); extern int getKouho( int *kouho, int nkouho, double *soukan, int nlen2 ); extern void zurasu2( int lag, int 
clus1, int clus2, char **seq1, char **seq2, char **aseq1, char **aseq2 ); extern void zurasu( int lag, int clus1, int clus2, char **seq1, char **seq2, char **aseq1, char **aseq2 ); extern int alignableReagion( int clus1, int clus2, char **seq1, char **seq2, double *eff1, double *eff2, Segment *seg ); extern void blockAlign( int *cut1, int *cut2, double **ocrossscore, int *ncut ); extern void blockAlign2( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ); extern void blockAlign3( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ); extern float A__align11( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch ); extern float imp_match_out_scH( int i1, int j1 ); extern void imp_match_init_strictH( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore ); extern float imp_match_out_scQ( int i1, int j1 ); extern float imp_match_out_scR( int i1, int j1 ); extern void imp_match_init_strictQ( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore ); extern void imp_match_init_strictR( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore ); extern void imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom ); extern float MSalignmm( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, int *, int, int *, int headgp, int tailgp ); extern float Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, float **map ); extern float 
Lalign2m2m_hmout( char **seq1, char **seq2, char **seq1r, char **seq2r, char *dir1, char *dir2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, float **map ); extern float MSalign11( char **seq1, char **seq2, int alloclen ); //extern float rnalocal( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, RNApair **pair ); extern float A__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *, int, int *, int headgp, int tailgp ); extern float H__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 ); extern float Q__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 ); extern float Q__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *gapmap1, int *gapmap2 ); extern float R__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 ); extern float R__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 ); extern float A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int *gapmap1, int *gapmap2 ); extern float D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, 
int *gapmap1, int *gapmap2 ); extern float translate_and_Calign( char **mseq1, char **mseq2, double *effarr1, double *effarr2, int clus1, int clus2, int alloclen ); extern double Fgetlag( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen ); extern float Falign( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, int *fftlog, int *, int, int * ); extern float Falign_udpari_long( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, int *fftlog ); float Falign_localhom( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, LocalHom ***localhom, float *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres ); extern float part_imp_match_out_sc( int i1, int j1 ); extern float part_imp_match_out_scQ( int i1, int j1 ); extern void part_imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore ); extern void part_imp_match_init_strictQ( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore ); extern void part_imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom ); extern float partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * ); extern float partQ__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, 
char *, char *, char *); extern float G__align11( char **seq1, char **seq2, int alloclen, int headgp, int tailgp ); extern float G__align11_noalign( int mtx[0x80][0x80], int penal, int penal_ex, char **seq1, char **seq2, int alloclen ); extern float L__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ); extern float L__align11_noalign( char **seq1, char **seq2 ); extern float genL__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ); extern float genG__align11( char **seq1, char **seq2, int alloclen ); extern float VAalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt ); extern float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt ) ; extern int fft(int n, Fukusosuu *x, int dum); extern void topolcpy( int s1[], int s2[], int *mpt1, int *mpt2 ); extern void topolcat( int s1[], int s2[], int *mpt1, int *mpt2 ); extern void topolsort( int m, int s[] ); extern void topolswap( int s1[], int s2[], int *mpt1, int *mpt2 ); extern void reduc( double **mtx, int nseq, int im, int jm ); extern void nj( int nseq, double **omtx, int ***topol, double **dis ); extern void JTTmtx( double **rsr, double *freq, char locamino[0x80], char locgrp[0x80], int isTM ); extern void BLOSUMmtx( int n, double **matrix, double *freq, char *amino, char *amino_grp ); extern void extendedmtx( double **matrix, double *freq, char *amino, char *amino_grp ); extern void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ); extern void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ); extern void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ); extern void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ); extern void 
putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ); extern char *cutal( char *al, int al_display_start, int start, int end ); extern void ErrorExit( char *message ); extern void strncpy_caseC( char *str1, char *str2, int len ); extern void seqUpper( int nseq, char **seq ); extern void seqLower( int nseq, char **seq ); extern int getaline_fp_eof( char *s, int l, FILE *fp ); extern int getaline_fp_eof_new(char s[], int l, FILE *fp); extern int myfgets(char s[], int l, FILE *fp); extern float input_new( FILE *fp, int d ); extern void PreRead( FILE *fp, int *locnjob, int *locnlenmax ); extern int allSpace( char *str ); extern void Read( char name[M][B], int nlen[M], char **seq ); extern void FRead( FILE *fp, char name[][B], int nlen[], char **seq ); extern void kake2hiku( char *str ); extern void readDataforgaln( FILE *fp, char **name, int *nlen, char **seq ); extern void readData( FILE *fp, char name[][B], int nlen[], char **seq ); extern void readData_pointer_casepreserve( FILE *fp, char **name, int *nlen, char **seq ); extern void readData_pointer( FILE *fp, char **name, int *nlen, char **seq ); extern void readData_pointer2( FILE *fp, int nseq, char **name, int *nlen, char **seq ); extern void readData_varlen( FILE *fp, char **name, int *nlen, char **seq ); extern int countATGC( char *s, int *total ); extern void getnumlen( FILE *fp ); extern void getnumlen_casepreserve( FILE *fp, int *nlenmin ); extern void getnumlen_nogap( FILE *fp, int *nlenmin ); extern void WriteGapFill( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq ); extern void writeDataforgaln( FILE *fp, int locnjob, char **name, int *nlen, char **aseq ); extern void writeData( FILE *fp, int locnjob, char name[][B], int nlen[], char **aseq ); extern void writeData_pointer( FILE *fp, int locnjob, char **name, int *nlen, char **aseq ); extern void readhat2_floathalf( FILE *fp, int nseq, char name[M][B], float **mtx ); extern void 
readhat2_floathalf_pointer( FILE *fp, int nseq, char **name, float **mtx ); extern void readhat2_floathalf_part_pointer( FILE *fp, int nseq, int nadd, char **name, float **mtx ); extern void readhat2_float( FILE *fp, int nseq, char name[M][B], float **mtx ); extern void readhat2_int( FILE *fp, int nseq, char name[M][B], int **mtx ); extern void readhat2_pointer( FILE *fp, int nseq, char **name, double **mtx ); extern void readhat2( FILE *fp, int nseq, char name[M][B], double **mtx ); extern void WriteFloatHat2_pointer_halfmtx( FILE *hat2p, int locnjob, char **name, float **mtx ); extern void WriteFloatHat2( FILE *hat2p, int locnjob, char name[M][B], float **mtx ); extern void WriteHat2_int( FILE *hat2p, int locnjob, char name[M][B], int **mtx ); extern void WriteHat2( FILE *hat2p, int locnjob, char name[M][B], double **mtx ); extern void WriteHat2_pointer( FILE *hat2p, int locnjob, char **name, double **mtx ); extern void WriteHat2_part_pointer( FILE *hat2p, int locnjob, int nadd, char **name, double **mtx ); extern int ReadFasta_sub( FILE *fp, double *dis, int nseq, char name[M][B] ); extern int ReadSsearch( FILE *fp, double *dis, int nseq, char name[M][B] ); extern int ReadBlastm7( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ); extern int ReadBlastm7_scoreonly( FILE *fp, double *dis, int nin ); extern int ReadBlastm7_avscore( FILE *fp, double *dis, int nin ); extern int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ); extern int ReadFasta34m10_nuc( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ); extern int ReadFasta34m10( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ); extern int ReadFasta34m10_scoreonly_nuc( FILE *fp, double *dis, int nin ); extern int ReadFasta34m10_scoreonly( FILE *fp, double *dis, int nin ); extern int ReadFasta34( FILE *fp, double *dis, int nseq, char name[M][B], LocalHom *localhomlist ); extern int ReadFasta3( FILE *fp, double 
*dis, int nseq, char name[M][B] ); extern int ReadFasta( FILE *fp, double *dis, int nseq, char name[M][B] ); extern int ReadOpt( FILE *fp, int opt[M], int nseq, char name[M][B] ); extern int ReadOpt2( FILE *fp, int opt[M], int nseq, char name[M][B] ); extern int writePre( int nseq, char **name, int nlen[M], char **aseq, int force ); extern void readOtherOptions( int *ppidptr, int *fftThresholdptr, int *fftWinSizeptr ); extern void initSignalSM( void ); extern void initFiles( void ); extern void WriteForFasta( FILE *fp, int locnjob, char **name, int nlen[M], char **aseq ); extern void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ); extern void readlocalhomtable2( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ); extern void readlocalhomtable_part( FILE*fp, int njob, int nadd, LocalHom **localhomtable, char *kozoarivec ); extern void readlocalhomtable_two( FILE*fp, int njob, int nadd, LocalHom **localhomtable, LocalHom **localhomtablex, char *kozoarivec ); extern void readlocalhomtable_one( FILE*fp, int njob, int nadd, LocalHom **localhomtable, char *kozoarivec ); extern void outlocalhom( LocalHom **localhom, int nseq ); extern void outlocalhom_part( LocalHom **localhom, int norg, int nadd ); extern void outlocalhompt( LocalHom ***localhom, int n1, int n2 ); extern void FreeLocalHomTable( LocalHom **localhomtable, int n ) ; extern void FreeLocalHomTable_part( LocalHom **localhomtable, int n, int m ) ; extern void FreeLocalHomTable_two( LocalHom **localhomtable, int n, int m ) ; extern void FreeLocalHomTable_one( LocalHom **localhomtable, int n, int m ) ; extern void constants( int nseq, char **seq ); extern void clustalout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, char *mark, char *comment, int *order, int namelen ); extern void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order, int namelen ); extern void writeData_reorder( FILE *fp, int locnjob, char 
name[][B], int nlen[], char **aseq, int *order ); extern void writeData_reorder_pointer( FILE *fp, int locnjob, char **name, int *nlen, char **aseq, int *order ); extern void resetlocalhom( int, LocalHom ** ); extern int load1SeqWithoutName_new( FILE *fpp, char *cbuf ); extern char *load1SeqWithoutName_realloc( FILE *fpp ); extern char *load1SeqWithoutName_realloc_casepreserve( FILE *fpp ); extern int disttbfast( char **in, int nlen[M], char name[M][B] ); extern void searchKUorWA( FILE *fp ); extern void gapireru( char *res, char *ori, char *gt ); extern int seqlen( char *seq ); extern void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len ); extern void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len ); extern void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len ); extern void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len ); extern void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len ); extern void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *s, char *e ); extern void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *g ); extern void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *g ); extern void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *s, char *e ); extern void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *g ); extern void getgapfreq( float *freq, int clus, char **seq, double *eff, int len ); extern void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len ); //extern void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *s ); extern void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *s ); extern void getdiaminofreq_part( float *freq, int clus, 
char **seq, double *eff, int len, char *s, char *e ); extern void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *s, char *e ); extern void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len ); extern void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len ); extern void st_getGapPattern( Gappat **gpat, int clus, char **seq, double *eff, int len ); extern void getkyokaigap( char *g, char **s, int pos, int n ); extern double *loadaamtx( void ); extern float naivepairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ); extern float naivepairscore11( char *seq1, char *seq2, int penal ); extern float naiveQpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ); extern float naiveRpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ); extern float naiveHpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ); extern void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, float **impmtx, int *gapmap1, int *gapmap2, RNApair *pair ); extern void foldrna_gappick( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, float **impmtx, int *gapmap1, int *gapmap2, RNApair *pair ); extern void imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair ); extern void imp_rnaQ( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair ); extern void part_imp_rnaQ( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair ); extern void 
part_imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair ); extern void imp_rnaQ_gappick( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair ); extern void foldalignedrna( int clus1, int clus2, char **mseq1, char **mseq2, double *effarr1, double *effarr2, RNApair *rnapairboth ); void readmccaskill( FILE *fp, RNApair **pairprob, int length ); void makegrouprna( RNApair ***group, RNApair ***all, int *memlist ); void makegrouprnait( RNApair ***group, RNApair ***all, char *pair, int s ); extern void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *, int progressout ); extern void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep * ); //extern void loadtop( int nseq, float **eff, int ***topol, float **len ); extern void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep * ); extern void fixed_supg_float_realloc_nobk_halfmtx_treeout_constrained( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *, int ncons, int **constraints ); extern void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ); extern void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ncons, int **constraints ); extern void imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1kozo, double*eff2kozo, LocalHom ***localhom, int forscore ); extern void miyataout_reorder_pointer( FILE *fp, int locnjob, int nlenmax, char **name, int *nlen, char **aseq, int *order ); extern void veryfastsupg_double_outtree( int nseq, 
double **eff, int ***topol, double **len, char **name ); extern void cpmx_ribosum( char **seq, char **seqr, char *dir, float **cpmx, double *eff, int lgth, int clus ); extern void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob ); extern void readpairfoldalign( FILE *fp, char *seq1, char *seq2, char *aln1, char *aln2, int q1, int q2, int *of1, int *of2, int sumlen ); extern void write1seq( FILE *fp, char *aseq ); extern void assignstrweight( int nseq, double *strweight, Node *stopol, int ***topol, int step, int LorR, char *kozoari, double *seqweight ); extern void cutData( FILE *, int **, char **, int * ); extern void cutAlignment( FILE *, int **, char **, int *, char **, char ** ); extern void catData( FILE * ); extern void getnumlen_nogap_outallreg_web( FILE *fp, FILE *ofp, int *nlenminpt, int *isalignedpt ); extern void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt ); extern double plainscore( int nseq, char **seq ); extern void eq2dash( char *s ); extern void findnewgaps( int n, int rep, char **seq, int *gaplen ); extern void findcommongaps( int, char **, int * ); extern void adjustgapmap( int, int *, char * ); extern void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar ); extern void restorecommongaps( int n, char **seq, int *top0, int *top1, int *gaplen, int alloclen, char gapchar ); extern int samemember( int *mem, int *cand ); extern int includemember( int *mem, int *cand ); extern int overlapmember( int *mem1, int *mem2 ); extern void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ); extern void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ); extern void sreverse( char *r, char *s ); extern int addonetip( int njobc, int ***topolc, float **lenc, float **iscorec, int ***topol, float **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name ); 
extern void intcpy( int *s1, int *s2 ); extern void intcat( int *s1, int *s2 ); extern void readsubalignmentstable( int n, int **table, int *preservegaps, int *nsubpt, int *maxmempt ); extern int myatoi( char * ); extern void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth ); extern void gapcountf( float *freq, char **seq, int nseq, double *eff, int lgth ); extern void outgapcount( float *freq, int nseq, char *gappat, double *eff ); mafft-7.123-without-extensions/core/dndpre.c0000640000076500007650000001711212176060062020077 0ustar katohkatoh#include "mltaln.h" #define TEST 0 static int treeout = 0; static int maxdist = 1; static int nadd = 0; #ifdef enablemultithread typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _thread_arg { int njob; int thread_no; float *selfscore; double **mtx; char **seq; Jobtable *jobpospt; pthread_mutex_t *mutex; } thread_arg_t; void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; float *selfscore = targ->selfscore; double **mtx = targ->mtx; char **seq = targ->seq; Jobtable *jobpospt = targ->jobpospt; int i, j; float ssi, ssj, bunbo; double mtxv; if( njob == 1 ) return( NULL ); while( 1 ) { pthread_mutex_lock( targ->mutex ); j = jobpospt->j; i = jobpospt->i; j++; // fprintf( stderr, "\n i=%d, j=%d before check\n", i, j ); if( j == njob ) { // fprintf( stderr, "\n j = %d, i = %d, njob = %d\n", j, i, njob ); fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", i+1, njob, thread_no ); i++; j = i + 1; if( i == njob-1 ) { // fprintf( stderr, "\n i=%d, njob-1=%d\n", i, njob-1 ); pthread_mutex_unlock( targ->mutex ); return( NULL ); } } // fprintf( stderr, "\n i=%d, j=%d after check\n", i, j ); jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); if( bunbo == 0.0 ) mtxv = maxdist; else mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], 
penalty ) / bunbo ); #if 1 if( mtxv > 9.0 || mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j] = mtxv; } } #endif void arguments( int argc, char *argv[] ) { int c; nadd = 0; nthread = 1; alg = 'X'; fmodel = 0; treeout = 0; scoremtx = 1; nblosum = 62; dorp = NOTSPECIFIED; inputfile = NULL; ppenalty = NOTSPECIFIED; //? ppenalty_ex = NOTSPECIFIED; //? poffset = NOTSPECIFIED; //? kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 't': treeout = '1'; break; case 'D': dorp = 'd'; break; case 'a': fmodel = 1; break; case 'P': dorp = 'p'; break; case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame. break; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'M': maxdist = myatoi( *++argv ); fprintf( stderr, "maxdist = %d\n", maxdist ); --argc; goto nextoption; case 
'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char **argv ) { int i, j, ilim; char **seq; static char **name; static int nlen[M]; float *selfscore; double **mtx; double mtxv; FILE *fp; FILE *infp; float ssi, ssj, bunbo; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); njob -= nadd; // atarashii hairetsu ha mushi seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); mtx = AllocateDoubleMtx( njob, njob ); selfscore = AllocateFloatVec( njob ); #if 0 FRead( stdin, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 for( i=0; i 0 ) { thread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 9.0 || mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #else // CHUUI!!! 
2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j] = mtxv; } } } #endif #if TEST for( i=0; i -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif free( scarr ); } static float Atracking( float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int ist, int ien, int jst, int jen, int fulllen1, int fulllen2, int tailgp ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, klim; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; float wm; lgth1 = ien-ist+1; lgth2 = jen-jst+1; gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } #if 0 else if( jen == fulllen2-1 ) { fprintf( stderr, "searching lastverticalw\n" ); wm = lastverticalw[0]; for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } } else if( ien == fulllen1-1 ) { fprintf( stderr, "searching lasthorizontalw\n" ); wm = lasthorizontalw[0]; for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } #endif for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) 
break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; #if STOREWM char ttt1[10000], ttt2[10000]; #endif lgth1 = ien-ist+1; lgth2 = jen-jst+1; #if STOREWM strncpy( ttt1, seq1[0]+ist, lgth1 ); ttt1[lgth1] = 0; strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt2[lgth2] = 0; fprintf( stderr, "in _tanni ist,ien = %d,%d, lgth1=%d\n", ist, ien, lgth1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, lgth2=%d\n", jst, jen, lgth2 ); fprintf( stderr, "ttt1 = %s\n", ttt1 ); fprintf( stderr, "ttt2 = %s\n", ttt2 ); #endif #if 0 fprintf( stderr, "in _tanni ist,ien = %d,%d, fulllen1=%d\n", ist, ien, fulllen1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, fulllen2=%d\n", jst, jen, fulllen2 ); fprintf( stderr, "in _tanni seq1[0] = %-*.*s\n", ien-ist+1, ien-ist+1, seq1[0]+ist ); fprintf( stderr, "in _tanni seq2[0] = %-*.*s\n", jen-jst+1, jen-jst+1, seq2[0]+jst ); #endif ll1 = ( (int)(lgth1) ) + 100; ll2 = ( (int)(lgth2) ) + 100; // aseq1 = AllocateCharMtx( icyc, 0 ); // aseq2 = AllocateCharMtx( jcyc, 0 ); // aseq1bk = AllocateCharMtx( icyc, lgth1+lgth2+100 ); // aseq2bk = AllocateCharMtx( jcyc, lgth1+lgth2+100 ); // for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( 
stderr, "wm = %f\n", wm ); Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, ist, ien, jst, jen, fulllen1, fulllen2, tailgp ); #if 0 fprintf( stderr, "res in _tanni mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "res in _tanni mseq2[0] = %s\n", mseq2[0] ); #endif // for( i=0; i 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; depth++; reccycle++; lgth1 = ien-ist+1; lgth2 = jen-jst+1; // if( lgth1 < 5 ) // fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 ); // if( lgth2 < 5 ) // fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 ); // #if STOREWM fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen ); strncpy( ttt1, seq1[0]+ist, lgth1 ); strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt1[lgth1] = 0; ttt2[lgth2] = 0; fprintf( stderr, "seq1 = %s\n", ttt1 ); fprintf( stderr, "seq2 = %s\n", ttt2 ); #endif if( lgth2 <= 0 ) // lgth1 <= 0 ha? 
{ // fprintf( stderr, "\n\n==== jimei\n\n" ); // exit( 1 ); for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #else for( j=lgth2-1; j>-1; --j ) { m[j] = currentw[j+1] + fgcp1[lgth1-2] * gapfreq2f[j+1]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #endif // for( j=0; j=imid; i-- ) firstm = -9999999.9; // firstmp = lgth1-1; firstmp = lgth1; for( i=lgth1-2; i>-1; i-- ) { #ifdef enablemultithread // fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref ); if( chudanpt && *chudanpt != chudanref ) { // fprintf( stderr, "\n\n## CHUUDAN!!! 
kouhan\n" ); *chudanres = 1; freearrays_rec1 ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, floatwork, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); freearrays_rec2( gaps, aseq1, aseq2 ); return( -1.0 ); } #endif wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2] * gapfreq1f[i+1]; // mi = previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1] * gapfreq1f[i]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j] * gapfreq1f[i+1]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1] * gapfreq2f[j]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i] * gapfreq2f[j+1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? 
} if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? } #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j+1] += *mjpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if STOREWM fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j 100 ) // naze 100 if( imid < firstmp-1 ) // naze 100 { jumpi = firstmp; imid = firstmp+1; } #if 0 else { jumpi = 0; imid = 1; } #endif #endif } #if 0 else if( jmid == lgth2 ) { fprintf( stderr, "CHUI1!\n" ); jumpi=0; jumpj=0; imid=jumpforwi[0]; jmid=lgth2-1; } #else // 060414 else if( jmid >= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { // fprintf( stderr, "#### CHUI3!\n" ); imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; if( imid == jumpi ) jumpi = 
imid-1; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif // fprintf( stderr, "#### FINAL i=%d, jumpi N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif freearrays_rec1 ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, floatwork, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); // fprintf( stderr, "==== calling myself (first)\n" ); value = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // chudan mada #if 0 fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] ); fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] ); #endif #if MEMSAVE #else for( i=0; i 0 ) { // for( i=0; i 0 ) { // for( i=0; i 1 || maxwm - value > 1 ) { fprintf( stderr, "WARNING value = %f, but maxwm = %f\n", value, maxwm ); for( i=0; i1-%d\n%s\n", i, mseq1[i] ); fprintf( stderr, "%s\n", aseq1[i] ); } for( i=0; i2-%d\n%s\n", i, mseq2[i] ); fprintf( stderr, "%s\n", aseq2[i] ); } // exit( 1 ); } else { fprintf( stderr, "value = %.0f, maxwm = %.0f -> ok\n", value, maxwm ); } #endif #if MEMSAVE #else for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, 
headgapfreq1, headgapfreq2 ); #ifdef enablemultithread if( chudanres && *chudanres ) { // fprintf( stderr, "\n\n## CHUUDAN!!! relay\n" ); *chudanres = 1; freearrays( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1, cpmx2, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 ); return( -1.0 ); } #endif #if 0 fprintf( stderr, "\n" ); fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); fprintf( stderr, "\n" ); #endif // fprintf( stderr, "wm = %f\n", wm ); for( i=0; i output # See http://mafft.cbrc.jp/alignment/software/regionalrealignment.html # # v0.2, 2013/Sep/21, Fixed a windows-specific bug. # ##################################################################### def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").strip ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end def resolve( tree ) while 1 # p tree tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" ) hit1 = $1 hit2 = $2 hit3 = $3 hit4 = $4 # p hit1 # p hit2 # p hit3 # p hit4 # puts "introduce XXX" # p tree break unless tree.index(/XXX/) poshit = tree.index(/XXX/) # puts "poshit=" + poshit.to_s i = poshit height = 0 while i >= 0 break if height == 0 && tree[i..i] == '(' if tree[i..i] == ')' then height += 1 elsif tree[i..i] == '(' then height -= 1 end i -= 1 end poskakko = i # puts "poskakko = " + poskakko.to_s zenhan = tree[0..poskakko] zenhan = "" if poskakko == -1 # puts "zenhan = " + zenhan treelen = tree.length tree = zenhan + "(" + tree[poskakko+1..treelen] # puts "add (" # p tree tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" ) # p tree end return tree end if ARGV.length != 2 then STDERR.puts "" STDERR.puts "Usage: ruby #{$0} setingfile inputfile > output" STDERR.puts "" exit 1 end infilename = ARGV[1] tname = [] tseq = [] infp = 
File.open( infilename, "r" ) tin = readfasta( infp, tname, tseq ) infp.close if tin == 0 then STDERR.puts "" STDERR.puts "Error in the '#{infilename}' file. Is this FASTA format?\n" STDERR.puts "" exit 1 end alnlen = tseq[0].length if alnlen == 0 then STDERR.puts "" STDERR.puts "Error in the '#{infilename}' file. Is this FASTA format?\n" STDERR.puts "" exit 1 end for i in 0..(tin-1) if alnlen != tseq[i].length then STDERR.puts "" STDERR.puts "Please insert gaps such that all the input sequences have the same length.\n" STDERR.puts "" exit 1 end end checkmap = [] for i in 0..(alnlen-1) checkmap.push(0) end outputseq = [] for i in 0..(tin-1) outputseq.push("") end settingfile = ARGV[0].to_s reg = [] startpos = [] endpos = [] realign = [] options = [] treeoption = "" revwarn = 0 sfp = File.open( settingfile, "r" ) while line = sfp.gets line.sub!(/#.*/,"") next if line.length < 2 if line.strip =~ /^treeoption / then treeoption = line.strip.sub(/.*treeoption/,"") break end end sfp.close sfp = File.open( settingfile, "r" ) while line = sfp.gets line.sub!(/#.*/,"") next if line.length < 2 next if line.strip =~ /^treeoption/ startposv = line.split(' ')[0].to_i - 1 endposv = line.split(' ')[1].to_i - 1 if startposv < 0 || endposv < 0 then STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n" STDERR.puts line STDERR.puts "Sites must be numbered as 1, 2, ...\n" STDERR.puts "\n" exit 1 end if startposv >= alnlen || endposv >= alnlen then STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n" STDERR.puts line STDERR.puts "Sites must be numbered as 1, 2, ... #{alnlen}\n" STDERR.puts "\n" exit 1 end if startposv > endposv then STDERR.puts "\nWarning. 
Please check this line:\n" STDERR.puts line STDERR.puts "Start position > End position ?\n" STDERR.puts "\n" revwarn = 1 # exit 1 end startpos.push( startposv ) endpos.push( endposv ) if startposv > endposv for k in (endposv)..(startposv) checkmap[k] += 1 end else for k in (startposv)..(endposv) checkmap[k] += 1 end end if line.split(' ')[2] == "realign" then realign.push( 1 ) elsif line.split(' ')[2] == "preserve" then realign.push( 0 ) else STDERR.puts "\n" STDERR.puts "The third column must be 'realign' or 'preserve'\n" STDERR.puts "Please check this line:\n" STDERR.puts line STDERR.puts "\n" exit 1 end if line =~ / \-\-/ && line =~ /realign/ then options.push( line.sub(/.*realign/,"").strip ) else options.push( treeoption ) end end sfp.close #p startpos #p endpos #p options #res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 --thread -1 #{infilename} > _dum" res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 #{infilename} > _dum" if res == false then STDERR.puts "\n" STDERR.puts "ERROR in building a guide tree" STDERR.puts "\n" exit 1 end treefp = File.open( "#{infilename}.tree", "r" ) tree = "" while line = treefp.gets tree += line.strip break if tree =~ /;$/ end treefp.close tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") scale = 1.0 mtreefp = File.open("_tree", "w") #STDERR.puts "Tree = " + tree memi = [-1,-1] leni = [-1,-1] while tree.index( /\(/ ) tree = resolve( tree ) tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" ) memi[0] = $1.to_i leni[0] = $2.to_f * scale memi[1] = $3.to_i leni[1] = $4.to_f * scale if leni[0] > 10 || leni[1] > 10 then STDERR.puts "" STDERR.puts "Please check the scale of branch length!" 
STDERR.puts "The unit of branch lengths must be 'substitution/site'" STDERR.puts "If the unit is 'substition' in your tree, please" STDERR.puts "use the scale argument," STDERR.puts "% newick2mafft scale in > out" STDERR.puts "where scale = 1/(alignment length)" STDERR.puts "" exit 1 end # STDERR.puts "subtree = " + $& if memi[1] < memi[0] then memi.reverse! leni.reverse! end tree.sub!( /XXX/, memi[0].to_s ) # STDERR.puts "Tree = " + tree mtreefp.printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] ) end mtreefp.close numreg = startpos.length for i in 0..(numreg-1) partfp = File.open( "_part", "w" ) for j in 0..(tin-1) partfp.puts ">" + tname[j] if startpos[i] > endpos[i] then partfp.puts tseq[j][endpos[i]..startpos[i]].reverse else partfp.puts tseq[j][startpos[i]..endpos[i]] end end partfp.close if( realign[i] == 1 ) then STDERR.puts "Aligning region #{startpos[i]+1} - #{endpos[i]+1}" res = system "#{$MAFFTCOMMAND} #{options[i]} --inputorder --treein _tree _part > _partout" if res == false then STDERR.puts "\n" STDERR.puts "ERROR in aligning region #{startpos[i]+1} - #{endpos[i]+1}" STDERR.puts "Please check the option:" STDERR.puts "#{options[i]}" STDERR.puts "\n" exit 1 end else STDERR.puts "Copying region #{startpos[i]+1} - #{endpos[i]+1}" # system "cp _part _partout" File.rename( "_part", "_partout" ) end pname = [] pseq = [] partfp = File.open( "_partout", "r" ) pin = readfasta( partfp, pname, pseq ) partfp.close for j in 0..(tin-1) outputseq[j] += pseq[j] end end for j in 0..(tin-1) puts ">" + tname[j] puts outputseq[j] end STDERR.puts "Done." numdupsites = checkmap.select{|x| x>1}.length if numdupsites > 0 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: #{numdupsites} sites were duplicatedly selected." 
STDERR.puts "#########################################################" STDERR.puts "" end numunselectedsites = checkmap.select{|x| x==0}.length if numunselectedsites > 0 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: #{numunselectedsites} sites were not selected." STDERR.puts "#########################################################" STDERR.puts "" end if revwarn == 1 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: The order of sites were reversed." STDERR.puts "#########################################################" STDERR.puts "" end STDERR.puts "" STDERR.puts " Tree: computed with #{treeoption} --treeout " for i in 0..(numreg-1) range = sprintf( "%6d - %6d", startpos[i]+1, endpos[i]+1 ) if realign[i] == 1 then STDERR.puts "#{range}: realigned with #{options[i]} --treein (tree)" else STDERR.puts "#{range}: preserved" end end STDERR.puts "" File.delete( "_dum" ) File.delete( "_tree" ) File.delete( "_part" ) File.delete( "_partout" ) mafft-7.123-without-extensions/core/MSalign11.c0000640000076500007650000003203712224724072020324 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj ) { // char gap[] = "-"; char *gap; gap = newgapstr; int l; fprintf( stderr, "i=%d, prevhiti=%d\n", i, prevhiti ); fprintf( stderr, "j=%d, prevhitj=%d\n", j, prevhitj ); l = prevhiti - i - 1; fprintf( stderr, "l=%d\n", l ); while( l>0 ) { *--mseq1[0] = seq1[0][i+l--]; *--mseq2[0] = *gap; } l= prevhitj - j - 1; fprintf( stderr, "l=%d\n", l ); while( l>0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][j+l--]; } if( i < 0 || j < 0 ) return; *--mseq1[0] = seq1[0][i]; *--mseq2[0] = seq2[0][j]; fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] ); fprintf( 
stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] ); } static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) { char tmpc = s1[0][i1]; char *seq2 = s2[0]; while( lgth2-- ) *match++ = amino_dis[(int)tmpc][(int)*seq2++]; } static float Atracking( float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk; // char gap[] = "-"; char *gap; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; k++; iin = ifi; jin = jfi; } return( 0.0 ); } void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 ) { register int i, j; int prevhiti, prevhitj; // int lasti, lastj; float g; float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif float *currentw, *previousw, *wtmp; float mi; int mpi; int *mpjpt; float *mjpt, *prept, *curpt; float wm = 0.0; float forwwm; currentw = w1; previousw = w2; match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 ); match_calc( currentw, seq1, seq2, lgth1-1, lgth2 ); prevhiti = iin; prevhitj = jin; fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 ); fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 ); extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 ); for( i=0; i0; --j ) { m[j-1] = currentw[j]; mp[j] = 0; // iranai } for( j=0; j-1; 
i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; match_calc( currentw, seq1, seq2, i, lgth2 ); #if 0 fprintf( stderr, "i=%d, currentw = \n", i ); for( j=0; j-1; j-- ) { // fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] ); wm = *prept; g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; } g = *prept; if( g >= mi ) { mi = g; mpi = j+1; //iranai } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; } g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; //iranai } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "*curpt = %5.0f \n", *curpt ); #endif // forwwm = wm + MIN( maxinw[i], maxinh[j] ); forwwm = wm + MIN( maxinw[i], maxinh[j] ); WMMTX[i][j] = forwwm; if( forwwm == wmmax && i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( maxinw ); FreeFloatVec( maxinh ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); maxinw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); maxinh = AllocateFloatVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeFloatMtx( WMMTX ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); WMMTX = AllocateFloatMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = -( j - mpi ); } g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; WMMTX[i][j] = *curpt; if( j wmmax ) { wmmax = g; iin = i; jin = lgth2-1; } } for( j=0; j wmmax ) { wmmax = g; iin = lgth1-1; jin = j; } } for( i=0; iMSres\n%s\n", mseq1[0] ); fprintf( stderr, ">MSres\n%s\n", mseq2[0] ); #endif #if 0 for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 1 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); #endif return( wm ); } mafft-7.123-without-extensions/core/contrafoldwrap.c0000640000076500007650000001715011522410305021643 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 static char *whereiscontrafold; void unknown_n( char *out, char *in ) { while( *in ) { if( *in == 'a' || *in == 'A' ) *out = 'A'; else if( *in == 't' || *in == 'T' || *in == 'u' || *in == 'U' ) *out = 'U'; else if( *in == 'g' || *in == 'G' ) *out = 'G'; else if( *in == 'c' || *in == 'C' ) *out = 'C'; else if( *in == '-' ) *out = '-'; else *out = 'N'; out++; in++; } *out = 0; } void outcontrafold( FILE *fp, RNApair **pairprob, int length ) { int i; RNApair *pt; for( i=0; ibestpos!=-1; pt++ ) { if( pt->bestpos > i ) fprintf( fp, "%d %d 
%f\n", i, pt->bestpos, pt->bestscore ); } } #if 1 static void readcontrafold( FILE *fp, RNApair **pairprob, int length ) { char gett[10000]; int *pairnum; char *pt; int i; int left, right; float prob; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'd': whereiscontrafold = *++argv; fprintf( stderr, "whereiscontrafold = %s\n", whereiscontrafold ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static char com[10000]; static int *nlen; int left, right; int res; static char **name, **seq, **nogap; static int **gapmap; static int *order; int i, j; FILE *infp; RNApair ***pairprob; RNApair **alnpairprob; RNApair *pairprobpt; RNApair *pt; int *alnpairnum; float prob; int adpos; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !whereiscontrafold ) whereiscontrafold = ""; getnumlen( infp ); rewind( infp ); if( dorp != 'd' ) { fprintf( stderr, "nuc only\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*2+1 ); nogap = AllocateCharMtx( njob, nlenmax*2+1 ); gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); alnpairnum = AllocateIntVec( nlenmax ); for( i=0; iin\n%s\n", nogap[i] ); fclose( infp ); #if 0 // contrafold v1 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 > _contrafoldout", whereiscontrafold ); #else // 
contrafold v2 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 _contrafoldout", whereiscontrafold ); #endif res = system( com ); if( res ) { fprintf( stderr, "error in contrafold\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "==\n" ); fprintf( stderr, "== This version of MAFFT supports CONTRAfold v2.02.\n" ); fprintf( stderr, "== If you have a lower version of CONTRAfold installed in the\n" ); fprintf( stderr, "== %s directory,\n", whereiscontrafold ); fprintf( stderr, "== please update it!\n" ); fprintf( stderr, "==\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "=================================================================\n" ); exit( 1 ); } infp = fopen( "_contrafoldout", "r" ); readcontrafold( infp, pairprob[i], nlenmax ); fclose( infp ); fprintf( stdout, ">%d\n", i ); outcontrafold( stdout, pairprob[i], nlenmax ); } for( i=0; ibestpos!=-1; pairprobpt++ ) { left = gapmap[i][j]; right = gapmap[i][pairprobpt->bestpos]; prob = pairprobpt->bestscore; for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) if( pt->bestpos == right ) break; if( pt->bestpos == -1 ) { alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); adpos = alnpairnum[left]; alnpairnum[left]++; alnpairprob[left][adpos].bestscore = 0.0; alnpairprob[left][adpos].bestpos = right; alnpairprob[left][adpos+1].bestscore = -1.0; alnpairprob[left][adpos+1].bestpos = -1; pt = alnpairprob[left]+adpos; } else adpos = pt-alnpairprob[left]; pt->bestscore += prob; if( pt->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); } } return( 0 ); #if 0 fprintf( stdout, "result=\n" ); for( i=0; ibestpos!=-1; pairprobpt++ ) { pairprobpt->bestscore /= 
(float)njob; left = i; right = pairprobpt->bestpos; prob = pairprobpt->bestscore; fprintf( stdout, "%d-%d, %f\n", left, right, prob ); } return( 0 ); #endif } mafft-7.123-without-extensions/core/dndblast.c0000640000076500007650000002136612042126725020425 0ustar katohkatoh#include "mltaln.h" #include #include #define DEBUG 0 #define TEST 0 int howmanyx( char *s ) { int val = 0; if( scoremtx == -1 ) { do { if( !strchr( "atgcuATGCU", *s ) ) val++; } while( *++s ); } else { do { if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; } while( *++s ); } return( val ); } void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; disopt = 0; divpairscore = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *infp; FILE *hat2p; FILE *hat3p; char **seq = NULL; // by D.Mathog char **seq1; static char **name; static char **name1; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; char b[B]; double max; char com[1000]; int opt[M]; int res; char *home; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); LocalHom **localhomtable, *tmpptr; #if 1 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = NULL; #endif #if DEBUG if( home ) fprintf( stderr, "home = %s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d", home, pid ); sprintf( datafile, "%s/tmp/data-%d", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); sprintf( hat2file, "hat2-%d", pid ); arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { 
fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( infp, &njob, &nlenmax ); #else dorp = NOTSPECIFIED; getnumlen( infp ); #endif if( dorp == 'd' ) { scoremtx = -1; pamN = NOTSPECIFIED; } else { nblosum = 62; scoremtx = 1; } constants( njob, seq ); rewind( infp ); name = AllocateCharMtx( njob, B+1 ); name1 = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i %s", queryfile, datafile, fastafile ); else sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); #if 0 for( j=0; jnext ) { if( tmpptr->opt == -1.0 ) continue; // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } } #endif if( res < njob-i+i%10 ) { fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 ); } #if 0 { int ii, jj; if( i < njob-1 ) for( jj=i; jj j ) continue; if( mtx[j][i] > mtx[i][j] ) continue; for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } fclose( hat3p ); #endif for( i=0; i %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) 
ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); if( res < njob - i ) { fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); exit( 1 ); } if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; fprintf( stderr, "weighti = %f\n", fastathreshold ); --argc; goto nextoption; case 'i': corethr = atof( *++argv ); fprintf( stderr, "corethr = %f\n", corethr ); --argc; goto nextoption; case 'm': fmodel = 1; break; case 'c': coreext = 1; break; case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': treemethod = 'x'; break; case 'p': treemethod = 'p'; break; case 'a': alg = 'a'; break; case 'A': alg = 'A'; break; case 'S': alg = 'S'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; 
break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else 
fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **oseq; static double **pscore; static double *eff; static double **node0, **node1; static double *gapc; static double *avgap; double tmpavgap; int i, j, m, goffset; static int ***topol; static double **len; FILE *prep; char c; int corestart, coreend; int alloclen; int winsize; char *pt, *ot; double gapmin; arguments( argc, argv ); getnumlen( stdin ); rewind( stdin ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*9+1 ); name = AllocateCharMtx( njob, B+1 ); oseq = AllocateCharMtx( njob, nlenmax*9+1 ); alloclen = nlenmax*9; topol = AllocateIntCub( njob, 2, njob ); len = AllocateDoubleMtx( njob, 2 ); pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleVec( njob ); node0 = AllocateDoubleMtx( njob, njob ); node1 = AllocateDoubleMtx( njob, njob ); gapc = AllocateDoubleVec( alloclen ); avgap = AllocateDoubleVec( alloclen ); #if 0 Read( name, nlen, seq ); #else readData_pointer( stdin, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); 
if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } writePre( njob, name, nlen, seq, 0 ); if( tbutree == 0 ) { for( i=1; i seq[i] ) if( *pt != '-' ) { *ot-- = *pt; m--; } ot = oseq[i]+winsize+coreend-corestart+1; pt = seq[i]+coreend; if( coreext ) m = winsize; else m = 0; while( m && *(++pt) ) { if( *pt != '-' ) { *ot++ = *pt; m--; } } fprintf( stdout, ">%s\n", name[i] ); fprintf( stdout, "%s\n", oseq[i] ); } exit( 1 ); SHOWVERSION; return( 0 ); } mafft-7.123-without-extensions/core/mtxutl.c0000640000076500007650000002201212216775162020164 0ustar katohkatoh#include #include #include #include #include "mtxutl.h" void MtxuntDouble( double **mtx, int n ) { int i, j; for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } mseq1[0] += lgth1+lgth2; *mseq1[0] = 0; mseq2[0] += lgth1+lgth2; *mseq2[0] = 0; iin = lgth1; jin = lgth2; limk = lgth1+lgth2 + 1; for( k=0; k 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; k++; iin = ifi; jin = jfi; } return( 0.0 ); } float G__align11( char **seq1, char **seq2, int alloclen, int headgp, int tailgp ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; register int i, j; int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm; /* int ?????? 
*/ float g; float *currentw, *previousw; float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi = 0.0; static TLS float *m = NULL; static TLS int **ijp = NULL; static TLS int mpi = 0; static TLS int *mp = NULL; static TLS float *w1 = NULL; static TLS float *w2 = NULL; static TLS float *match = NULL; static TLS float *initverticalw = NULL; /* kufuu sureba iranai */ static TLS float *lastverticalw = NULL; /* kufuu sureba iranai */ static TLS char **mseq1 = NULL; static TLS char **mseq2 = NULL; static TLS char **mseq = NULL; static TLS int **intwork = NULL; static TLS float **floatwork = NULL; static TLS int orlgth1 = 0, orlgth2 = 0; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; if( mseq1 ) free( mseq1 ); mseq1 = NULL; if( mseq2 ) free( mseq2 ); mseq2 = NULL; if( w1 ) FreeFloatVec( w1 ); w1 = NULL; if( w2 ) FreeFloatVec( w2 ); w2 = NULL; if( match ) FreeFloatVec( match ); match = NULL; if( initverticalw ) FreeFloatVec( initverticalw ); initverticalw = NULL; if( lastverticalw ) FreeFloatVec( lastverticalw ); lastverticalw = NULL; if( m ) FreeFloatVec( m ); m = NULL; if( mp ) FreeIntVec( mp ); mp = NULL; if( mseq ) FreeCharMtx( mseq ); mseq = NULL; if( floatwork ) FreeFloatMtx( floatwork ); floatwork = NULL; if( intwork ) FreeIntMtx( intwork ); intwork = NULL; } return( 0.0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 <= 0 || lgth2 <= 0 ) { fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif #if 1 if( lgth1 == 0 && lgth2 == 0 ) return( 0.0 ); if( lgth1 == 0 ) { seq1[0][lgth2] = 0; while( lgth2 ) seq1[0][--lgth2] = *newgapstr; // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); return( 0.0 ); } if( lgth2 == 0 ) { seq2[0][lgth1] = 0; while( lgth1 ) seq2[0][--lgth1] = *newgapstr; // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); return( 0.0 ); 
} #endif wm = 0.0; if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; *ijppt = -( j - mpi ); } if( (g=*prept) >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } if( (g=*prept) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error } Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm = %f\n", wm ); #endif return( wm ); } float G__align11_noalign( int scoremtx[0x80][0x80], int penal, int penal_ex, char **seq1, char **seq2, int alloclen ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; register int i, j; int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; // int resultlen; float wm; /* int ?????? 
*/ float g; float *currentw, *previousw; float fpenalty = (float)penal; #if USE_PENALTY_EX float fpenalty_ex = (float)penal_ex; #endif #if 1 float *wtmp; float *mjpt, *prept, *curpt; // int *mpjpt; #endif static TLS float mi, *m; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); free( m ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } return( 0.0 ); } wm = 0.0; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 <= 0 || lgth2 <= 0 ) { fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; } if( (g=*prept) >= mi ) { mi = g; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty) > wm ) { wm = g; } if( (g=*prept) >= *mjpt ) { *mjpt = g; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; mjpt++; prept++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error } #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm = %f\n", wm ); #endif return( wm ); } mafft-7.123-without-extensions/core/Lalignmm.c0000640000076500007650000015343312225721242020371 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define MEMSAVE 1 #define DEBUG 0 #define USE_PENALTY_EX 0 #define STOREWM 1 #define DPTANNI 10 #define LOCAL 0 static int reccycle = 0; static float localthr; static void match_ribosum( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize ) { int j, k, l; float scarr[38]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; int count = 0; float *matchpt; float **cpmxpdpt; int **cpmxpdnpt; int cpkd; if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( 
lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif } static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize ) { int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; int count = 0; float *matchpt; float **cpmxpdpt; int **cpmxpdnpt; int cpkd; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif free( scarr ); } #if 0 static void match_add( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize ) { int j, k, l; float scarr[nalphabets]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; int count = 0; float *matchpt; float **cpmxpdpt; int **cpmxpdnpt; int cpkd; if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { // *matchpt = 0.0; // add dakara for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif } #endif #if 0 static float Atracking( char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int ist, int ien, int jst, int jen ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, klim; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = ien-ist+1; lgth2 = jen-jst+1; gt1bk = AllocateCharVec( 
lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i", wm ); #endif g = mi + fgcp2[j-1]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } // for( j=0; j=imid; i-- ) firstm = -9999999.9; firstmp = lgth1-1; for( i=lgth1-2; i>-1; i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; 
previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2]; // mi = previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? 
} #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j] += *curpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM // WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if 0 fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif #if STOREWM // break; #else break; #endif } } #if 0 jumpi=0; jumpj=0; imid=lgth1-1; jmid=lgth2-1; } #endif // fprintf( stderr, "imid = %d, but 
jumpi = %d\n", imid, jumpi ); // fprintf( stderr, "jmid = %d, but jumpj = %d\n", jmid, jumpj ); // for( j=0; j amino_dis['a']['g'] -1 ) fprintf( stdout, "%d %d %8.1f", i, j, WMMTX[i][j] ); if( WMMTX[i][j] == maxwm ) fprintf( stdout, "selected \n" ); else fprintf( stdout, "\n" ); } fprintf( stdout, "\n" ); } #endif #if 0 fprintf( stderr, "jumpbacki = \n" ); for( j=0; j N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( midw ); FreeFloatVec( midm ); FreeFloatVec( midn ); FreeIntVec( jumpbacki ); FreeIntVec( jumpbackj ); FreeIntVec( jumpforwi ); FreeIntVec( jumpforwj ); FreeIntVec( jumpdummi ); FreeIntVec( jumpdummj ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); #if STOREWM FreeFloatMtx( WMMTX ); FreeFloatMtx( WMMTX2 ); #endif return( value ); } static float MSalignmm_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, float **cpmx1, float **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, float **gapinfo, float **map ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { float value = 0.0; register int i, j; char **aseq1, **aseq2; int ll1, ll2; int lasti, lastj, imid, jmid=0; float wm = 0.0; /* int ?????? 
*/ float g; float *currentw, *previousw; #if USE_PENALTY_EX float fpenalty_ex = (float)RNApenalty_ex; #endif // float fpenalty = (float)penalty; float *wtmp; // short *ijppt; int *mpjpt; // short **ijp; int *mp; int mpi; float *mjpt, *prept, *curpt; float mi; float *m; float *w1, *w2; // float *match; float *initverticalw; /* kufuu sureba iranai */ float *lastverticalw; /* kufuu sureba iranai */ int **intwork; float **floatwork; // short **shortmtx; #if STOREWM float **WMMTX; float **WMMTX2; #endif float *midw; float *midm; float *midn; int lgth1, lgth2; float maxwm = 0.0; int *jumpforwi; int *jumpforwj; int *jumpbacki; int *jumpbackj; int *jumpdummi; //muda int *jumpdummj; //muda int jumpi, jumpj = 0; char *gaps; int ijpi, ijpj; float *ogcp1; float *fgcp1; float *ogcp2; float *fgcp2; float firstm; int firstmp; #if 0 static char ttt1[50000]; static char ttt2[50000]; #endif localthr = -offset + 500; // 0? ogcp1 = gapinfo[0] + ist; fgcp1 = gapinfo[1] + ist; ogcp2 = gapinfo[2] + jst; fgcp2 = gapinfo[3] + jst; depth++; reccycle++; lgth1 = ien-ist+1; lgth2 = jen-jst+1; // if( lgth1 < 5 ) // fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 ); // if( lgth2 < 5 ) // fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 ); // #if 0 fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen ); strncpy( ttt1, seq1[0]+ist, lgth1 ); strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt1[lgth1] = 0; ttt2[lgth2] = 0; fprintf( stderr, "seq1 = %s\n", ttt1 ); fprintf( stderr, "seq2 = %s\n", ttt2 ); #endif if( lgth2 <= 0 ) // lgth1 <= 0 ha? 
{ // fprintf( stderr, "\n\n==== jimei\n\n" ); // exit( 1 ); for( i=0; i", wm ); #endif g = mi + fgcp2[j-1]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } // for( j=0; j=imid; i-- ) firstm = -9999999.9; firstmp = lgth1-1; for( i=lgth1-2; i>-1; i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2]; // mi = 
previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? } #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j] += *curpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM // WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! 
= %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if 0 fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif #if STOREWM // break; #else break; #endif } } #if 0 jumpi=0; jumpj=0; imid=lgth1-1; jmid=lgth2-1; } #endif // fprintf( stderr, "imid = %d, but jumpi = %d\n", imid, jumpi ); // fprintf( stderr, "jmid = %d, but jumpj = %d\n", jmid, jumpj ); // for( j=0; j amino_dis['a']['g'] -1 ) fprintf( stdout, "%d %d %8.1f", i, j, WMMTX[i][j] ); if( WMMTX[i][j] == maxwm ) fprintf( stdout, "selected \n" ); else fprintf( stdout, "\n" ); } fprintf( stdout, "\n" ); } exit( 1 ); #endif #if 0 fprintf( stderr, "jumpbacki = \n" ); for( j=0; j N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); 
#endif FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( midw ); FreeFloatVec( midm ); FreeFloatVec( midn ); FreeIntVec( jumpbacki ); FreeIntVec( jumpbackj ); FreeIntVec( jumpforwi ); FreeIntVec( jumpforwj ); FreeIntVec( jumpdummi ); FreeIntVec( jumpdummj ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); #if STOREWM FreeFloatMtx( WMMTX ); FreeFloatMtx( WMMTX2 ); #endif free( gaps ); #if MEMSAVE free( aseq1 ); free( aseq2 ); #else FreeCharMtx( aseq1 ); FreeCharMtx( aseq2 ); #endif return( value ); } float Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, float **map ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; int i, j; int ll1, ll2; int lgth1, lgth2; float wm = 0.0; /* int ?????? */ char **mseq1; char **mseq2; // char **mseq; float *ogcp1; float *ogcp2; float *fgcp1; float *fgcp2; float **cpmx1; float **cpmx2; float **gapinfo; // float fpenalty; float fpenalty = (float)RNApenalty; int nglen1, nglen2; #if 0 fprintf( stderr, "eff in SA+++align\n" ); for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, mseq1, mseq2, 0, gapinfo, map ); #if DEBUG fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); #endif // fprintf( stderr, "wm = %f\n", wm ); #if 0 for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalign2m2m_rec( icyc, jcyc, eff1, eff2, seq1, seq2, 
cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, mseq1, mseq2, 0, gapinfo, map ); #if DEBUG fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); #endif // fprintf( stderr, "wm = %f\n", wm ); #if 0 for( i=0; i 0 ) localhompt = localhompt->next; localhompt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); localhompt = localhompt->next; // fprintf( stderr, "tmppt = %p, localhompt = %p\n", tmppt, localhompt ); } tmppt = localhompt; st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "In in while loop\n" ); // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 isumscore += iscore; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = iscore * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // fprintf( stderr, "%c-%c, score(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( st ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = 
pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 isumscore += iscore; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = (double)iscore * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = (double)sumscore * 5.8 / 600 / sumoverlap; } return( nlocalhom ); } #endif static int addlocalhom_r( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt = NULL; // by D.Mathog, a guess int st; int nlocalhom = 0; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; sumscore = 0.0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess #if 0 fprintf( stderr, "nlocalhom = %d in addlocalhom\n", nlocalhom ); fprintf( stderr, "al1 = %s, al2 = %s\n", al1, al2 ); fprintf( stderr, "off1 = %d, off2 = %d\n", off1, off2 ); fprintf( stderr, "localhopt = %p, skip = %d\n", localhompt, skip ); #endif fprintf( stderr, "pt1 = \n%s\n, pt2 = \n%s\n", pt1, pt2 ); if( skip ) { while( --skip > 0 ) localhompt = localhompt->next; localhompt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); localhompt = localhompt->next; fprintf( stderr, "tmppt = %p, localhompt = %p\n", (void *)tmppt, (void *)localhompt ); } tmppt = localhompt; st = 0; score = 0.0; while( *pt1 != 0 ) { fprintf( stderr, "In in while loop\n" ); fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom 
*)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 sumscore += score; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = score * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 sumscore += score; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = score * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif fprintf( stderr, "score (2)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = sumscore * 5.8 / 600 / sumoverlap; } return( nlocalhom ); } void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt; LocalHom *subnosento; int st; int saisho; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; 
sumscore = 0.0; sumoverlap = 0; start1 = 0; // by Mathog, a guess start2 = 0; // by Mathog, a guess subnosento = localhompt; while( subnosento->next ) subnosento = subnosento->next; tmppt = subnosento; saisho = ( localhompt->nokori == 0 ); fprintf( stderr, "localhompt = %p\n", (void *)localhompt ); fprintf( stderr, "tmppt = %p\n", (void *)tmppt ); fprintf( stderr, "subnosento = %p\n", (void *)subnosento ); st = 0; score = 0.0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( localhompt->nokori++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( localhompt->nokori++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = 
end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (2)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } fprintf( stderr, "sumscore = %f\n", sumscore ); if( !divpairscore ) { if( !saisho ) subnosento = subnosento->next; for( tmppt=subnosento; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = sumscore * 5.8 / 600 / sumoverlap; fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); 
fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } if( !divpairscore ) { for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; // tmppt->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); tmppt->opt = (double)600 * 5.8 / 600; // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ) { int posinaln, pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; // int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess posinaln = 0; while( *pt1 != 0 ) { if( *pt1 != '-' && *pt2 != '-' && equiv[posinaln] > 0.0 ) 
{ start1 = end1 = pos1; start2 = end2 = pos2; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ... (posinaln=%d)\n", posinaln ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->overlapaa = 1; // tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; tmppt->opt = equiv[posinaln] * scale; // fprintf( stdout, "*pt1=%c, *pt2=%c, equiv=%f\n", *pt1, *pt2, equiv[posinaln] ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; posinaln++; } } void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' 
&& *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } if( !divpairscore ) { for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } void putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; sumscore = 0.0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; score = 0.0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); 
tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (2)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif if( !divpairscore ) { for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = sumscore * 5.8 / 600 / sumoverlap; // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } char *cutal( char *al, int al_display_start, int start, int end ) { int pos; char *pt = 
al; char *val = NULL; pos = al_display_start; do { if( start == pos ) val = pt; if( end == pos ) break; // fprintf( stderr, "pos=%d, *pt=%c, val=%p\n", pos, *pt, val ); if( *pt != '-' ) pos++; } while( *pt++ != 0 ); *(pt+1) = 0; return( val ); } void ErrorExit( char *message ) { fprintf( stderr, "%s\n", message ); exit( 1 ); } void strncpy_caseC( char *str1, char *str2, int len ) { if( dorp == 'd' && upperCase > 0 ) { while( len-- ) *str1++ = toupper( *str2++ ); } else strncpy( str1, str2, len ); } void seqUpper( int nseq, char **seq ) /* not used */ { int i, j, len; for( i=0; i return 1 */ { int c, i = 0 ; int noteofflag = 0; for( i=0; i return 1 */ char s[] ; int l ; FILE *fp ; { int c = 0, i = 0 ; int noteofflag = 0; if( feof( fp ) ) return( 1 ); for( i=0; i M ) { fprintf( stderr, "TOO MANY SEQUENCE!\n" ); fprintf( stderr, "%d > %d\n", njob, M ); exit( 1 ); } } int allSpace( char *str ) { int value = 1; while( *str ) value *= ( !isdigit( *str++ ) ); return( value ); } void Read( char name[M][B], int nlen[M], char **seq ) { extern void FRead( FILE *x, char y[M][B], int z[M], char **w ); FRead( stdin, name, nlen, seq ); } void FRead( FILE *fp, char name[][B], int nlen[], char **seq ) { int i, j; char b[B]; fgets( b, B-1, fp ); #if DEBUG fprintf( stderr, "b = %s\n", b ); #endif if( strstr( b, "onnet" ) ) scoremtx = 1; else if( strstr( b, "DnA" ) ) { scoremtx = -1; upperCase = -1; } else if( strstr( b, "dna" ) ) { scoremtx = -1; upperCase = 0; } else if( strstr( b, "DNA" ) ) { scoremtx = -1; upperCase = 1; } else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2; else scoremtx = 0; #if DEBUG fprintf( stderr, " %s->scoremtx = %d\n", b, scoremtx ); #endif geta2 = GETA2; #if 0 if( strlen( b ) >=25 ) { b[25] = 0; #if DEBUG fprintf( stderr, "kimuraR = %s\n", b+20 ); #endif kimuraR = atoi( b+20 ); if( kimuraR < 0 || 20 < kimuraR ) ErrorExit( "Illeagal kimuraR value.\n" ); if( allSpace( b+20 ) ) kimuraR = NOTSPECIFIED; } else kimuraR = NOTSPECIFIED; #if DEBUG 
fprintf( stderr, "kimuraR = %d\n", kimuraR ); #endif if( strlen( b ) >=20 ) { b[20] = 0; #if DEBUG fprintf( stderr, "pamN = %s\n", b+15 ); #endif pamN = atoi( b+15 ); if( pamN < 0 || 400 < pamN ) ErrorExit( "Illeagal pam value.\n" ); if( allSpace( b+15 ) ) pamN = NOTSPECIFIED; } else pamN = NOTSPECIFIED; if( strlen( b ) >= 15 ) { b[15] = 0; #if DEBUG fprintf( stderr, "poffset = %s\n", b+10 ); #endif poffset = atoi( b+10 ); if( poffset > 500 ) ErrorExit( "Illegal extending gap ppenalty\n" ); if( allSpace( b+10 ) ) poffset = NOTSPECIFIED; } else poffset = NOTSPECIFIED; if( strlen( b ) >= 10 ) { b[10] = 0; #if DEBUG fprintf( stderr, "ppenalty = %s\n", b+5 ); #endif ppenalty = atoi( b+5 ); if( ppenalty > 0 ) ErrorExit( "Illegal opening gap ppenalty\n" ); if( allSpace( b+5 ) ) ppenalty = NOTSPECIFIED; } else ppenalty = NOTSPECIFIED; #endif for( i=0; i' ) ) value++; b = c; } rewind( fp ); return( value ); } void searchKUorWA( FILE *fp ) { int c, b; b = '\n'; while( !( ( ( c = getc( fp ) ) == '>' || c == EOF ) && b == '\n' ) ) b = c; ungetc( c, fp ); } static int onlyGraph( char *str ) { char tmp; char *res = str; char *bk = str; // while( (tmp=*str++) ) if( isgraph( tmp ) ) *res++ = tmp; while( (tmp=*str++) ) { if( 0x20 < tmp && tmp < 0x7f ) *res++ = tmp; if( tmp == '>' || tmp == '(' ) { fprintf( stderr, "========================================================\n" ); fprintf( stderr, "========================================================\n" ); fprintf( stderr, "=== \n" ); fprintf( stderr, "=== ERROR!! 
\n" ); fprintf( stderr, "=== In the '--anysymbol' and '--preservecase' modes, \n" ); fprintf( stderr, "=== '>' and '(' are unacceptable.\n" ); fprintf( stderr, "=== \n" ); fprintf( stderr, "========================================================\n" ); fprintf( stderr, "========================================================\n" ); exit( 1 ); } } *res = 0; return( res - bk ); } static int charfilter( char *str ) { char tmp; char *res = str; char *bk = str; while( (tmp=*str++) ) { if( tmp == '=' || tmp == '*' || tmp == '<' || tmp == '>' || tmp == '(' || tmp == ')' ) { fprintf( stderr, "\n" ); fprintf( stderr, "Characters '= * < > ( )' are not accepted in the --text mode, \nalthough most printable characters are ok.\n" ); fprintf( stderr, "\n" ); exit( 1 ); } if( 0x20 < tmp && tmp < 0x7f ) // if( tmp != '\n' && tmp != ' ' && tmp != '\t' ) // unprintable characters mo ok. *res++ = tmp; } *res = 0; return( res - bk ); } static int onlyAlpha_lower( char *str ) { char tmp; char *res = str; char *bk = str; while( (tmp=*str++) ) if( isalpha( tmp ) || tmp == '-' || tmp == '*' || tmp == '.' ) *res++ = tolower( tmp ); *res = 0; return( res - bk ); } static int onlyAlpha_upper( char *str ) { char tmp; char *res = str; char *bk = str; while( (tmp=*str++) ) if( isalpha( tmp ) || tmp == '-' || tmp == '*' || tmp == '.' 
) *res++ = toupper( tmp ); *res = 0; return( res - bk ); } void kake2hiku( char *str ) { do if( *str == '*' ) *str = '-'; while( *str++ ); } char *load1SeqWithoutName_realloc_casepreserve( FILE *fpp ) { int c, b; char *cbuf; int size = N; char *val; val = malloc( (size+1) * sizeof( char ) ); cbuf = val; b = '\n'; while( ( c = getc( fpp ) ) != EOF && !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ if( cbuf - val == size ) { size += N; fprintf( stderr, "reallocating...\n" ); val = (char *)realloc( val, (size+1) * sizeof( char ) ); if( !val ) { fprintf( stderr, "Allocation error in load1SeqWithoutName_realloc \n" ); exit( 1 ); } fprintf( stderr, "done.\n" ); cbuf = val + size-N; } b = c; } ungetc( c, fpp ); *cbuf = 0; onlyGraph( val ); // kake2hiku( val ); return( val ); } char *load1SeqWithoutName_realloc( FILE *fpp ) { int c, b; char *cbuf; int size = N; char *val; val = malloc( (size+1) * sizeof( char ) ); cbuf = val; b = '\n'; while( ( c = getc( fpp ) ) != EOF && !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ if( cbuf - val == size ) { size += N; fprintf( stderr, "reallocating...\n" ); val = (char *)realloc( val, (size+1) * sizeof( char ) ); if( !val ) { fprintf( stderr, "Allocation error in load1SeqWithoutName_realloc \n" ); exit( 1 ); } fprintf( stderr, "done.\n" ); cbuf = val + size-N; } b = c; } ungetc( c, fpp ); *cbuf = 0; if( nblosum == -2 ) { charfilter( val ); } else { if( dorp == 'd' ) onlyAlpha_lower( val ); else onlyAlpha_upper( val ); kake2hiku( val ); } return( val ); } int load1SeqWithoutName_new( FILE *fpp, char *cbuf ) { int c, b; char *bk = cbuf; b = '\n'; while( ( c = getc( fpp ) ) != EOF && /* by T. 
Nishiyama */ !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ b = c; } ungetc( c, fpp ); *cbuf = 0; if( dorp == 'd' ) onlyAlpha_lower( bk ); else onlyAlpha_upper( bk ); kake2hiku( bk ); return( 0 ); } void readDataforgaln( FILE *fp, char **name, int *nlen, char **seq ) { int i; static char *tmpseq = NULL; #if 0 if( !tmpseq ) { tmpseq = AllocateCharVec( N ); } #endif rewind( fp ); searchKUorWA( fp ); for( i=0; i', stdout ); // puts( dumname+1 ); strncat( name[npos], dumname, B-1 ); name[npos][B-1] = 0; if( dorp == 'd' && upperCase != -1 ) seqLower( 1, &tmpseq ); seqlen = strlen( tmpseq ); lpos = 0; for( j=0; j<5; j++ ) { if( regtable[0][j*2] == -1 && regtable[0][j*2+1] == -1 ) continue; startpos = regtable[0][j*2]; endpos = regtable[0][j*2+1]; if( startpos > endpos ) { endpos = regtable[0][j*2]; startpos = regtable[0][j*2+1]; } if( startpos < 0 ) startpos = 0; if( endpos < 0 ) endpos = 0; if( endpos >= seqlen ) endpos = seqlen-1; if( startpos >= seqlen ) startpos = seqlen-1; // fprintf( stderr, "startpos = %d, endpos = %d\n", startpos, endpos ); outlen = endpos - startpos+1; if( revtable[0][j] == 'f' ) { // fprintf( stderr, "regtable[%d][st] = %d\n", i, regtable[0][j*2+0] ); // fprintf( stderr, "regtable[%d][en] = %d\n", i, regtable[0][j*2+1] ); // fprintf( stderr, "outlen = %d\n", outlen ); // fprintf( stdout, "%.*s\n", outlen, tmpseq+regtable[0][j*2] ); strncpy( outseq[npos] + lpos, tmpseq+startpos, outlen ); lpos += outlen; } else { fs = AllocateCharVec( outlen+1 ); rs = AllocateCharVec( outlen+1 ); fs[outlen] = 0; strncpy( fs, tmpseq+startpos, outlen ); sreverse( rs, fs ); // fprintf( stdout, "%s\n", rs ); strncpy( outseq[npos] + lpos, rs, outlen ); lpos += outlen; free( fs ); free( rs ); } outseq[npos][lpos] = 0; } npos++; } free( tmpseq ); } } void cutData( FILE *fp, int **regtable, char **revtable, int *outtable ) { int i, j; int outlen, seqlen, startpos, endpos; static char *tmpseq = NULL; static char 
*dumname = NULL; char *fs, *rs; if( dumname == NULL ) { dumname = AllocateCharVec( N ); } rewind( fp ); searchKUorWA( fp ); for( i=0; i', stdout ); puts( dumname+1 ); seqlen = strlen( tmpseq ); if( dorp == 'd' && upperCase != -1 ) seqLower( 1, &tmpseq ); if( outtable[i] == 2 ) { startpos = 0; endpos = seqlen-1; outlen = endpos - startpos + 1; fprintf( stdout, "%.*s\n", outlen, tmpseq+startpos ); } else { for( j=0; j<5; j++ ) { if( regtable[i][j*2] == -1 && regtable[i][j*2+1] == -1 ) continue; startpos = regtable[i][j*2]; endpos = regtable[i][j*2+1]; if( startpos > endpos ) { endpos = regtable[i][j*2]; startpos = regtable[i][j*2+1]; } if( startpos < 0 ) startpos = 0; if( endpos < 0 ) endpos = 0; if( endpos >= seqlen ) endpos = seqlen-1; if( startpos >= seqlen ) startpos = seqlen-1; outlen = endpos - startpos + 1; if( revtable[i][j] == 'f' ) { fprintf( stderr, "startpos = %d\n", startpos ); fprintf( stderr, "endpos = %d\n", endpos ); fprintf( stderr, "outlen = %d\n", outlen ); fprintf( stdout, "%.*s\n", outlen, tmpseq+startpos ); } else { fs = AllocateCharVec( outlen+1 ); rs = AllocateCharVec( outlen+1 ); fs[outlen] = 0; strncpy( fs, tmpseq+startpos, outlen ); sreverse( rs, fs ); fprintf( stdout, "%s\n", rs ); free( fs ); free( rs ); } } } } free( tmpseq ); } } void catData( FILE *fp ) { int i; static char *tmpseq = NULL; static char *dumname = NULL; // char *cptr; if( dumname == NULL ) { dumname = AllocateCharVec( N ); } rewind( fp ); searchKUorWA( fp ); for( i=0; i_numo_s_%08d_numo_e_", i+1 ); } else { putc( '>', stdout ); } puts( dumname+1 ); tmpseq = load1SeqWithoutName_realloc( fp ); if( dorp == 'd' && upperCase != -1 ) seqLower( 1, &tmpseq ); puts( tmpseq ); free( tmpseq ); } } int countATGC( char *s, int *total ) { int nATGC; int nChar; char c; nATGC = nChar = 0; if( *s == 0 ) { *total = 0; return( 0 ); } do { c = tolower( *s ); if( isalpha( c ) ) { nChar++; if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' ) nATGC++; } } while( *++s ); 
*total = nChar; return( nATGC ); } double countATGCbk( char *s ) { int nATGC; int nChar; char c; nATGC = nChar = 0; do { c = tolower( *s ); if( isalpha( c ) ) { nChar++; if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' ) nATGC++; } } while( *++s ); return( (double)nATGC / nChar ); } int countnogaplen( char *seq ) { int val = 0; while( *seq ) if( *seq++ != '-' ) val++; return( val ); } int countnormalletters( char *seq, char *ref ) { int val = 0; while( *seq ) if( strchr( ref, *seq++ ) ) val++; return( val ); } void getnumlen_casepreserve( FILE *fp, int *nlenminpt ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq, *tmpname; double atgcfreq; tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; atgcnum += countATGC( tmpseq, &nsite ); total += nsite; free( tmpseq ); } free( tmpname ); atgcfreq = (double)atgcnum / total; // fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } } void getnumlen_nogap( FILE *fp, int *nlenminpt ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq, *tmpname; double atgcfreq; tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; atgcnum += countATGC( tmpseq, &nsite ); total += nsite; free( tmpseq ); } free( tmpname ); atgcfreq = (double)atgcnum / total; fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } } void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt ) { int total; int nsite = 0; int atgcnum; int i, tmp; char 
*tmpseq, *tmpname; double atgcfreq; tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; atgcnum += countATGC( tmpseq, &nsite ); total += nsite; free( tmpseq ); } free( tmpname ); atgcfreq = (double)atgcnum / total; fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } } static void escapehtml( char *res, char *ori, int maxlen ) { char *res0 = res; while( *ori ) { if( *ori == '<' ) { strcpy( res, "<" ); res += 3; } else if( *ori == '>' ) { strcpy( res, ">" ); res += 3; } else if( *ori == '&' ) { strcpy( res, "&" ); res += 4; } else if( *ori == '"' ) { strcpy( res, """ ); res += 5; } else if( *ori == ' ' ) { strcpy( res, " " ); res += 5; } else { *res = *ori; } res++; ori++; if( res - res0 -10 > N ) break; } *res = 0; } void getnumlen_nogap_outallreg_web( FILE *fp, FILE *ofp, int *nlenminpt, int *isalignedpt ) { int total; int nsite = 0; int atgcnum; int alnlen = 0, alnlen_prev; int i, tmp, lennormalchar; char *tmpseq, *tmpname, *tmpname2; double atgcfreq; tmpname = AllocateCharVec( N ); tmpname2 = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; alnlen_prev = -1; *isalignedpt = 1; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; atgcnum += countATGC( tmpseq, &nsite ); total += nsite; alnlen = strlen( tmpseq ); // fprintf( stdout, "##### alnlen, alnlen_prev = %d, %d\n", alnlen, alnlen_prev ); if( i>0 && alnlen_prev != alnlen ) *isalignedpt = 0; alnlen_prev = alnlen; atgcfreq = (double)atgcnum / total; // fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); // if( dorp == NOTSPECIFIED ) // you kentou { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; 
upperCase = 0; } } if( dorp == 'd' ) lennormalchar = countnormalletters( tmpseq, "atgcuATGCU" ); else lennormalchar = countnormalletters( tmpseq, "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv" ); free( tmpseq ); fprintf( ofp, " \n", i, i, i, i, i, lennormalchar, tmpname2 ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); } free( tmpname ); free( tmpname2 ); atgcfreq = (double)atgcnum / total; fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); // if( dorp == NOTSPECIFIED ) // you kentou { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } fprintf( ofp, "\n" ); if( *isalignedpt ) { fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); } } void getnumlen( FILE *fp ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq; char *tmpname; double atgcfreq; tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; atgcnum += countATGC( tmpseq, &nsite ); total += nsite; // fprintf( stderr, "##### total = %d\n", total ); free( tmpseq ); } atgcfreq = (double)atgcnum / total; // fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } free( tmpname ); } void WriteGapFill( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq ) { static char b[N]; int i, j; int nalen[M]; static char gap[N]; static char buff[N]; #if IODEBUG fprintf( stderr, "IMAKARA KAKU\n" ); #endif nlenmax = 0; for( i=0; i%s\n", name[i]+1 ); for( j=0; j%s\n", name[i]+1 ); for( j=0; j%s\n", name[i]+1 ); for( j=0; j max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); 
fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; max /= INTMTXSCALE; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; len = atoi( pt ); sumlen += len; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], score, qstart, qend, tstart, tend, overlapaa ); while( fgets( b, B-1, 
fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); scorepersite = sumscore / sumlen; if( scorepersite != (int)scorepersite ) { fprintf( stderr, "ERROR! sumscore=%f, sumlen=%f, and scorepersite=%f\n", sumscore, sumlen, scorepersite ); exit( 1 ); } if( !strncmp( " ", b, 23 ) ) break; } } free( junban ); return (int)scorepersite; } int ReadBlastm7_scoreonly( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int *junban; int overlapaa; double score, sumscore; int qstart, qend, tstart, tend; static char qal[N], tal[N], al[N]; int nlocalhom; junban = calloc( nin, sizeof( int ) ); count = 0; sumscore = 0.0; score = 0.0; while( 1 ) { if( feof( fp ) ) break; while( fgets( b, B-1, fp ) ) { if( !strncmp( " ", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; overlapaa = atoi( pt ); while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], score, 
qstart, qend, tstart, tend, overlapaa ); // nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom ); while( fgets( b, B-1, fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); if( !strncmp( " ", b, 23 ) ) break; } } free( junban ); return count; } int ReadBlastm7( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; double score, sumscore; int qstart, qend, tstart, tend; static char qal[N], tal[N], al[N]; int nlocalhom; count = 0; sumscore = 0.0; score = 0.0; nlocalhom = 0; while( 1 ) { if( feof( fp ) ) break; while( fgets( b, B-1, fp ) ) { if( !strncmp( " ", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; overlapaa = atoi( pt ); while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], score, qstart, qend, 
tstart, tend, overlapaa ); nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom ); while( fgets( b, B-1, fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); if( !strncmp( " ", b, 23 ) ) break; } } return count; } int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int opt; double z, bits; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( 1 ) { if( strncmp( ">>+==========+", b, 14 ) ) { fgets( b, B-1, fp ); if( feof( fp ) ) break; continue; } junban[count++] = atoi( b+14 ); // fprintf( stderr, "t = %d\n", atoi( b+14 ) ); while( fgets( b, B-1, fp ) ) if( !strncmp( "; fa_opt:", b, 9 ) || !strncmp( "; sw_s-w opt:", b, 13 ) ) break; pt = strstr( b, ":" ) +1; opt = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_overlap:", b+4, 9 ) ) break; pt = strstr( b, ":" ) +1; overlapaa = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) +1; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) +1; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) +1; qal_display_start = atoi( pt ) - 1; pt = qal; while( (c = fgetc( fp )) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) + 1; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) + 1; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( 
"_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) + 1; tal_display_start = atoi( pt ) - 1; pt = tal; while( ( c = fgetc( fp ) ) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; // fprintf( stderr, "(%d-%d:%d-%d)\n", qstart, qend, tstart, tend ); // fprintf( stderr, "qal_display_start = %d, tal_display_start = %d\n", qal_display_start, tal_display_start ); // fprintf( stderr, "qal = %s\n", qal ); // fprintf( stderr, "tal = %s\n", tal ); qal2 = cutal( qal, qal_display_start, qstart, qend ); tal2 = cutal( tal, tal_display_start, tstart, tend ); // fprintf( stderr, "qal2 = %s\n", qal2 ); // fprintf( stderr, "tal2 = %s\n", tal2 ); // fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt ); putlocalhom( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa ); } // fprintf( stderr, "count = %d\n", count ); return count; } int ReadFasta34m10( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; int opt, qstart, qend, tstart, tend; double z, bits; int qal_display_start, tal_display_start; static char qal[N], tal[N]; char *qal2, *tal2; int c; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( 1 ) { if( strncmp( ">>+==========+", b, 14 ) ) { fgets( b, B-1, fp ); if( feof( fp ) ) break; continue; } junban[count++] = atoi( b+14 ); // fprintf( stderr, "t = %d\n", atoi( b+14 ) ); while( fgets( b, B-1, fp ) ) if( !strncmp( "; fa_opt:", b, 9 ) || !strncmp( "; sw_s-w opt:", b, 13 ) ) break; pt = strstr( b, ":" ) +1; opt = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_overlap:", b+4, 9 ) ) break; pt = strstr( b, ":" ) +1; overlapaa = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) +1; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( 
"_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) +1; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) +1; qal_display_start = atoi( pt ) - 1; pt = qal; while( (c = fgetc( fp )) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) + 1; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) + 1; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) + 1; tal_display_start = atoi( pt ) - 1; pt = tal; while( ( c = fgetc( fp ) ) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; // fprintf( stderr, "(%d-%d:%d-%d)\n", qstart, qend, tstart, tend ); // fprintf( stderr, "qal_display_start = %d, tal_display_start = %d\n", qal_display_start, tal_display_start ); // fprintf( stderr, "qal = %s\n", qal ); // fprintf( stderr, "tal = %s\n", tal ); qal2 = cutal( qal, qal_display_start, qstart, qend ); tal2 = cutal( tal, tal_display_start, tstart, tend ); // fprintf( stderr, "qal2 = %s\n", qal2 ); // fprintf( stderr, "tal2 = %s\n", tal2 ); // fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt ); putlocalhom( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa ); } // fprintf( stderr, "count = %d\n", count ); return count; } int ReadFasta34m10_scoreonly_nucbk( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; count = 0; while( !feof( fp ) ) { fgets( b, B-1, fp ); if( !strncmp( "+===========+", b, 13 ) ) { pos = atoi( b+13 ); if( strchr( b, 'r' ) ) continue; // pt = strchr( b, ')' ) + 1; pt = strchr( b, ']' ) + 1; sscanf( pt, "%d %lf %lf", &opt, &bits, &z ); dis[pos] += (double)opt; count++; 
#if 0 fprintf( stderr, "b=%s\n", b ); fprintf( stderr, "opt=%d\n", opt ); fprintf( stderr, "pos=%d\n", pos ); fprintf( stderr, "dis[pos]=%f\n", dis[pos] ); #endif } else if( 0 == strncmp( ">>><<<", b, 6 ) ) { break; } } if( !count ) return -1; return count; } int ReadFasta34m10_scoreonly_nuc( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; int c; int *yonda; yonda = AllocateIntVec( nin ); for( c=0; c>>", b, 3 ) ) { for( c=0; c>><<<", b, 6 ) ) { break; } } free( yonda ); if( !count ) return -1; return count; } int ReadFasta34m10_scoreonly( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; int c; int *yonda; yonda = AllocateIntVec( nin ); for( c=0; c>>", b, 3 ) ) { for( c=0; c>><<<", b, 6 ) ) { break; } } free( yonda ); if( !count ) return -1; return count; } int ReadFasta34( FILE *fp, double *dis, int nseq, char name[M][B], LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; int opt, qstart, qend, tstart, tend; double z, bits; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( !feof( fp ) ) { if( !strncmp(">>+==========+", b, 14 ) ) { junban[count] = atoi( b+14 ); count++; fgets( b, B-1, fp ); // initn: pt = strstr( b, "opt: " ) + 5; localhomlist[junban[count-1]].opt = atof( pt ); fgets( b, B-1, fp ); // Smith-Waterman score pt = strstr( b, "ungapped) in " ) + 13; sscanf( pt, "%d", &overlapaa ); fprintf( stderr, "pt = %s, overlapaa = %d\n", pt, overlapaa ); pt = strstr( b, "overlap (" ) + 8; sscanf( pt, "(%d-%d:%d-%d)", &qstart, &qend, &tstart, &tend ); localhomlist[junban[count-1]].overlapaa = overlapaa; localhomlist[junban[count-1]].start1 = qstart-1; localhomlist[junban[count-1]].end1 = qend-1; localhomlist[junban[count-1]].start2 = tstart-1; localhomlist[junban[count-1]].end2 = tend-1; } fgets( b, B-1, fp ); } fprintf( stderr, "count = 
%d\n", count ); return count; } int ReadFasta3( FILE *fp, double *dis, int nseq, char name[M][B] ) { int count=0; char b[B]; char *pt; int junban[M]; int initn, init1, opt; double z; count = 0; #if 0 for( i=0; i<10000000 && count 0 ) { #if 0 /* /tmp/pre ¤Î´Ø·¸¤Ç¤Ï¤º¤·¤¿ */ if( ferror( prep_g ) ) prep_g = fopen( "pre", "w" ); if( !prep_g ) ErrorExit( "Cannot re-open pre." ); #endif rewind( prep_g ); signalSM[STATUS] = IMA_KAITERU; #if IODEBUG if( force ) fprintf( stderr, "FINAL " ); #endif if( devide ) dvWrite( prep_g, nseq, name, nlen, aseq ); else WriteGapFill( prep_g, nseq, name, nlen, aseq ); /* fprintf( prep_g, '\EOF' ); */ fflush( prep_g ); if( force ) signalSM[STATUS] = OSHIMAI; else signalSM[STATUS] = KAKIOWATTA; value = 1; signalSM[SEMAPHORE]++; #if IODEBUG fprintf( stderr, "signalSM[STATUS] = %c\n", signalSM[STATUS] ); #endif break; } else { #if IODEBUG fprintf( stderr, "YONDERUKARA_AKIRAMERU\n" ); #endif value = 0; signalSM[SEMAPHORE]++; if( !force ) break; #if IODEBUG fprintf( stderr, "MATSU\n" ); #endif sleep( 1 ); } } if( force && !value ) ErrorExit( "xced ga pre wo hanasanai \n" ); return( value ); #else if( force ) { rewind( prep_g ); writeData_pointer( prep_g, nseq, name, nlen, aseq ); } #endif return( 0 ); } void readOtherOptions( int *ppidptr, int *fftThresholdptr, int *fftWinSizeptr ) { if( calledByXced ) { FILE *fp = fopen( "pre", "r" ); char b[B]; if( !fp ) ErrorExit( "Cannot open pre.\n" ); fgets( b, B-1, fp ); sscanf( b, "%d %d %d", ppidptr, fftThresholdptr, fftWinSizeptr ); fclose( fp ); #if IODEBUG fprintf( stderr, "b = %s\n", b ); fprintf( stderr, "ppid = %d\n", ppid ); fprintf( stderr, "fftThreshold = %d\n", fftThreshold ); fprintf( stderr, "fftWinSize = %d\n", fftWinSize ); #endif } else { *ppidptr = 0; *fftThresholdptr = FFT_THRESHOLD; if( dorp == 'd' ) *fftWinSizeptr = FFT_WINSIZE_D; else *fftWinSizeptr = FFT_WINSIZE_P; } #if 0 fprintf( stderr, "fftThresholdptr=%d\n", *fftThresholdptr ); fprintf( stderr, "fftWinSizeptr=%d\n", 
*fftWinSizeptr ); #endif } void initSignalSM( void ) { // int signalsmid; #if IODEBUG if( ppid ) fprintf( stderr, "PID of xced = %d\n", ppid ); #endif if( !ppid ) { signalSM = NULL; return; } #if 0 signalsmid = shmget( (key_t)ppid, 3, IPC_ALLOC | 0666 ); if( signalsmid == -1 ) ErrorExit( "Cannot get Shared memory for signal.\n" ); signalSM = shmat( signalsmid, 0, 0 ); if( (int)signalSM == -1 ) ErrorExit( "Cannot attatch Shared Memory for signal!\n" ); signalSM[STATUS] = IMA_KAITERU; signalSM[SEMAPHORE] = 1; #endif } void initFiles( void ) { char pname[100]; if( ppid ) sprintf( pname, "/tmp/pre.%d", ppid ); else sprintf( pname, "pre" ); prep_g = fopen( pname, "w" ); if( !prep_g ) ErrorExit( "Cannot open pre" ); trap_g = fopen( "trace", "w" ); if( !trap_g ) ErrorExit( "cannot open trace" ); fprintf( trap_g, "PID = %d\n", getpid() ); fflush( trap_g ); } void WriteForFasta( FILE *fp, int locnjob, char **name, int nlen[M], char **aseq ) { static char b[N]; int i, j; int nalen[M]; for( i=0; i%s\n", name[i] ); for( j=0; j 0 ) { tmpptr1 = localhomtable[i][j].last; // fprintf( stderr, "reallocating, localhomtable[%d][%d].nokori = %d\n", i, j, localhomtable[i][j].nokori ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable[i][j].last = tmpptr1; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } tmpptr1->start1 = start1; tmpptr1->start2 = start2; tmpptr1->end1 = end1; tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; } // else { if( localhomtable[j][i].nokori++ > 0 ) { tmpptr2 = localhomtable[j][i].last; tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); 
tmpptr2 = tmpptr2->next; tmpptr2->extended = -1; tmpptr2->next = NULL; localhomtable[j][i].last = tmpptr2; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } else { tmpptr2 = localhomtable[j]+i; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } tmpptr2->start2 = start1; tmpptr2->start1 = start2; tmpptr2->end2 = end1; tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "i=%d, j=%d, st1=%d, en1=%d, opt = %f\n", i, j, tmpptr1->start1, tmpptr1->end1, opt ); } } } void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( njob, njob ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } // else { if( nlocalhom[j][i]++ > 0 ) { tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->next = NULL; } else tmpptr2 = localhomtable[j]+i; tmpptr2->start2 = start1; // CHUUI!!!! 
tmpptr2->start1 = start2; tmpptr2->end2 = end1; // CHUUI!!!! tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt ); } } FreeIntMtx( nlocalhom ); } void readlocalhomtable_two( FILE*fp, int norg, int nadd, LocalHom **localhomtable, LocalHom **localhomtablex, char *kozoarivec ) // for test only { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; int **nlocalhomx = NULL; LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( norg, nadd ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } { if( nlocalhomx[j][i]++ > 0 ) { tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->next = NULL; } else tmpptr2 = localhomtablex[j]+i; tmpptr2->start2 = start1+1; // CHUUI!!!! tmpptr2->start1 = start2; tmpptr2->end2 = end1+1; // CHUUI!!!! 
tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt ); } } FreeIntMtx( nlocalhom ); FreeIntMtx( nlocalhomx ); } void readlocalhomtable_one( FILE*fp, int norg, int nadd, LocalHom **localhomtable, char *kozoarivec ) // for test only { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( norg, nadd ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! 
tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } } FreeIntMtx( nlocalhom ); } void outlocalhom_part( LocalHom **localhom, int norg, int nadd ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt ); } while( (tmpptr=tmpptr->next) ); } } void outlocalhom( LocalHom **localhom, int nseq ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt ); } while( (tmpptr=tmpptr->next) ); } } void outlocalhompt( LocalHom ***localhom, int n1, int n2 ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt, tmpptr->wimportance ); } while( (tmpptr=tmpptr->next) ); } } void FreeLocalHomTable_part( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable_two( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( 
localhomtable[i] ); } for( i=n; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable_one( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable( LocalHom **localhomtable, int n ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } char *progName( char *str ) { char *value; if( ( value = strrchr( str, '/' ) ) != NULL ) return( value+1 ); else return( str ); } static void tabtospace( char *str ) { char *p; // fprintf( stderr, "before = %s\n", str ); while( NULL != ( p = strchr( str , '\t' ) ) ) { *p = ' '; } // fprintf( stderr, "after = %s\n", str ); } static char 
*extractfirstword( char *str ) { char *val = str; tabtospace( str ); while( *str ) { if( val == str && *str == ' ' ) { val++; str++; } else if( *str != ' ' ) { str++; } else if( *str == ' ' ) { *str = 0; } } return( val ); } void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order, int namelen ) { int pos, pos2, j; if( namelen == -1 ) namelen = 10; pos = 0; fprintf( fp, " %d %d\n", nseq, maxlen ); while( pos < maxlen ) { for( j=0; j%s\n", name[k]+1 ); for( j=0; j%s\n", name[k]+1 ); for( j=0; j 19 ) break; } for( i=0; i<20; i++ ) raw[20][i] = -1.0; while( !feof( mf ) ) { fgets( line, 999, mf ); if( line[0] == 'f' ) { // fprintf( stderr, "line = %s\n", line ); ptr1 = line; for( j=0; j<20; j++ ) { while( !isdigit( *ptr1 ) && *ptr1 != '-' && *ptr1 != '.' ) ptr1++; raw[20][j] = atof( ptr1 ); // fprintf( stderr, "raw[20][]=%f, %c %d\n", raw[20][j], inorder[i], j ); ptr1 = strchr( ptr1, ' ' ); if( ptr1 == NULL && j<19) showaamtxexample(); } break; } } k = 0; for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { if( i != j ) { ii = MAX( map[i], map[j] ); jj = MIN( map[i], map[j] ); } else ii = jj = map[i]; val[k++] = raw[ii][jj]; // fprintf( stderr, "%c-%c, %f\n", aaorder[i], aaorder[j], val[k-1] ); } } for( i=0; i<20; i++ ) val[400+i] = raw[20][map[i]]; fprintf( stderr, "inorder = %s\n", inorder ); fclose( mf ); free( inorder ); free( line ); FreeDoubleMtx( raw ); free( map ); return( val ); } static void tab2space( char *s ) // nen no tame { while( *s ) { if( *s == '\t' ) *s = ' '; s++; } } static int readasubalignment( char *s, int *t, int *preservegaps ) { int v = 0; char status = 's'; char *pt = s; *preservegaps = 0; tab2space( s ); while( *pt ) { if( *pt == ' ' ) { status = 's'; } else { if( status == 's' ) { if( *pt == '\n' || *pt == '#' ) break; status = 'n'; t[v] = atoi( pt ); if( t[v] == 0 ) { fprintf( stderr, "Format error? 
Sequences must be specified as 1, 2, 3...\n" ); exit( 1 ); } if( t[v] < 0 ) *preservegaps = 1; t[v] = abs( t[v] ); t[v] -= 1; v++; } } pt++; } t[v] = -1; return( v ); } static int countspace( char *s ) { int v = 0; char status = 's'; char *pt = s; tab2space( s ); while( *pt ) { if( *pt == ' ' ) { status = 's'; } else { if( status == 's' ) { if( *pt == '\n' || *pt == '#' ) break; v++; status = 'n'; if( atoi( pt ) == 0 ) { fprintf( stderr, "Format error? Sequences should be specified as 1, 2, 3...\n" ); exit( 1 ); } } } pt++; } return( v ); } void readsubalignmentstable( int nseq, int **table, int *preservegaps, int *nsubpt, int *maxmempt ) { FILE *fp; char *line; int linelen = 1000000; int nmem; int lpos; int i, p; int *tab01; line = calloc( linelen, sizeof( char ) ); fp = fopen( "_subalignmentstable", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _subalignmentstable\n" ); exit( 1 ); } if( table == NULL ) { *nsubpt = 0; *maxmempt = 0; while( 1 ) { fgets( line, linelen-1, fp ); if( feof( fp ) ) break; if( line[strlen(line)-1] != '\n' ) { fprintf( stderr, "too long line? 
\n" ); exit( 1 ); } if( line[0] == '#' ) continue; if( atoi( line ) == 0 ) continue; nmem = countspace( line ); if( nmem > *maxmempt ) *maxmempt = nmem; (*nsubpt)++; } } else { tab01 = calloc( nseq, sizeof( int ) ); for( i=0; i nseq-1 ) { fprintf( stderr, "Sequence %d does not exist in the input sequence file.\n", p+1 ); exit( 1 ); } } lpos++; } free( tab01 ); } fclose( fp ); free( line ); } void readmccaskill( FILE *fp, RNApair **pairprob, int length ) { char gett[1000]; int *pairnum; int i; int left, right; float prob; int c; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i' || c == EOF ) { break; } fgets( gett, 999, fp ); // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%d %d %f", &left, &right, &prob ); if( left >= length || right >= length ) { fprintf( stderr, "format error in hat4 - 2\n" ); exit( 1 ); } if( prob < 0.01 ) continue; // 080607, mafft ni dake eikyou if( left != right && prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); } } free( pairnum ); } void readpairfoldalign( FILE *fp, char *s1, char *s2, char *aln1, char *aln2, int q1, int q2, int *of1, int *of2, int sumlen ) { char gett[1000]; int *maptoseq1; int *maptoseq2; char dumc; int dumi; char sinseq[100], sinaln[100]; int posinseq, posinaln; int alnlen; int i; int pos1, pos2; char *pa1, *pa2; char 
qstr[1000]; *of1 = -1; *of2 = -1; maptoseq1 = AllocateIntVec( sumlen+1 ); maptoseq2 = AllocateIntVec( sumlen+1 ); posinaln = 0; // foldalign ga alingment wo kaesanaitok no tame. while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ALIGNING", 10 ) ) break; } sprintf( qstr, "; ALIGNING %d against %d\n", q1+1, q2+1 ); if( strcmp( gett, qstr ) ) { fprintf( stderr, "Error in FOLDALIGN\n" ); fprintf( stderr, "qstr = %s, but gett = %s\n", qstr, gett ); exit( 1 ); } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; --------", 10 ) ) break; } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ********", 10 ) ) break; // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi ); posinaln = atoi( sinaln ); posinseq = atoi( sinseq ); // fprintf( stderr, "posinseq = %d\n", posinseq ); // fprintf( stderr, "posinaln = %d\n", posinaln ); maptoseq1[posinaln-1] = posinseq-1; } alnlen = posinaln; while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; --------", 10 ) ) break; } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ********", 10 ) ) break; // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi ); posinaln = atof( sinaln ); posinseq = atof( sinseq ); // fprintf( stderr, "posinseq = %d\n", posinseq ); // fprintf( stderr, "posinaln = %d\n", posinaln ); maptoseq2[posinaln-1] = posinseq-1; } if( alnlen != posinaln ) { fprintf( stderr, "Error in foldalign?\n" ); exit( 1 ); } pa1 = aln1; pa2 = aln2; for( i=0; i -1 ) *pa1++ = s1[pos1]; else *pa1++ = '-'; if( pos2 > -1 ) *pa2++ = s2[pos2]; else *pa2++ = '-'; } *pa1 = 0; *pa2 = 0; *of1 = 0; for( i=0; i -1 ) break; } *of2 = 0; for( i=0; i -1 ) break; } // fprintf( stderr, "*of1=%d, aln1 = :%s:\n", *of1, aln1 ); // fprintf( stderr, "*of2=%d, aln2 = :%s:\n", *of2, aln2 ); free( maptoseq1 ); free( maptoseq2 ); } int 
myatoi( char *in ) { if( in == NULL ) { fprintf( stderr, "Error in myatoi()\n" ); exit( 1 ); } return( atoi( in ) ); } mafft-7.123-without-extensions/core/regtable2seq.c0000640000076500007650000001073111522407352021204 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 char *regfile; char *eregfile; void arguments( int argc, char *argv[] ) { int c; outnumber = 0; inputfile = NULL; regfile = NULL; eregfile = NULL; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'e': eregfile = *++argv; fprintf( stderr, "eregfile = %s\n", eregfile ); --argc; goto nextoption; case 'r': regfile = *++argv; fprintf( stderr, "regfile = %s\n", regfile ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'n' : outnumber = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } void readereg( FILE *regfp, int **regtable, char **revtable, int *outtable, int *noutpt, int *loutpt ) { char gett[1000]; int j; int mem; char cmem; char reg[5][100]; char out[100]; int startpos, endpos; *noutpt = 0; *loutpt = 0; fgets( gett, 999, regfp ); sscanf( gett, "%c %s %s %s %s %s", &cmem, reg[0], reg[1], reg[2], reg[3], reg[4] ); if( cmem != 'e' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } for( j=0; j<5; j++ ) { sscanf( reg[j], "%d-%d-%c", regtable[0]+(j*2), regtable[0]+(j*2)+1, revtable[0]+j ); fprintf( stderr, "%d %d-%d\n", 0, regtable[0][j*2], regtable[0][j*2+1] ); startpos = regtable[0][j*2]; endpos = regtable[0][j*2+1]; if( startpos > endpos ) { endpos = regtable[0][j*2]; startpos = regtable[0][j*2+1]; } if( startpos != -1 && endpos != -1 ) *loutpt += endpos - startpos + 1; } while( 1 ) { fgets( gett, 999, regfp ); if( feof( regfp ) ) break; sscanf( gett, "%d o=%s", &mem, out ); if( mem >= njob ) { fprintf( stderr, "Out of range\n" ); 
exit( 1 ); } outtable[mem] = atoi( out ); if( outtable[mem] ) *noutpt += 1; } } void readreg( FILE *regfp, int **regtable, char **revtable, int *outtable ) { char gett[1000]; int j; int mem; char reg[5][100]; char out[100]; while( 1 ) { fgets( gett, 999, regfp ); if( feof( regfp ) ) break; sscanf( gett, "%d %s %s %s %s %s o=%s", &mem, reg[0], reg[1], reg[2], reg[3], reg[4], out ); if( mem >= njob ) { fprintf( stderr, "Out of range\n" ); exit( 1 ); } for( j=0; j<5; j++ ) { sscanf( reg[j], "%d-%d-%c", regtable[mem]+(j*2), regtable[mem]+(j*2)+1, revtable[mem]+j ); fprintf( stderr, "%d %d-%d\n", mem, regtable[mem][j*2], regtable[mem][j*2+1] ); } outtable[mem] = atoi( out ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *regfp; int nlenmin; int **regtable; char **revtable; int *outtable; int i, nout, lout; char **outseq; char **name; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; dorp = NOTSPECIFIED; getnumlen_nogap( infp, &nlenmin ); if( regfile ) { regfp = fopen( regfile, "r" ); if( !regfp ) { fprintf( stderr, "Cannot open %s\n", regfile ); exit( 1 ); } regtable = AllocateIntMtx( njob, 5*2 ); revtable = AllocateCharMtx( njob, 5 ); outtable = AllocateIntVec( njob ); readreg( regfp, regtable, revtable, outtable ); cutData( infp, regtable, revtable, outtable ); } else if( eregfile ) { regfp = fopen( eregfile, "r" ); if( !regfp ) { fprintf( stderr, "Cannot open %s\n", eregfile ); exit( 1 ); } regtable = AllocateIntMtx( 1, 5*2 ); revtable = AllocateCharMtx( 1, 5 ); outtable = AllocateIntVec( njob ); readereg( regfp, regtable, revtable, outtable, &nout, &lout ); fprintf( stderr, "nout = %d, lout = %d\n", nout, lout ); outseq = AllocateCharMtx( nout, lout+1 ); name = AllocateCharMtx( nout, B ); cutAlignment( infp, regtable, revtable, outtable, name, outseq ); fprintf( stderr, "gappick! 
nout = %d\n", nout ); commongappick( nout, outseq ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'w': weboutfile = *++argv; fprintf( stderr, "weboutfile = %s\n", weboutfile ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *weboutfp; int nlenmin; int isaligned = 0; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( weboutfile ) { weboutfp = fopen( weboutfile, "w" ); if( !weboutfp ) { fprintf( stderr, "Cannot open %s\n", weboutfile ); exit( 1 ); } } dorp = NOTSPECIFIED; if( weboutfile ) { getnumlen_nogap_outallreg_web( infp, weboutfp, &nlenmin, &isaligned ); if( isaligned ) fprintf( stdout, "Aligned\n" ); else fprintf( stdout, "Not aligned\n" ); } else getnumlen_nogap_outallreg( infp, &nlenmin ); return( 0 ); } mafft-7.123-without-extensions/core/blosum.c0000640000076500007650000003331312225635445020135 0ustar katohkatoh#define DEFAULTGOP_B -1530 #define DEFAULTGEP_B -00 #define DEFAULTOFS_B -123 /* +10 -- -50 teido ka ? 
*/ void BLOSUMmtx( int n, double **matrix, double *freq, char *amino, char *amino_grp ) { /* char locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U"; */ // char locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-U"; char locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-J"; char locgrpd[] = { 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, 6, 6, 6, 1, }; double freqd[20] = { 0.077, 0.051, 0.043, 0.052, 0.020, 0.041, 0.062, 0.074, 0.023, 0.052, 0.091, 0.059, 0.024, 0.040, 0.051, 0.069, 0.059, 0.014, 0.032, 0.066, }; double tmpmtx30[] = { 4, -1, 8, 0, -2, 8, 0, -1, 1, 9, -3, -2, -1, -3, 17, 1, 3, -1, -1, -2, 8, 0, -1, -1, 1, 1, 2, 6, 0, -2, 0, -1, -4, -2, -2, 8, -2, -1, -1, -2, -5, 0, 0, -3, 14, 0, -3, 0, -4, -2, -2, -3, -1, -2, 6, -1, -2, -2, -1, 0, -2, -1, -2, -1, 2, 4, 0, 1, 0, 0, -3, 0, 2, -1, -2, -2, -2, 4, 1, 0, 0, -3, -2, -1, -1, -2, 2, 1, 2, 2, 6, -2, -1, -1, -5, -3, -3, -4, -3, -3, 0, 2, -1, -2, 10, -1, -1, -3, -1, -3, 0, 1, -1, 1, -3, -3, 1, -4, -4, 11, 1, -1, 0, 0, -2, -1, 0, 0, -1, -1, -2, 0, -2, -1, -1, 4, 1, -3, 1, -1, -2, 0, -2, -2, -2, 0, 0, -1, 0, -2, 0, 2, 5, -5, 0, -7, -4, -2, -1, -1, 1, -5, -3, -2, -2, -3, 1, -3, -3, -5, 20, -4, 0, -4, -1, -6, -1, -2, -3, 0, -1, 3, -1, -1, 3, -2, -2, -1, 5, 9, 1, -1, -2, -2, -2, -3, -3, -3, -3, 4, 1, -2, 0, 1, -4, -1, 1, -3, 1, 5, 0, -2, 4, 5, -2, -1, 0, 0, -2, -2, -1, 0, -2, -3, -2, 0, 0, -5, -3, -2, 5, 0, 0, -1, 0, 0, 4, 5, -2, 0, -3, -1, 1, -1, -4, 0, -1, -1, -1, -2, -3, 0, 4, 0, -1, 0, -1, -2, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, -2, -1, 0, -1, 0, -1, }; double tmpmtx45[] = { 5, -2, 7, -1, 0, 6, -2, -1, 2, 7, -1, -3, -2, -3, 12, -1, 1, 0, 0, -3, 6, -1, 0, 0, 2, -3, 2, 6, 0, -2, 0, -1, -3, -2, -2, 7, -2, 0, 1, 0, -3, 1, 0, -2, 10, -1, -3, -2, -4, -3, -2, -3, -4, -3, 5, -1, -2, -3, -3, -2, -2, -2, -3, -2, 2, 5, -1, 3, 0, 0, -3, 1, 1, -2, -1, -3, -3, 5, -1, -1, -2, -3, -2, 0, -2, -2, 0, 2, 2, -1, 6, -2, -2, -2, -4, -2, -4, -3, -3, -2, 0, 1, -3, 0, 8, -1, -2, -2, -1, -4, -1, 0, -2, -2, -2, -3, -1, -2, -3, 9, 1, -1, 1, 
0, -1, 0, 0, 0, -1, -2, -3, -1, -2, -2, -1, 4, 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, 2, 5, -2, -2, -4, -4, -5, -2, -3, -2, -3, -2, -2, -2, -2, 1, -3, -4, -3, 15, -2, -1, -2, -2, -3, -1, -2, -3, 2, 0, 0, -1, 0, 3, -3, -2, -1, 3, 8, 0, -2, -3, -3, -1, -3, -3, -3, -3, 3, 1, -2, 1, 0, -3, -1, 0, -3, -1, 5, }; double tmpmtx50[] = { 5, -2, 7, -1, -1, 7, -2, -2, 2, 8, -1, -4, -2, -4, 13, -1, 1, 0, 0, -3, 7, -1, 0, 0, 2, -3, 2, 6, 0, -3, 0, -1, -3, -2, -3, 8, -2, 0, 1, -1, -3, 1, 0, -2, 10, -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, }; double tmpmtx62[] = { 6, -2, 8, -2, -1, 8, -3, -2, 2, 9, -1, -5, -4, -5, 13, -1, 1, 0, 0, -4, 8, -1, 0, 0, 2, -5, 3, 7, 0, -3, -1, -2, -4, -3, -3, 8, -2, 0, 1, -2, -4, 1, 0, -3, 11, -2, -4, -5, -5, -2, -4, -5, -6, -5, 6, -2, -3, -5, -5, -2, -3, -4, -5, -4, 2, 6, -1, 3, 0, -1, -5, 2, 1, -2, -1, -4, -4, 7, -1, -2, -3, -5, -2, -1, -3, -4, -2, 2, 3, -2, 8, -3, -4, -4, -5, -4, -5, -5, -5, -2, 0, 1, -5, 0, 9, -1, -3, -3, -2, -4, -2, -2, -3, -3, -4, -4, -2, -4, -5, 11, 2, -1, 1, 0, -1, 0, 0, 0, -1, -4, -4, 0, -2, -4, -1, 6, 0, -2, 0, -2, -1, -1, -1, -2, -3, -1, -2, -1, -1, -3, -2, 2, 7, -4, -4, -6, -6, -3, -3, -4, -4, -4, -4, -2, -4, -2, 1, -5, -4, -4, 16, -3, -3, -3, -5, -4, -2, -3, -5, 3, -2, -2, -3, -1, 4, -4, -3, -2, 3, 10, 0, -4, -4, -5, -1, -3, -4, -5, -5, 4, 1, -3, 1, -1, -4, -2, 0, -4, -2, 6, }; double tmpmtx80[] = { 7, -3, 9, -3, -1, 9, -3, -3, 2, 10, 
-1, -6, -5, -7, 13, -2, 1, 0, -1, -5, 9, -2, -1, -1, 2, -7, 3, 8, 0, -4, -1, -3, -6, -4, -4, 9, -3, 0, 1, -2, -7, 1, 0, -4, 12, -3, -5, -6, -7, -2, -5, -6, -7, -6, 7, -3, -4, -6, -7, -3, -4, -6, -7, -5, 2, 6, -1, 3, 0, -2, -6, 2, 1, -3, -1, -5, -4, 8, -2, -3, -4, -6, -3, -1, -4, -5, -4, 2, 3, -3, 9, -4, -5, -6, -6, -4, -5, -6, -6, -2, -1, 0, -5, 0, 10, -1, -3, -4, -3, -6, -3, -2, -5, -4, -5, -5, -2, -4, -6, 12, 2, -2, 1, -1, -2, -1, -1, -1, -2, -4, -4, -1, -3, -4, -2, 7, 0, -2, 0, -2, -2, -1, -2, -3, -3, -2, -3, -1, -1, -4, -3, 2, 8, -5, -5, -7, -8, -5, -4, -6, -6, -4, -5, -4, -6, -3, 0, -7, -6, -5, 16, -4, -4, -4, -6, -5, -3, -5, -6, 3, -3, -2, -4, -3, 4, -6, -3, -3, 3, 11, -1, -4, -5, -6, -2, -4, -4, -6, -5, 4, 1, -4, 1, -2, -4, -3, 0, -5, -3, 7, }; double tmpmtx0[] = { 2.4, -0.6, 4.7, -0.3, 0.3, 3.8, -0.3, -0.3, 2.2, 4.7, 0.5, -2.2, -1.8, -3.2, 11.5, -0.2, 1.5, 0.7, 0.9, -2.4, 2.7, 0.0, 0.4, 0.9, 2.7, -3.0, 1.7, 3.6, 0.5, -1.0, 0.4, 0.1, -2.0, -1.0, -0.8, 6.6, -0.8, 0.6, 1.2, 0.4, -1.3, 1.2, 0.4, -1.4, 6.0, -0.8, -2.4, -2.8, -3.8, -1.1, -1.9, -2.7, -4.5, -2.2, 4.0, -1.2, -2.2, -3.0, -4.0, -1.5, -1.6, -2.8, -4.4, -1.9, 2.8, 4.0, -0.4, 2.7, 0.8, 0.5, -2.8, 1.5, 1.2, -1.1, 0.6, -2.1, -2.1, 3.2, -0.7, -1.7, -2.2, -3.0, -0.9, -1.0, -2.0, -3.5, -1.3, 2.5, 2.8, -1.4, 4.3, -2.3, -3.2, -3.1, -4.5, -0.8, -2.6, -3.9, -5.2, -0.1, 1.0, 2.0, -3.3, 1.6, 7.0, 0.3, -0.9, -0.9, -0.7, -3.1, -0.2, -0.5, -1.6, -1.1, -2.6, -2.3, -0.6, -2.4, -3.8, 7.6, 1.1, -0.2, 0.9, 0.5, 0.1, 0.2, 0.2, 0.4, -0.2, -1.8, -2.1, 0.1, -1.4, -2.8, 0.4, 2.2, 0.6, -0.2, 0.5, 0.0, -0.5, 0.0, -0.1, -1.1, -0.3, -0.6, -1.3, 0.1, -0.6, -2.2, 0.1, 1.5, 2.5, -3.6, -1.6, -3.6, -5.2, -1.0, -2.7, -4.3, -4.0, -0.8, -1.8, -0.7, -3.5, -1.0, 3.6, -5.0, -3.3, -3.5, 14.2, -2.2, -1.8, -1.4, -2.8, -0.5, -1.7, -2.7, -4.0, 2.2, -0.7, 0.0, -2.1, -0.2, 5.1, -3.1, -1.9, -1.9, 4.1, 7.8, 0.1, -2.0, -2.2, -2.9, 0.0, -1.5, -1.9, -3.3, -2.0, 3.1, 1.8, -1.7, 1.6, 0.1, -1.8, -1.0, 0.0, -2.6, -1.1, 3.4, }; int i, j, count; double av; 
double *tmpmtx; if( n == 30 ) tmpmtx = tmpmtx30; else if( n == 45 ) tmpmtx = tmpmtx45; else if( n == 50 ) tmpmtx = tmpmtx50; else if( n == 62 ) tmpmtx = tmpmtx62; else if( n == 80 ) tmpmtx = tmpmtx80; else if( n == 0 ) tmpmtx = tmpmtx0; else if( n == -1 ) tmpmtx = loadaamtx(); else { fprintf( stderr, "blosum %d ?\n", n ); exit( 1 ); } count = 0; for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { matrix[i][j] = matrix[j][i] = (double)tmpmtx[count++]; } } if( n == -1 && tmpmtx[400] != -1.0 ) { for( i=0; i<20; i++ ) freq[i] = tmpmtx[400+i]; av = 0.0; for( i=0; i<20; i++ ) av += freq[i]; for( i=0; i<20; i++ ) freq[i] /= av; } else for( i=0; i<20; i++ ) freq[i] = freqd[i]; #if 0 av = 0.0; for( i=0; i<20; i++ ) av += matrix[i][i]; av /= 20; fprintf( stdout, "av = %f\n", av ); for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) matrix[i][j] /= av; av = wav = 0; count = 0; wcount = 0.0; tmptmp = 0.0; for( i=0; i<20; i++ ) { fprintf( stdout, "freq[%d] = %f\n", i, freq[i] ); tmptmp += freq[i]; for( j=0; j<20; j++ ) { av += matrix[i][j]; wav += freq[i] * freq[j] * matrix[i][j]; count++; wcount += freq[i] * freq[j]; } } av /= count; wav /= wcount; fprintf( stdout, "av = %f\n", av ); fprintf( stdout, "wav = %f\n", wav ); fprintf( stdout, "wcount = %f\n", wcount ); fprintf( stdout, "tmptmp = %f\n", tmptmp ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stderr, "## %d-%d, %f\n", i, j, matrix[i][j] ); } } exit( 1 ); #endif for( i=0; i<26; i++ ) amino[i] = locaminod[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i]; } void extendedmtx( double **matrix, double *freq, char *amino, char *amino_grp ) { int i; int j; for( i=0; ithread_no; int njob = targ->njob; int nbranch = targ->nbranch; int maxiter = targ->maxiter; int *ndonept = targ->ndonept; int *ntrypt = targ->ntrypt; int *collectingpt = targ->collectingpt; int *jobposintpt = targ->jobposintpt; int nkozo = targ->nkozo; float *gainlist = targ->gainlist; float *tscorelist = targ->tscorelist; int *generationofinput = 
targ->generationofinput; int *subgenerationpt = targ->subgenerationpt; float *basegainpt = targ->basegainpt; char *kozoarivec = targ->kozoarivec; char **mastercopy = targ->mastercopy; char ***candidates = targ->candidates; int *generationofmastercopypt = targ->generationofmastercopypt; int *branchtable = targ->branchtable; RNApair ***singlerna = targ->singlerna; LocalHom **localhomtable = targ->localhomtable; int alloclen = targ->alloclen; Node * stopol = targ->stopol; int ***topol = targ->topol; // double **len = targ->len; float **tscorehistory_detail = targ->tscorehistory_detail; int *finishpt = targ->finishpt; int **skipthisbranch = targ->skipthisbranch; int i, k, l, ii; float gain; int iterate; int **memlist; char *pairbuf; int locnjob; int s1, s2; int clus1, clus2; char **localcopy; char **mseq1, **mseq2; double *effarr, *effarr_kozo; // re-calc double *effarr1, *effarr2, *effarr1_kozo, *effarr2_kozo; char *indication1, *indication2; int length; RNApair ***grouprna1, ***grouprna2; RNApair *rnapairboth; LocalHom ***localhomshrink; int *gapmap1, *gapmap2; float tscore, mscore, oimpmatch, impmatch; int identity; double tmpdouble; float naivescore0 = 0, naivescore1; double *effarrforlocalhom; float *tscorehistory; int intdum; #if 0 int oscillating; int lin, ldf; #endif float maxgain; int bestthread; int branchpos; int subgenerationatfirst; double unweightedspscore; int myjob; int converged2 = 0; int chudanres; locnjob = njob; if( utree == 0 ) { fprintf( stderr, "Dynamic tree is not supported in the multithread version.\n" ); exit( 1 ); } if( score_check == 2 ) { fprintf( stderr, "Score_check 2 is not supported in the multithread version.\n" ); exit( 1 ); } if( weight == 2 ) { fprintf( stderr, "Weight 2 is not supported in the multithread version.\n" ); exit( 1 ); } if( cooling && cut > 0.0 ) { fprintf( stderr, "Cooling is not supported in the multithread version.\n" ); exit( 1 ); } tscorehistory = calloc( maxiter, sizeof( float ) ); if( rnakozo && rnaprediction 
== 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr = AllocateDoubleVec( locnjob ); effarrforlocalhom = AllocateDoubleVec( locnjob ); effarr1 = AllocateDoubleVec( locnjob ); effarr2 = AllocateDoubleVec( locnjob ); mseq1 = AllocateCharMtx( locnjob, 0 ); mseq2 = AllocateCharMtx( locnjob, 0 ); localcopy = AllocateCharMtx( locnjob, alloclen ); gapmap1 = AllocateIntVec( alloclen ); gapmap2 = AllocateIntVec( alloclen ); effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr_kozo = AllocateDoubleVec( locnjob ); for( i=0; imutex ); if( *collectingpt == 1 ) { *collectingpt = 0; *generationofmastercopypt = iterate; *subgenerationpt = 0; *basegainpt = 0.0; *ndonept = 0; *jobposintpt = 0; for( i=0; icollection_end ); pthread_mutex_unlock( targ->mutex ); } else { pthread_cond_broadcast( targ->collection_end ); pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink ); // return( NULL ); pthread_exit( NULL ); } pthread_mutex_lock( targ->mutex ); while( *ndonept < nbranch ) pthread_cond_wait( targ->collection_start, targ->mutex ); pthread_mutex_unlock( targ->mutex ); // fprintf( stderr, "Thread 0 got a signal, *collectionpt = %d\n", *collectingpt ); /* Hoka no thread ga keisan */ pthread_mutex_lock( targ->mutex ); *collectingpt = 1; // chofuku #if 0 for( i=0; i maxgain ) { maxgain = gainlist[i]; bestthread = i; } } if( maxgain > 0.0 ) { // fprintf( stderr, "\nGain = %f\n", maxgain ); // fprintf( stderr, "best gain = %f by thread %d\n", 
gainlist[bestthread], bestthread ); // fprintf( stderr, "tscorelist[best] = %f by thread %d\n", tscorelist[bestthread], bestthread ); if( parallelizationstrategy == BESTFIRST ) { for( i=0; i0; i-- ) { // if( iterate-i < 15 ) fprintf( stderr, "hist[%d] = %f\n", i, tscorehistory[i] ); if( tscorehistory[i] == tscorelist[bestthread] ) { fprintf( stderr, "\nOscillating? %f == %f\n", tscorehistory[i], tscorelist[bestthread] ); *collectingpt = -1; break; } } tscorehistory[iterate] = tscorelist[bestthread]; #endif } else { fprintf( stderr, "\nConverged.\n" ); *collectingpt = -1; // pthread_cond_broadcast( targ->collection_end ); // pthread_mutex_unlock( targ->mutex ); // freelocalarrays(); // return( NULL ); // pthread_exit( NULL ); } #if 1 if( *finishpt ) { fprintf( stderr, "\nConverged2.\n" ); *collectingpt = -1; } #endif pthread_mutex_unlock( targ->mutex ); } pthread_mutex_lock( targ->mutex ); fprintf( stderr, "\nReached %d\n", maxiter ); *collectingpt = -1; pthread_cond_broadcast( targ->collection_end ); pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink ); return( NULL ); pthread_exit( NULL ); } else { while( 1 ) { #if 0 if( iterate % 2 == 0 ) { lin = 0; ldf = +1; } else { lin = locnjob - 2; ldf = -1; } for( l=lin; l < locnjob-1 && l >= 0 ; l+=ldf ) for( k=0; k<2; k++ ) #endif pthread_mutex_lock( targ->mutex ); while( *collectingpt > 0 ) pthread_cond_wait( targ->collection_end, targ->mutex ); if( *collectingpt == -1 ) { pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink ); return( NULL ); 
pthread_exit( NULL ); } // pthread_mutex_unlock( targ->mutex ); // pthread_mutex_lock( targ->mutex ); if( *jobposintpt == nbranch ) { if( *collectingpt != -1 ) *collectingpt = 1; // chofuku pthread_mutex_unlock( targ->mutex ); continue; } // fprintf( stderr, "JOB jobposintpt=%d\n", *jobposintpt ); myjob = branchtable[*jobposintpt]; l = myjob / 2; if( l == locnjob-2 ) k = 1; else k = myjob - l * 2; // fprintf( stderr, "JOB l=%d, k=%d\n", l, k ); branchpos = myjob; (*jobposintpt)++; iterate = *generationofmastercopypt; (*ntrypt)++; pthread_mutex_unlock( targ->mutex ); // fprintf( stderr, "\n IRANAI IRANAI *jobposintpt=%d, nbranch = %d\n", *jobposintpt, nbranch ); // fprintf( stderr, "branchpos = %d (thread %d)\n", branchpos, thread_no ); // fprintf( stderr, "iterate=%d, l=%d, k=%d (thread %d)\n", iterate, l, k, thread_no ); #if 0 fprintf( stderr, "STEP %03d-%03d-%d (Thread %d) ", iterate+1, l+1, k, thread_no ); fprintf( stderr, "STEP %03d-%03d-%d (thread %d) %s ", iterate+1, l+1, k, thread_no, use_fft?"\n":"\n" ); #endif // for( i=0; i<2; i++ ) for( j=0; jmutex ); for( i=0; imutex ); length = strlen( localcopy[0] ); if( nkozo ) { // double tmptmptmp; // tmptmptmp = 0.0; // clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair[0], s1, localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); clus1 = fastconjuction_noname_kozo( memlist[0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); for( i=0; i=0; i-- ) { oimpmatch += part_imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatch += part_imp_match_out_sc( i, i ); // 
fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } else { if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f\n", i, oimpmatch ); } } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); fprintf( stderr, "not supported\n" ); exit( 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } // fprintf( stderr, "#### initial impmatch = %f\n", oimpmatch ); } else { oimpmatch = 0.0; } // fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble ); mscore = (double)oimpmatch + tmpdouble; } else { fprintf( stderr, "score_check = %d\n", score_check ); fprintf( stderr, "Not supported\n" ); exit( 1 ); } // if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth ); // if( !use_fft && !rnakozo ) if( !use_fft ) { commongappick_record( clus1, mseq1, gapmap1 ); commongappick_record( clus2, mseq2, gapmap2 ); } #if 0 fprintf( stderr, "##### mscore = %f\n", mscore ); #endif #if DEBUG if( !devide ) { fprintf( trap_g, "\n%d-%d-%d\n", iterate+1, l+1, k ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fflush( trap_g ); } #endif #if 0 printf( "STEP %d-%d-%d\n", iterate, l, k ); for( i=0; i%f\n", oimpmatch, impmatch ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, 
effarr2, penalty ) + impmatch; fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif #endif } else if( alg == 'R' ) { float wm; imp_match_init_strictR( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL ); // fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch; // fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: %f->%f UP!\n", clus1, clus2, naivescore0, naivescore1 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); rewind( stdout ); for( i=0; i\n%s\n", mseq1[i] ); for( i=0; i\n%s\n", mseq2[i] ); exit( 1 ); } #endif } else if( alg == 'H' ) { float wm; imp_match_init_strictH( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL ); fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch ); naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch; fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else { // imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); A__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2 ); fprintf( stderr, "A__align_gapmap\n" ); // fprintf( stderr, "##### impmatch = %f\n", impmatch ); } } } else if( use_fft ) { float totalwm; chudanres = 0; totalwm = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, subgenerationpt, subgenerationatfirst, &chudanres ); if( chudanres && parallelizationstrategy == BAATARI2 ) { // fprintf( stderr, "#### yarinaoshi!!! FFT-NS-i\n" ); goto yarinaoshi; } // fprintf( stderr, "totalwm = %f\n", totalwm ); #if 0 if( alg == 'Q' ) { fprintf( stderr, "totalwm = %f\n", totalwm ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } #endif if( alg == 'R' ) { fprintf( stderr, "totalwm = %f\n", totalwm ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } } #endif } else { if( alg == 'M' ) { chudanres = 0; MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, subgenerationpt, subgenerationatfirst, &chudanres, outgap, outgap ); if( chudanres && parallelizationstrategy == BAATARI2 ) { // fprintf( stderr, "#### yarinaoshi!!! 
NW-NS-i\n" ); goto yarinaoshi; } } else if( alg == 'A' ) { A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap==1 } else if( alg == 'Q' ) { float wm; wm = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); fprintf( stderr, "wm = %f\n", wm ); fprintf( stderr, "impmatch = %f\n", impmatch ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stderr ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else if( alg == 'R' ) { float wm; wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stderr ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else if( alg == 'H' ) { float wm; wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) { fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 ); } else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { rewind( stdout ); for( i=0; i\n%s\n", mseq1[i] ); for( i=0; i\n%s\n", mseq2[i] ); exit( 1 ); } #endif } else if( alg == 'a' ) { Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen ); } else ErrorExit( "Sorry!" ); } // fprintf( stderr, "## impmatch = %f\n", impmatch ); #if 1 if( parallelizationstrategy == BAATARI2 && *subgenerationpt != subgenerationatfirst ) { // fprintf( stderr, "\nYarinaoshi2!! (Thread %d)\n", thread_no ); goto yarinaoshi; } #endif identity = !strcmp( localcopy[s1], mastercopy[s1] ); identity *= !strcmp( localcopy[s2], mastercopy[s2] ); fprintf( stderr, "%03d-%04d-%d (thread %4d) identical \r", iterate+1, *ndonept, k, thread_no ); } else { identity = 1; fprintf( stderr, "%03d-%04d-%d (thread %4d) skip \r", iterate+1, *ndonept, k, thread_no ); } /* Bug? 
: idnetitcal but score change when scoreing mtx != JTT */ length = strlen( mseq1[0] ); if( identity ) { tscore = mscore; } else { if( score_check ) { if( constraint == 2 ) { #if 1 if( RNAscoremtx == 'r' ) intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #endif tscore = impmatch + tmpdouble; // fprintf( stderr, "tmpdouble=%f, impmatch = %f -> %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore ); } else { intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); tscore = tmpdouble; } // fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore ); #if 0 for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq1[i] ); fprintf( stderr, "+++++++\n" ); for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq2[i] ); #endif } else { tscore = mscore + 1.0; // tscore = 0.0; // fprintf( stderr, "in line 705, tscore=%f\n", tscore ); // for( i=0; i 0 ) { if( parallelizationstrategy == BESTFIRST ) { if( gain > gainlist[thread_no] ) { gainlist[thread_no] = gain; for( i=0; imutex ); for( i=0; imutex ); tscorelist[thread_no] = tscore; } #if 0 fprintf( stderr, "tscore = %f mscore = %f accepted.\n", tscore, mscore ); fprintf( stderr, "\nbetter! gain = %f (thread %d)\r", gain, thread_no ); #else fprintf( stderr, "%03d-%04d-%d (thread %4d) better \r", iterate+1, *ndonept, k, thread_no ); #endif } else { #if 0 fprintf( stderr, "tscore = %f mscore = %f rejected.\r", tscore, mscore ); fprintf( stderr, "worse! 
gain = %f", gain ); #else fprintf( stderr, "%03d-%04d-%d (thread %4d) worse \r", iterate+1, *ndonept, k, thread_no ); #endif tscore = mscore; } } converged2 = 0; for( ii=iterate-2; ii>=0; ii-=1 ) { // fprintf( stderr, "Checking tscorehistory %f ?= %f\n", tscore, tscorehistory_detail[ii][branchpos] ); if( tscore == tscorehistory_detail[ii][branchpos] ) { converged2 = 1; break; } } if( parallelizationstrategy != BESTFIRST && converged2 ) { // fprintf( stderr, "\nFINISH!\n" ); pthread_mutex_lock( targ->mutex ); *finishpt = 1; pthread_mutex_unlock( targ->mutex ); } tscorehistory_detail[iterate][branchpos] = tscore; fprintf( stderr, "\r" ); pthread_mutex_lock( targ->mutex ); (*ndonept)++; // fprintf( stderr, "*ndonept = %d, nbranch = %d (thread %d) iterate=%d\n", *ndonept, nbranch, thread_no, iterate ); generationofinput[branchpos] = iterate; if( *ndonept == nbranch ) { if( *collectingpt != -1 ) *collectingpt = 1; // chofuku // fprintf( stderr, "Thread %d sends a signal, *ndonept = %d\n", thread_no, *ndonept ); pthread_cond_signal( targ->collection_start ); } pthread_mutex_unlock( targ->mutex ); } /* while( 1 ) */ } /* for( iterate ) */ // return( NULL ); } #endif int TreeDependentIteration( int locnjob, char **name, int nlen[M], char **aseq, char **bseq, int ***topol, double **len, int **skipthisbranch, int alloclen, LocalHom **localhomtable, RNApair ***singlerna, int nkozo, char *kozoarivec ) { int i, j, k, l, iterate, ii, iu, ju; int lin, ldf, length; int clus1, clus2; int s1, s2; static double **imanoten; static Node *stopol; static double *effarrforlocalhom = NULL; static double *effarr = NULL; static double *effarr1 = NULL; static double *effarr2 = NULL; static double *effarr_kozo = NULL; static double *effarr1_kozo = NULL; static double *effarr2_kozo = NULL; static double **mtx = NULL; static int **node = NULL; static int *branchnode = NULL; static double **branchWeight = NULL; static char **mseq1, **mseq2; static float ***history; FILE *trap; double tscore, 
mscore; int identity; int converged; int oscillating; float naivescore0 = 0.0; // by D.Mathog, a guess float naivescore1; #if 0 char pair[njob][njob]; #else static int **memlist; static char *pairbuf; #endif #if DEBUG + RECORD double score_for_check0, score_for_check1; static double **effmtx = NULL; extern double score_calc0(); #endif static char *indication1, *indication2; static LocalHom ***localhomshrink = NULL; float impmatch = 0.0, oimpmatch = 0.0; static int *gapmap1; static int *gapmap2; double tmpdouble; int intdum; static RNApair *rnapairboth; RNApair ***grouprna1, ***grouprna2; double unweightedspscore; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } Writeoptions( trap_g ); fflush( trap_g ); if( effarr == NULL ) /* locnjob == njob ni kagiru */ { indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr = AllocateDoubleVec( locnjob ); effarrforlocalhom = AllocateDoubleVec( locnjob ); effarr1 = AllocateDoubleVec( locnjob ); effarr2 = AllocateDoubleVec( locnjob ); mseq1 = AllocateCharMtx( locnjob, 0 ); mseq2 = AllocateCharMtx( locnjob, 0 ); mtx = AllocateDoubleMtx( locnjob, locnjob ); node = AllocateIntMtx( locnjob, locnjob ); branchnode = AllocateIntVec( locnjob ); branchWeight = AllocateDoubleMtx( locnjob, 2 ); history = AllocateFloatCub( niter, locnjob, 2 ); stopol = (Node *)calloc( locnjob * 2, sizeof( Node ) ); gapmap1 = AllocateIntVec( alloclen ); gapmap2 = AllocateIntVec( alloclen ); if( score_check == 2 ) imanoten = AllocateDoubleMtx( njob, njob ); effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. 
effarr_kozo = AllocateDoubleVec( locnjob ); for( i=0; i 0 ) { threadarg_t *targ; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t collection_end; pthread_cond_t collection_start; int jobposint; int generationofmastercopy; int subgeneration; float basegain; int *generationofinput; float *gainlist; float *tscorelist; int ndone; int ntry; int collecting; int nbranch; int maxiter; char ***candidates; int *branchtable; float **tscorehistory_detail; int finish; nwa = nthread + 1; nbranch = (njob-1) * 2 - 1; maxiter = niter; targ = calloc( nwa, sizeof( threadarg_t ) ); handle = calloc( nwa, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &collection_end, NULL ); pthread_cond_init( &collection_start, NULL ); gainlist = calloc( nwa, sizeof( float ) ); tscorelist = calloc( nwa, sizeof( float ) ); branchtable = calloc( nbranch, sizeof( int ) ); generationofinput = calloc( nbranch, sizeof( int ) ); if( parallelizationstrategy == BESTFIRST ) candidates = AllocateCharCub( nwa, locnjob, alloclen ); for( i=0; i= 0 ; l+=ldf ) { for( k=0; k<2; k++ ) { if( l == locnjob-2 ) k = 1; #else for( jobpos=0; jobpos=0; i-- ) oimpmatch += part_imp_match_out_scQ( i, i ); } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) oimpmatch += part_imp_match_out_sc( i, i ); } } else { if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) oimpmatch += imp_match_out_scQ( i, i ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, 
effarr2_kozo, localhomshrink, 1 ); fprintf( stderr, "not supported\n" ); exit( 1 ); } } // fprintf( stderr, "### oimpmatch = %f\n", oimpmatch ); } else { oimpmatch = 0.0; } #if 0 tmpdouble = 0.0; iu=0; for( i=s1; i>> oimpmatch = 0.0; if( use_fft ) { if( alg == 'Q' ) { part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatch += part_imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatch += part_imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } else { if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f\n", i, oimpmatch ); } } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); fprintf( stderr, "not supported\n" ); exit( 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, 
oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } // fprintf( stderr, "#### initial impmatch = %f\n", oimpmatch ); } else { oimpmatch = 0.0; } // fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble ); mscore = (double)oimpmatch + tmpdouble; } else { // fprintf( stderr, "score_check = %d\n", score_check ); /* atode kousokuka */ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); mscore = tmpdouble; /* atode kousokuka */ if( constraint ) { oimpmatch = 0.0; // shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink ); msshrinklocalhom_fast( memlist[0], memlist[1], localhomtable, localhomshrink ); if( use_fft ) { if( alg == 'Q' ) { part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); } } else { if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); fprintf( stderr, "Not supported\n" ); exit( 1 ); } } } } // oimpmatch = 0.0; if( constraint ) { #if 0 // iranai if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, 
initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } #endif } #if 0 if( alg == 'H' ) naivescore0 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; else if( alg == 'Q' ) naivescore0 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; else if( alg == 'R' ) naivescore0 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; #endif // if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth ); // if( !use_fft && !rnakozo ) if( !use_fft ) { commongappick_record( clus1, mseq1, gapmap1 ); commongappick_record( clus2, mseq2, gapmap2 ); } #if 0 fprintf( stderr, "##### mscore = %f\n", mscore ); #endif #if DEBUG if( !devide ) { fprintf( trap_g, "\nSTEP%d-%d-%d\n", iterate+1, l+1, k ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fflush( trap_g ); } #endif #if 0 printf( "STEP %d-%d-%d\n", iterate, l, k ); for( i=0; i%f\n", oimpmatch, impmatch ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch; fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif #endif } else if( alg == 'R' ) { float wm; imp_match_init_strictR( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL ); // fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch; // fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: %f->%f UP!\n", clus1, clus2, naivescore0, naivescore1 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); rewind( stdout ); for( i=0; i\n%s\n", mseq1[i] ); for( i=0; i\n%s\n", mseq2[i] ); exit( 1 ); } #endif } else if( alg == 'H' ) { float wm; imp_match_init_strictH( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL ); fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch ); naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch; fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else { // imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); A__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2 ); // fprintf( stderr, "##### impmatch = %f\n", impmatch ); } } } else if( use_fft ) { float totalwm; totalwm = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL ); // fprintf( stderr, "totalwm = %f\n", totalwm ); #if 0 if( alg == 'Q' ) { fprintf( stderr, "totalwm = %f\n", totalwm ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } #endif if( alg == 'R' ) { fprintf( stderr, "totalwm = %f\n", totalwm ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } } #endif } else { if( alg == 'M' ) { MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } else if( alg == 'A' ) { A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); // outgap==1 } else if( alg == 'Q' ) { float wm; wm = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); fprintf( stderr, "wm = %f\n", wm ); fprintf( stderr, "impmatch = %f\n", impmatch ); naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stderr ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else if( alg == 'R' ) { float wm; wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) fprintf( stderr, "%d-%d, ns: DOWN! 
%f->%f\n", clus1, clus2, naivescore0, naivescore1 ); else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 1 // chuui if( abs( wm - naivescore1 ) > 100 ) { // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 ); // rewind( stderr ); // rewind( stdout ); // for( i=0; i\n%s\n", mseq1[i] ); // for( i=0; i\n%s\n", mseq2[i] ); // exit( 1 ); } #endif } else if( alg == 'H' ) { float wm; wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL ); naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ); if( naivescore1 > naivescore0 ) fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 ); else if( naivescore1 < naivescore0 ) { fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 ); } else fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 ); #if 0 // chuui if( abs( wm - naivescore1 ) > 100 ) { rewind( stdout ); for( i=0; i\n%s\n", mseq1[i] ); for( i=0; i\n%s\n", mseq2[i] ); exit( 1 ); } #endif } else if( alg == 'a' ) { Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen ); } else ErrorExit( "Sorry!" ); } // fprintf( stderr, "## impmatch = %f\n", impmatch ); if( checkC ) { extern double DSPscore(); extern double SSPscore(); static double cur; static double pre; /* pre = SSPscore( locnjob, bseq ); cur = SSPscore( locnjob, aseq ); */ pre = DSPscore( locnjob, bseq ); cur = DSPscore( locnjob, aseq ); fprintf( stderr, "Previous Sscore = %f\n", pre ); fprintf( stderr, "Currnet Sscore = %f\n\n", cur ); } // fprintf( stderr, "## impmatch = %f\n", impmatch ); identity = !strcmp( aseq[s1], bseq[s1] ); identity *= !strcmp( aseq[s2], bseq[s2] ); /* Bug? : idnetitcal but score change when scoreing mtx != JTT */ length = strlen( mseq1[0] ); if( identity ) { tscore = mscore; if( !devide ) fprintf( trap_g, "tscore = %f identical.\n", tscore ); fprintf( stderr, " identical. 
" ); converged++; } else { if( score_check ) { if( constraint == 2 ) { #if 1 if( RNAscoremtx == 'r' ) intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #endif tscore = impmatch + tmpdouble; // fprintf( stderr, "tmpdouble=%f, impmatch = %f -> %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore ); } else { intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); tscore = tmpdouble; } // fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore ); #if 0 for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq1[i] ); fprintf( stderr, "+++++++\n" ); for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq2[i] ); #endif } else { tscore = mscore + 1.0; // tscore = 0.0; // fprintf( stderr, "in line 705, tscore=%f\n", tscore ); // for( i=0; i mscore - cut/100.0*mscore ) { writePre( locnjob, name, nlen, aseq, 0 ); for( i=0; i= locnjob * 2 ) { fprintf( trap_g, "Converged.\n\n" ); fprintf( stderr, "\nConverged.\n\n" ); if( scoreout ) { unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } if( grouprna1 ) free( grouprna1 ); if( grouprna2 ) free( grouprna2 ); return( 0 ); } if( iterate >= 1 ) { /* oscillation check */ oscillating = 0; for( ii=iterate-2; ii>=0; ii-=2 ) { if( (float)tscore == history[ii][l][k] ) { oscillating = 1; break; } } if( ( oscillating && !cooling ) || ( oscillating && cut < 0.001 && cooling ) ) { fprintf( trap_g, "Oscillating.\n" ); fprintf( stderr, "\nOscillating.\n\n" ); if( scoreout ) { 
unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } #if 1 /* hujuubun */ if( grouprna1 ) free( grouprna1 ); if( grouprna2 ) free( grouprna2 ); return( -1 ); #endif } } /* if( iterate ) */ } /* for( k ) */ } /* for( l ) */ if( scoreout ) { unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } } /* for( iterate ) */ } if( grouprna1 ) free( grouprna1 ); if( grouprna2 ) free( grouprna2 ); return( 2 ); } /* int Tree... */ mafft-7.123-without-extensions/core/addsingle.c0000640000076500007650000023057212216774337020600 0ustar katohkatoh#include "mltaln.h" #define SMALLMEMORY 1 #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static int nadd; static int treein; static int topin; static int treeout; static int distout; static int noalign; static int multidist; static int maxdist = 1; static float lenfaca, lenfacb, lenfacc, lenfacd; static int tuplesize; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define D6LENFACA 0.01 #define D6LENFACB 2500 #define D6LENFACC 2500 #define D6LENFACD 0.1 #define D10LENFACA 0.01 #define D10LENFACB 1000000 #define D10LENFACC 1000000 #define D10LENFACD 0.0 typedef struct _thread_arg { int njob; int nadd; int *nlen; int *follows; char **name; char **seq; LocalHom **localhomtable; float **iscore; float **nscore; int *istherenewgap; int **newgaplist; RNApair ***singlerna; double *eff_kozo_mapped; int alloclen; Treedep *dep; int 
***topol; float **len; Addtree *addtree; #ifdef enablemultithread int *iaddshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } thread_arg_t; #ifdef enablemultithread typedef struct _gaplist2alnxthread_arg { // int thread_no; int ncycle; int *jobpospt; int tmpseqlen; int lenfull; char **seq; int *newgaplist; int *posmap; pthread_mutex_t *mutex; } gaplist2alnxthread_arg_t; typedef struct _distancematrixthread_arg { int thread_no; int njob; int norg; int *jobpospt; int **pointt; int *nogaplen; float **imtx; float **nmtx; float *selfscore; pthread_mutex_t *mutex; } distancematrixthread_arg_t; typedef struct _jobtable2d { int i; int j; } Jobtable2d; typedef struct _dndprethread_arg { int njob; int thread_no; float *selfscore; float **mtx; char **seq; Jobtable2d *jobpospt; pthread_mutex_t *mutex; } dndprethread_arg_t; #endif typedef struct _blocktorealign { int start; int end; int nnewres; } Blocktorealign; static void cnctintvec( int *res, int *o1, int *o2 ) { while( *o1 != -1 ) *res++ = *o1++; while( *o2 != -1 ) *res++ = *o2++; *res = -1; } static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist ) { int i, regstart, regend, len1; regstart = 0; len1 = len+1; for( i=0; i lenb ) return -1; else if( lena < lenb ) return 1; else return( 0 ); } static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows ) { int i, j, k, posinold, newlen, *nmem; int n0, n1, localloclen, nhit, hit1, hit2; int *pickhistory; int nprof1, nprof2, pos, zure; char **prof1, **prof2; int *iinf0, *iinf1; int *group, *nearest, *g2n, ngroup; char ***mem; static char **tmpaln0 = NULL; static char **tmpaln1 = NULL; static char **tmpseq; int ***topolpick; int *tmpint; int *intptr, *intptrx; char *tmpseq0, *cptr, **cptrptr; localloclen = 4 * ( block->end - block->start + 1 ); // ookisugi? 
tmpaln0 = AllocateCharMtx( njob, localloclen ); tmpaln1 = AllocateCharMtx( njob, localloclen ); tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 ); iinf0 = AllocateIntVec( njob ); iinf1 = AllocateIntVec( njob ); nearest = AllocateIntVec( njob ); // oosugi posinold = block->start; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; nearest[n0] = follows[i-norg]; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } mem = AllocateCharCub( n0, n0+1, 0 ); // oosugi nmem = AllocateIntVec( n0 ); // oosugi g2n = AllocateIntVec( n0 ); // oosugi group = AllocateIntVec( n0 ); // oosugi for( i=0; i %d -> group%d\n", i, nearest[i], group[i] ); // fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] ); } for( i=0; i newlen ) newlen = j; for( j=0; j<=i; j++ ) { for( k=0; mem[j][k]; k++ ) fillgap( mem[j][k], newlen ); } #endif } #if 0 fprintf( stderr, "After ingroupalignment (original order):\n" ); for( i=0; i-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][0][j] ); for( intptr=topol[i][1]; *intptr>-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][1][j] ); #if 0 fprintf( stderr, "\nHIT!!! 
\n" ); fprintf( stderr, "\nSTEP %d\n", i ); for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] ); fprintf( stderr, "\n" ); #endif } for( i=0; i-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] ); fprintf( stderr, "\n" ); #endif pos = 0; // for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr; for( intptr=topolpick[i][0]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof1[pos++] = cptr; nprof1 = pos; pos = 0; // for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr; for( intptr=topolpick[i][1]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof2[pos++] = cptr; nprof2 = pos; profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg ); #if 0 for( j=0; jend - block->start + 1 - newlen ); // fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen ); if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 ) { *fullseqlenpt = strlen( fullseq[0] ) * 2; fprintf( stderr, "reallocating..." 
); for( i=0; iend+1; for( i=0; istart, tmpseq0 ); } for( i=0; istart, tmpseq0 ); } FreeCharMtx( tmpaln0 ); FreeCharMtx( tmpaln1 ); FreeCharMtx( tmpseq ); for( i=0; istart; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); // if( strlen( tmpseq[0] ) > 0 ) if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } for( i=1; istart, tmpaln0[i], newlen ); for( i=0; istart, tmpaln1[i], newlen ); } posinold = block->end+1; posinnew = block->start + newlen; zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) ); for( i=0; i 0 && l[i] > 0 ) { if( pg < l[i] ) { c[i] = l[i] - pg; } else { c[i] = 0; } } else { c[i] = l[i]; } prep = p[i]; } } void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit ) { int gaplen; int pos, pi, posl; int prevp = -1; int reslen = 0; char *sp; // char *abk = a; #if 0 int i; char *abk = a; fprintf( stderr, "s = %s\n", s ); fprintf( stderr, "posmap = " ); for( i=0; i lenlimit ) { fprintf( stderr, "Length over. Please recompile!\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } if( reslen++ > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } *a++ = *sp; prevp = pi; } gaplen = *l; pi = *p; if( (reslen+=gaplen) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. 
Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } *a = 0; // fprintf( stderr, "reslen = %d, strlen(a) = %d\n", reslen, strlen( abk ) ); // fprintf( stderr, "a = %s\n", abk ); } static void makenewgaplist( int *l, char *a ) { while( 1 ) { while( *a == '=' ) { a++; (*l)++; // fprintf( stderr, "a[] (i) = %s, *l=%d\n", a, *(l) ); } *++l = 0; if( *a == 0 ) break; a++; } *l = -1; } void arguments( int argc, char *argv[] ) { int c; nthread = 1; outnumber = 0; scoreout = 0; treein = 0; topin = 0; rnaprediction = 'm'; rnakozo = 0; nevermemsave = 0; inputfile = NULL; addfile = NULL; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; // chuui force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; nadd = 0; multidist = 0; tuplesize = -1; legacygapcost = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, 
"ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'R': rnaprediction = 'r'; break; case 's': RNAscoremtx = 'r'; break; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case 't': treeout = 1; break; case 'T': noalign = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif case 'S': scoreout = 1; break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; case 's': treemethod = 's'; break; #endif case 'X': treemethod = 'X'; break; case 'E': treemethod = 'E'; break; case 'q': treemethod = 'q'; break; case 'n' : outnumber = 1; break; #if 0 case 'a': alg = 'a'; break; #endif case 'Q': alg = 'Q'; break; case 'H': alg = 'H'; break; case 'A': alg = 'A'; break; 
case 'M': alg = 'M'; break; case 'N': nevermemsave = 1; break; case 'B': break; case 'F': use_fft = 1; break; case 'G': force_fft = 1; use_fft = 1; break; case 'U': treein = 1; break; case 'V': topin = 1; break; case 'u': tbrweight = 0; weight = 0; break; case 'v': tbrweight = 3; break; case 'd': multidist = 1; break; case 'W': tuplesize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'd': disp = 1; break; #endif /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; case 'L': legacygapcost = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static float treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo ) { int i, l, m; int len1nocommongap, len2nocommongap; int len1, len2; int clus1, clus2; float pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; int *fftlog; int m1, m2; int *gaplen; int *gapmap; int *alreadyaligned; float dumfl = 0.0; int ffttry; RNApair ***grouprna1, ***grouprna2; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); } else { grouprna1 = 
grouprna2 = NULL; } fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); alreadyaligned = AllocateIntVec( nseq ); if( constraint ) { localhomshrink = (LocalHom ***)calloc( nseq, sizeof( LocalHom ** ) ); #if SMALLMEMORY if( multidist ) { for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // for( i=0; i 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; // 2013/Jul17 commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 
0, NULL, outgap, outgap ); } else if( alg == 'H' ) { imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } else if( alg == 'R' ) { imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'A' ): pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'R' ): pscore = 
R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; case( 'H' ): pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // writePre( nseq, name, nlen, aseq, 0 ); if( disp ) display( aseq, nseq ); if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. { adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] ); restorecommongaps( nseq, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus2, 0, mseq2, gaplen ); insertnewgaps( nseq, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' ); // for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( mergeoralign[l] == '2' ) { adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); restorecommongaps( nseq, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); // for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } #if 0 free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); #endif } #if SCOREOUT fprintf( stderr, "totalscore = %10.2f\n\n", tscore ); #endif free( gaplen ); free( gapmap ); if( rnakozo && rnaprediction == 'm' ) { free( grouprna1 ); free( grouprna2 ); } free( fftlog ); // iranai free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); free( alreadyaligned ); if( 
constraint ) { for( i=0; ithread_no; int *iaddshare = targ->iaddshare; #endif int njob = targ->njob; int *follows = targ->follows; int nadd = targ->nadd; int *nlen = targ->nlen; char **name = targ->name; char **seq = targ->seq; LocalHom **localhomtable = targ->localhomtable; float **iscore = targ->iscore; float **nscore = targ->nscore; int *istherenewgap = targ->istherenewgap; int **newgaplist = targ->newgaplist; RNApair ***singlerna = targ->singlerna; double *eff_kozo_mapped = targ->eff_kozo_mapped; int alloclen = targ->alloclen; Treedep *dep = targ->dep; int ***topol = targ->topol; float **len = targ->len; Addtree *addtree = targ->addtree; float pscore; // fprintf( stderr, "\nPreparing thread %d\n", thread_no ); norg = njob - nadd; njobc = norg+1; addmem = AllocateIntVec( nadd+1 ); depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); bseq = AllocateCharMtx( njobc, alloclen ); namec = AllocateCharMtx( njob, 0 ); nlenc = AllocateIntVec( njob ); mergeoralign = AllocateCharVec( njob ); if( constraint ) { localhomtablec = (LocalHom **)calloc( njobc, sizeof( LocalHom *) ); // motto chiisaku dekiru. 
#if SMALLMEMORY if( multidist ) { for( i=0; imutex_counter ); iadd = *iaddshare; if( iadd == nadd ) { pthread_mutex_unlock( targ->mutex_counter ); break; } fprintf( stderr, "\r%d / %d (thread %d) \r", iadd, nadd, thread_no ); ++(*iaddshare); pthread_mutex_unlock( targ->mutex_counter ); } else #endif { iadd++; if( iadd == nadd ) break; fprintf( stderr, "\r%d / %d \r", iadd, nadd ); } for( i=0; i 0 ) { for( i=0; imutex_counter ); fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd ); for( i=0; imutex_counter ); #endif singlerna = NULL; pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped ); #if 0 pthread_mutex_lock( targ->mutex_counter ); // fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore ); // fprintf( stdout, "effc (iadd=%d) = ", iadd ); // for( i=0; imutex_counter ); #endif #if 0 fprintf( trap_g, "done.\n" ); fclose( trap_g ); #endif // fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] ); // fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] ); strcpy( seq[norg+iadd], bseq[norg] ); rep = -1; for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); else { fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) fprintf( stderr, "\nWARNING : %d unknown characters\n", nunknown ); for( i=0; i 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); 
pthread_mutex_init( &mutex, NULL ); for( i=0; i nogaplen[j] ) { longer=(float)nogaplen[i]; shorter=(float)nogaplen[j]; } else { longer=(float)nogaplen[j]; shorter=(float)nogaplen[i]; } lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); bunbo = MIN( selfscore[i], selfscore[j] ); if( j < norg ) { if( bunbo == 0.0 ) imtx[i][j-i] = 1.0; else imtx[i][j-i] = ( 1.0 - mtxv / bunbo ) * lenfac; } else { if( bunbo == 0.0 ) nmtx[i][j-norg] = 1.0; else nmtx[i][j-norg] = ( 1.0 - mtxv / bunbo ) * lenfac; } } free( table1 ); } } fprintf( stderr, "\ndone.\n\n" ); fflush( stderr ); for( i=0; i 0 ) { dndprethread_arg_t *targ; Jobtable2d jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( dndprethread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 9.0 || mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j-i] = mtxv; } } } #if TEST for( i=0; i 1000 || nadd > 1000 ) use_fft = 0; fullseqlen = alloclen = nlenmax*4+1; //chuui! seq = AllocateCharMtx( njob, alloclen ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); if( multidist || tuplesize > 0 ) { iscore = AllocateFloatHalfMtx( norg ); nscore = AllocateFloatMtx( norg, nadd ); } else { iscore = AllocateFloatHalfMtx( njob ); nscore = NULL; } kozoarivec = AllocateCharVec( njob ); ordertable = AllocateIntVec( norg+1 ); if( constraint ) { #if SMALLMEMORY if( multidist ) { localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) ); for( i=0; i 0 ) // if mtx is internally computed { if( multidist == 1 ) { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore ha muda. 
// hat2p = fopen( "hat2-1", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore ); // fclose( hat2p ); dndpre( norg, seq, iscore ); // fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); // prep = fopen( "hat2i", "r" ); // if( prep == NULL ) ErrorExit( "Make hat2i." ); // readhat2_floathalf_pointer( prep, njob-nadd, name, iscore ); // fclose( prep ); // fprintf( stderr, "done.\n" ); // hat2p = fopen( "hat2-2", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); // fclose( hat2p ); } else { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); } } else { if( multidist == 1 ) { fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " ); prep = fopen( "hat2n", "r" ); if( prep == NULL ) ErrorExit( "Make hat2n." ); readhat2_floathalf_part_pointer( prep, njob, nadd, name, nscore ); fclose( prep ); fprintf( stderr, "done.\n" ); fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); prep = fopen( "hat2i", "r" ); if( prep == NULL ) ErrorExit( "Make hat2i." ); readhat2_floathalf_pointer( prep, njob-nadd, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } else { fprintf( stderr, "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_floathalf_pointer( prep, njob, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } } #if 1 if( distout ) { fprintf( stderr, "Error in v6.936!! 
Please contact kazutaka.katoh@aist.go.jp\n" ); exit( 1 ); hat2p = fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); fclose( hat2p ); exit( 1 ); } #endif singlerna = NULL; commongappick( norg, seq ); lenfull = strlen( seq[0] ); // newgaplist_o = AllocateIntMtx( nadd, alloclen ); //ookisugi newgaplist_o = AllocateIntMtx( nadd, lenfull*2 ); newgaplist_compact = AllocateIntVec( lenfull*2 ); istherenewgap = AllocateIntVec( nadd ); follower = AllocateIntMtx( norg, 1 ); for( i=0; i 1 ) cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] ); else { ordertable[0] = 0; ordertable[1] = -1; } FreeFloatHalfMtx( iscoreo, norg ); #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex_counter; thread_arg_t *targ; int *iaddsharept; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex_counter, NULL ); iaddsharept = calloc( 1, sizeof(int) ); *iaddsharept = 0; for( i=0; i 0 ) FreeFloatMtx( nscore ); // for( i=0; i%s (%d) \n%s\n", name[norg+i], norg+i, seq[norg+i] ); if( treeout ) { fp = fopen( "infile.tree", "a" ); if( fp == 0 ) { fprintf( stderr, "File error!\n" ); exit( 1 ); } for( i=0; i %d\n", follower[i][j]+norg, i ); } fclose( orderfp ); posmap = AllocateIntVec( lenfull+2 ); realign = calloc( lenfull+2, sizeof( Blocktorealign ) ); for( i=0; i= fullseqlen ) { fullseqlen = tmplen * 2+1; // fprintf( stderr, "Length over!\n" ); // fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) ); fprintf( stderr, "reallocating..." 
); // fprintf( stderr, "alloclen=%d\n", alloclen ); // fprintf( stderr, "Please recompile!\n" ); // exit( 1 ); for( i=0; i 0 && ien > 500 ) { gaplist2alnxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread ); targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); jobpos = 1; for( i=0; i%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 ); strcpy( seq[i], tmpseq1 ); } } } tmpseq1 = tmpseq[0]; // insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); // fprintf( stderr, "%d = %s\n", iadd, tmpseq1 ); eq2dash( tmpseq1 ); strcpy( seq[norg+iadd], tmpseq1 ); // adjustposmap( lenfull, posmap, newgaplist_o[iadd] ); adjustposmap( lenfull, posmap, newgaplist_compact ); countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda? // countnewres( lenfull, realign, posmap, newgaplist_compact ); // muda? } fprintf( stderr, "\r done. 
\n\n" ); #if 0 for( i=0; i%s\n", name[i] ); fprintf( stdout, "%s\n", seq[i] ); } #endif #if 0 fprintf( stderr, "realign[].nnewres = " ); for( i=0; i 1 ) { // fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end ); fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull ); // zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg ); // zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows ); zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows ); #if 0 gappick0( check1, seq[0] ); fprintf( stderr, "check1 = %s\n", check1 ); if( strcmp( check1, check2 ) ) { fprintf( stderr, "CHANGED!!!!!\n" ); exit( 1 ); } #endif for( j=i+1; jaln0[%d] = \n%s\n", i, aln0[i] ); for( i=0; ialn1[%d] = \n%s\n", i, aln1[i] ); for( i=0; ialn2[%d] = \n%s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); } void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap { int i, j, newlen; double *effarr0, *effarr2; float dumfl; double eff; effarr0 = AllocateDoubleVec( n0 ); effarr2 = AllocateDoubleVec( n2 ); commongappick( n0, aln0 ); commongappick( n2, aln2 ); eff = 1.0 / (double)n0; for( i=0; ialn0[%d] = \n%s\n", i, aln0[i] ); for( i=0; ialn1[%d] = \n%s\n", i, aln1[i] ); for( i=0; ialn2[%d] = \n%s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); } void eq2dash( char *s ) { while( *s ) { if( *s == '=' ) *s = '-'; s++; } } void findnewgaps( int n, int rep, char **seq, int *gaplen ) { int i, pos, len, len1; len = strlen( seq[0] ); // for( i=0; i/ then output += " " + num.to_s num += 1 end end fp.close puts output + " # " + file end mafft-7.123-without-extensions/core/mafft.10000640000076500007650000003466311514755236017661 0ustar katohkatoh.\" Title: MAFFT .\" Author: Kazutaka Katoh .\" Generator: DocBook XSL Stylesheets v1.72.0 .\" Date: 2007-08-14 .\" Manual: Mafft Manual .\" Source: mafft 6.240 .\" .TH "MAFFT" "1" "2007\-06\-09" "mafft 
6.240" "Mafft Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "THIS MANUAL IS FOR V6.2XX (2007)" Recent versions (v6.8xx; 2010 Nov.) have more features than those described below. See also the tips page at http://mafft.cbrc.jp/alignment/software/tips0.html .SH "NAME" .RS 0 .sp mafft \- Multiple alignment program for amino acid or nucleotide sequences .RE .SH "SYNOPSIS" .RS 0 .HP 6 \fBmafft\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] .HP 5 \fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 14 \fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] .HP .sp \fIinput\fR, \fIgroup1\fR and \fIgroup2\fR must be in FASTA format. .RE .SH "DESCRIPTION" .RS 0 \fBMAFFT\fR is a multiple sequence alignment program for unix\-like operating systems. It offers a range of multiple alignment methods. 
.SS "Accuracy\-oriented methods:" .sp .RS 4 \h'-04'\(bu\h'+03'L\-INS\-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): .HP 6 \fBmafft\fR \fB\-\-localpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'G\-INS\-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): .HP 6 \fBmafft\fR \fB\-\-globalpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'E\-INS\-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): .HP 6 \fBmafft\fR \fB\-\-ep\fR\ \fI0\fR \fB\-\-genafpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] .br For E\-INS\-i, the \fB\-\-ep\fR \fI0\fR option is recommended to allow large gaps. .RE .SS "Speed\-oriented methods:" .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; two cycles only): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; max. 
1000 iterations): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-2 (fast; progressive method): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-i (iterative refinement method without FFT approximation; two cycles only): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-2 (fast; progressive method without the FFT approximation): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-PartTree\-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fB\-\-parttree\fR \fIinput\fR [>\ \fIoutput\fR] .RE .SS "Group\-to\-group alignments" .HP 6 .RS 4 \fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] .sp or: .sp \fBmafft\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fB\-\-seed\fR\ \fIgroup1\fR \fB\-\-seed\fR\ \fIgroup2\fR /dev/null [>\ \fIoutput\fR] .RE .RE .RE .SH "OPTIONS" .SS "Algorithm" .RS 0 .PP \fB\-\-auto\fR .RS 4 Automatically selects an appropriate strategy from L\-INS\-i, FFT\-NS\-i and FFT\-NS\-2, according to data size. Default: off (always FFT\-NS\-2) .RE .PP \fB\-\-6merpair\fR .RS 4 Distance is calculated based on the number of shared 6mers. 
Default: on .RE .PP \fB\-\-globalpair\fR .RS 4 All pairwise alignments are computed with the Needleman\-Wunsch algorithm. More accurate but slower than \-\-6merpair. Suitable for a set of globally alignable sequences. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (G\-INS\-i). Default: off (6mer distance is used) .RE .PP \fB\-\-localpair\fR .RS 4 All pairwise alignments are computed with the Smith\-Waterman algorithm. More accurate but slower than \-\-6merpair. Suitable for a set of locally alignable sequences. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (L\-INS\-i). Default: off (6mer distance is used) .RE .PP \fB\-\-genafpair\fR .RS 4 All pairwise alignments are computed with a local algorithm with the generalized affine gap cost (Altschul 1998). More accurate but slower than \-\-6merpair. Suitable when large internal gaps are expected. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (E\-INS\-i). Default: off (6mer distance is used) .RE .\".PP .\"\fB\-\-fastswpair\fR .\".RS 4 .\"Distance is calculated based on a FASTA alignment. .\"FASTA is required. Default: off (6mer distance is used) .\".RE .PP \fB\-\-fastapair\fR .RS 4 All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). FASTA is required. Default: off (6mer distance is used) .RE .\".PP .\"\fB\-\-blastpair\fR .\".RS 4 .\"Distance is calculated based on a BLAST alignment. BLAST is .\"required. Default: off (6mer distance is used) .\".RE .PP \fB\-\-weighti\fR \fInumber\fR .RS 4 Weighting factor for the consistency term calculated from pairwise alignments. Valid when one of \-\-globalpair, \-\-localpair, \-\-genafpair, \-\-fastapair or \-\-blastpair is selected. Default: 2.7 .RE .PP \fB\-\-retree\fR \fInumber\fR .RS 4 Guide tree is built \fInumber\fR times in the progressive stage. Valid with 6mer distance. 
Default: 2 .RE .PP \fB\-\-maxiterate\fR \fInumber\fR .RS 4 \fInumber\fR cycles of iterative refinement are performed. Default: 0 .RE .PP \fB\-\-fft\fR .RS 4 Use FFT approximation in group\-to\-group alignment. Default: on .RE .PP \fB\-\-nofft\fR .RS 4 Do not use FFT approximation in group\-to\-group alignment. Default: off .RE .PP \fB\-\-noscore\fR .RS 4 Alignment score is not checked in the iterative refinement stage. Default: off (score is checked) .RE .PP \fB\-\-memsave\fR .RS 4 Use the Myers\-Miller (1988) algorithm. Default: automatically turned on when the alignment length exceeds 10,000 (aa/nt). .RE .PP \fB\-\-parttree\fR .RS 4 Use a fast tree\-building method (PartTree, Katoh and Toh 2007) with the 6mer distance. Recommended when a large number (> ~10,000) of sequences are input. Default: off .RE .PP \fB\-\-dpparttree\fR .RS 4 The PartTree algorithm is used with distances based on DP. Slightly more accurate and slower than \-\-parttree. Recommended when a large number (> ~10,000) of sequences are input. Default: off .RE .PP \fB\-\-fastaparttree\fR .RS 4 The PartTree algorithm is used with distances based on FASTA. Slightly more accurate and slower than \-\-parttree. Recommended when a large number (> ~10,000) of sequences are input. FASTA is required. Default: off .RE .PP \fB\-\-partsize\fR \fInumber\fR .RS 4 The number of partitions in the PartTree algorithm. Default: 50 .RE .PP \fB\-\-groupsize\fR \fInumber\fR .RS 4 Do not make alignment larger than \fInumber\fR sequences. Valid only with the \-\-*parttree options. Default: the number of input sequences .RE .RE .SS "Parameter" .RS 0 .PP \fB\-\-op\fR \fInumber\fR .RS 4 Gap opening penalty at group\-to\-group alignment. Default: 1.53 .RE .PP \fB\-\-ep\fR \fInumber\fR .RS 4 Offset value, which works like gap extension penalty, for group\-to\-group alignment. Default: 0.123 .RE .PP \fB\-\-lop\fR \fInumber\fR .RS 4 Gap opening penalty at local pairwise alignment. 
Valid when the \-\-localpair or \-\-genafpair option is selected. Default: \-2.00 .RE .PP \fB\-\-lep\fR \fInumber\fR .RS 4 Offset value at local pairwise alignment. Valid when the \-\-localpair or \-\-genafpair option is selected. Default: 0.1 .RE .PP \fB\-\-lexp\fR \fInumber\fR .RS 4 Gap extension penalty at local pairwise alignment. Valid when the \-\-localpair or \-\-genafpair option is selected. Default: \-0.1 .RE .PP \fB\-\-LOP\fR \fInumber\fR .RS 4 Gap opening penalty to skip the alignment. Valid when the \-\-genafpair option is selected. Default: \-6.00 .RE .PP \fB\-\-LEXP\fR \fInumber\fR .RS 4 Gap extension penalty to skip the alignment. Valid when the \-\-genafpair option is selected. Default: 0.00 .RE .PP \fB\-\-bl\fR \fInumber\fR .RS 4 BLOSUM \fInumber\fR matrix (Henikoff and Henikoff 1992) is used. \fInumber\fR=30, 45, 62 or 80. Default: 62 .RE .PP \fB\-\-jtt\fR \fInumber\fR .RS 4 JTT PAM \fInumber\fR (Jones et al. 1992) matrix is used. \fInumber\fR>0. Default: BLOSUM62 .RE .PP \fB\-\-tm\fR \fInumber\fR .RS 4 Transmembrane PAM \fInumber\fR (Jones et al. 1994) matrix is used. \fInumber\fR>0. Default: BLOSUM62 .RE .PP \fB\-\-aamatrix\fR \fImatrixfile\fR .RS 4 Use a user\-defined AA scoring matrix. The format of \fImatrixfile\fR is the same as that of BLAST. Ignored when nucleotide sequences are input. Default: BLOSUM62 .RE .PP \fB\-\-fmodel\fR .RS 4 Incorporate the AA/nuc composition information into the scoring matrix. Default: off .RE .RE .SS "Output" .RS 0 .PP \fB\-\-clustalout\fR .RS 4 Output format: clustal format. Default: off (fasta format) .RE .PP \fB\-\-inputorder\fR .RS 4 Output order: same as input. Default: on .RE .PP \fB\-\-reorder\fR .RS 4 Output order: aligned. Default: off (inputorder) .RE .PP \fB\-\-treeout\fR .RS 4 Guide tree is output to the \fIinput\fR.tree file. Default: off .RE .PP \fB\-\-quiet\fR .RS 4 Do not report progress. Default: off .RE .RE .SS "Input" .RS 0 .PP \fB\-\-nuc\fR .RS 4 Assume the sequences are nucleotide. 
Default: auto .RE .PP \fB\-\-amino\fR .RS 4 Assume the sequences are amino acid. Default: auto .RE .PP \fB\-\-seed\fR \fIalignment1\fR [\fB\-\-seed\fR \fIalignment2\fR \fB\-\-seed\fR \fIalignment3\fR ...] .RS 4 Seed alignments given in \fIalignment_n\fR (fasta format) are aligned with sequences in \fIinput\fR. The alignment within every seed is preserved. .RE .RE .SH "FILES" .RS 0 .PP Mafft stores the input sequences and other files in a temporary directory, which by default is located in \fI/tmp\fR. .RE .SH "ENVIRONMENT" .RS 0 .PP \fBMAFFT_BINARIES\fR .RS 4 Indicates the location of the binary files used by mafft. By default, they are searched for in \fI/usr/local/lib/mafft\fR, but on Debian systems, they are searched for in \fI/usr/lib/mafft\fR. .RE .PP \fBFASTA_4_MAFFT\fR .RS 4 This variable can be set to indicate to mafft the location of the fasta34 program if it is not in the PATH. .RE .RE .SH "SEE ALSO" .RS 0 .PP \fBmafft\-homologs\fR(1) .RE .SH "REFERENCES" .RS 0 .SS "In English" .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Toh (Bioinformatics 23:372\-374, 2007) PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences (describes the PartTree algorithm). .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment (describes [ancestral versions of] the G\-INS\-i, L\-INS\-i and E\-INS\-i strategies) .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh, Misawa, Kuma and Miyata (Nucleic Acids Res. 
30:3059\-3066, 2002) MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform (describes the FFT\-NS\-1, FFT\-NS\-2 and FFT\-NS\-i strategies) .RE .SS "In Japanese" .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Misawa (Seibutsubutsuri 46:312\-317, 2006) Multiple Sequence Alignments: the Next Generation .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Kuma (Kagaku to Seibutsu 44:102\-108, 2006) Jissen\-teki Multiple Alignment .RE .RE .SH "AUTHORS" .RS 0 .PP \fBKazutaka Katoh\fR <\&kazutaka.katoh_at_aist.go.jp\&> .sp -1n .IP "" 4 Wrote Mafft. .PP \fBCharles Plessy\fR <\&charles\-debian\-nospam_at_plessy.org\&> .sp -1n .IP "" 4 Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. .RE .SH "COPYRIGHT" .RS 0 Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) .br Copyright \(co 2007 Charles Plessy (this manpage) .br .PP Mafft and its manpage are offered under the following conditions: .PP Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: .sp .RS 4 \h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. .RE .sp .RS 4 \h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. .RE .sp .RS 4 \h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. .RE .PP THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .br .RE mafft-7.123-without-extensions/core/treeOperation.c0000640000076500007650000004021511162641174021446 0ustar katohkatoh#include "mltaln.h" #define DEBUG 0 #define EF_THREEWAY 1.0 #define MAXBW 1.0 #define MINBW 0.01 #define MINLEN 0.001 #if DEBUG Node *stopol_g; #endif void checkMinusLength( int nseq, double **len ) { int i, j; for( i=0; i= 0 ) { free( tmpintvec[numintvec] ); } free( tmpintvec ); numintvec = 0; #endif } void treeCnv( Node *stopol, int locnseq, int ***topol, double **len, double **bw ) { int i; NodeInCub parent; int *count; int ccount; int rep; int tmpint; static int **tmpintvec = NULL; static int numintvec = 0; count = AllocateIntVec( 2 * locnseq ); /* oome */ if( !count ) ErrorExit( "Cannot allocate count.\n" ); checkMinusLength( locnseq, len ); /* uwagaki */ stopolInit( locnseq * 2, stopol ); for( i=0; ilength[0] ); } for( i=0, count=0; i<3; i++ ) { #if DEBUG fprintf( stderr, "ob->tmpChildren[%d] = %d\n", i, ob->tmpChildren[i] ); #endif if( oppositeNode != ob->children[i] ) dir_ch[count++] = i; else dir_pa = i; } #if DEBUG fprintf( stderr, "\n" ); #endif if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, oppositeNode-stopol_g ); #endif ErrorExit( "Invalid call\n" ); } tmpvalue0 = syntheticLength( ob->children[dir_ch[0]], ob ); tmpvalue1 = syntheticLength( ob->children[dir_ch[1]], ob ); #if DEBUG fprintf( stderr, "tmpvalue0 = %f\n", tmpvalue0 ); fprintf( stderr, "tmpvalue1 = %f\n", tmpvalue1 ); #endif if( 
tmpvalue0 ) tmpvalue0 = 1.0 / tmpvalue0; else nanflag = 1; if( tmpvalue1 ) tmpvalue1 = 1.0 / tmpvalue1; else nanflag = 1; if( nanflag ) value = 0.0; else { value = tmpvalue0 + tmpvalue1; value = 1.0 / value; } value += ob->length[dir_pa]; #if DEBUG fprintf( stderr, "value = %f\n", value ); #endif return( value ); } double calcW( Node *ob, Node *op ) { int i, count; int dir_ch[3]; int dir_pa = -10; // by katoh double a, b, c, f, s; double value; if( isLeaf( *ob ) ) return( 1.0 ); for( i=0, count=0; i<3; i++ ) { if( op != ob->children[i] ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) ErrorExit( "Invalid call of calcW\n" ); #if DEBUG fprintf( stderr, "In calcW\n" ); fprintf( stderr, "ob = %d\n", ob - stopol_g ); fprintf( stderr, "op = %d\n", op - stopol_g ); fprintf( stderr, "ob->children[c1] = %d\n", ob->children[dir_ch[0]] - stopol_g ); fprintf( stderr, "ob->children[c2] = %d\n", ob->children[dir_ch[1]] - stopol_g ); fprintf( stderr, "ob->children[pa] = %d\n", ob->children[dir_pa] - stopol_g ); fprintf( stderr, "\n" ); #endif a = syntheticLength( ob->children[dir_ch[0]], ob ); b = syntheticLength( ob->children[dir_ch[1]], ob ); c = syntheticLength( ob->children[dir_pa], ob ); #if DEBUG fprintf( stderr, "a = %f\n", a ); fprintf( stderr, "b = %f\n", b ); fprintf( stderr, "c = %f\n", c ); #endif if( !c ) return( MAXBW ); if ( !a || !b ) return( MINBW ); /* ? 
*/ f = EF_THREEWAY; s = ( b*c + c*a + a*b ); value = a*b*(c+a)*(c+b) / ( c*(a+b) * f * s ); value = sqrt( value ); return( value ); } void calcBranchWeight( double **bw, int locnseq, Node *stopol, int ***topol, double **len ) { NodeInCub parent; int i; int rep; Node *topNode, *btmNode; double topW, btmW; for( i=locnseq; ichildren[i] != op ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g ); #endif ErrorExit( "Incorrect call of weightFromABranch_rec" ); } for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) result[n] *= *ob->weightptr[dir_ch[0]]; weightFromABranch_rec( result, ob->children[dir_ch[0]], ob ); for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) result[n] *= *ob->weightptr[dir_ch[1]]; weightFromABranch_rec( result, ob->children[dir_ch[1]], ob ); } void weightFromABranch( int nseq, double *result, Node *stopol, int ***topol, int step, int LorR ) { Node *topNode, *btmNode; int i; if( step == nseq - 2 ) { topNode = stopol[nseq-2].children[0]; btmNode = stopol + nseq-3; #if DEBUG fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); #endif } else { for( i=0; i<3; i++ ) { if( stopol[step].members[i][0] == topol[step][LorR][0] ) break; } if( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." ); btmNode = stopol[step].children[i]; topNode = stopol+step; } for( i=0; ichildren[i] != op ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g ); #endif ErrorExit( "Incorrect call of weightFromABranch_rec" ); } // fprintf( stderr, "\n" ); sumweight = 0.0; count = 0; lastkozo = -1; for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) { // fprintf( stderr, "member1! 
n=%d\n", n ); sumweight += seqweight[n]; if( kozoari[n] ) { count++; lastkozo = n; } } for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) { // fprintf( stderr, "member2! n=%d\n", n ); sumweight += seqweight[n]; if( kozoari[n] ) { count++; lastkozo = n; } } // fprintf( stderr, "count = %d\n", count ); if( count == 1 ) strweight[lastkozo] = sumweight; else if( count > 1 ) { assignstrweight_rec( strweight, ob->children[dir_ch[0]], ob, kozoari, seqweight ); assignstrweight_rec( strweight, ob->children[dir_ch[1]], ob, kozoari, seqweight ); } } void assignstrweight( int nseq, double *strweight, Node *stopol, int ***topol, int step, int LorR, char *kozoari, double *seqweight ) { Node *topNode, *btmNode; int i; if( step == nseq - 2 ) { topNode = stopol[nseq-2].children[0]; btmNode = stopol + nseq-3; #if DEBUG fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); #endif } else { for( i=0; i<3; i++ ) { if( stopol[step].members[i][0] == topol[step][LorR][0] ) break; } if( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." 
); btmNode = stopol[step].children[i]; topNode = stopol+step; } for( i=0; i-1; i++ ) fprintf( stderr, "%3d ", topol[step][0][i] ); fprintf( stderr, "\n" ); for( i=0; topol[step][1][i]>-1; i++ ) fprintf( stderr, "%3d ", topol[step][1][i] ); fprintf( stderr, "\n" ); for( i=0; i= 3 #define WEIGHT 0 #define TOKYORIPARA 0.70 // 0.70 #define TOKYORIPARA_A 0.70 // changed #define LENFAC 1 #define HUKINTOTREE 1 #define DIANA 0 #define MAX6DIST 10.0 // kouzoutai ni sasareru pointer ha static #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define END_OF_VEC -1 static char *fastapath; static int doalign; static int fromaln; static int uselongest; static int treeout; static int classsize; static int picksize; static int maxl; static int tsize; static int reorder; static int pid; static int maxdepth = 0; static double tokyoripara; static double lenfaca, lenfacb, lenfacc, lenfacd; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define DLENFACA 0.01 #define DLENFACB 2500 #define DLENFACC 2500 #define DLENFACD 0.1 static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; typedef struct _scores { double score; int selfscore; int orilen; int *pointt; int numinseq; char *name; // char *seq; // reallo // char **seqpt; int shimon; } Scores; int intcompare( const int *a, const int *b ) { return( *a - *b ); } int lcompare( const Scores *a, const Scores *b ) { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } int dcompare( const Scores *a, const Scores *b ) { if( a->score > b->score ) return 1; else if( a->score < b->score ) return -1; else { if( a->selfscore < b->selfscore ) return 1; else if( a->selfscore > b->selfscore ) return -1; else { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } } } static void getfastascoremtx( int **tmpaminodis ) { FILE *qfp; FILE *dfp; FILE *rfp; int i, j; char aa; int slen; int res; 
char com[10000]; static char *tmpseq; static char *tmpname; double *resvec; if( scoremtx == -1 ) { tmpaminodis['a']['a'] = 5; tmpaminodis['g']['g'] = 5; tmpaminodis['c']['c'] = 5; tmpaminodis['t']['t'] = 5; tmpaminodis['n']['n'] = -1; return; } tmpseq = calloc( 2000, sizeof( char ) ); tmpname = calloc( B, sizeof( char ) ); resvec = calloc( 1, sizeof( double ) ); // fprintf( stderr, "xformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); sprintf( tmpname, ">+===========+%d ", 0 ); strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; // fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; sprintf( tmpname, ">+===========+%d ", 0 ); for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." 
); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadFasta34m10_scoreonly( rfp, resvec, 1 ); fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 ); // fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] ); } tmpaminodis['X']['X'] = -1; free( tmpname ); free( tmpseq ); free( resvec ); } #if 0 static void getblastscoremtx( int **tmpaminodis ) { FILE *qfp; FILE *dfp; FILE *rfp; int i, j; char aa; int slen; int res; char com[10000]; static char *tmpseq; static char *tmpname; double *resvec; if( scoremtx == -1 ) { tmpaminodis['a']['a'] = 1; tmpaminodis['g']['g'] = 1; tmpaminodis['c']['c'] = 1; tmpaminodis['t']['t'] = 1; return; } tmpseq = calloc( 2000, sizeof( char ) ); tmpname = calloc( B, sizeof( char ) ); resvec = calloc( 1, sizeof( double ) ); // fprintf( stderr, "xformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); sprintf( tmpname, "\0", i ); // BUG!! 
strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); if( scoremtx == -1 ) sprintf( com, "formatdb -p f -i %s -o F", datafile ); else sprintf( com, "formatdb -i %s -o F", datafile ); system( com ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." 
); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", "blastall" ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, resvec, 1 ); fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[aa][aa] = (int)( *resvec / 6 ); } tmpaminodis['X']['X'] = 0; free( tmpname ); free( tmpseq ); free( resvec ); } #endif static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata ) { double *val; FILE *qfp; FILE *dfp; FILE *rfp; int i; char com[10000]; static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; static int pid; static char *tmpseq; static char *tmpname; int slen; int res; static Scores *scoresbk = NULL; static int ninbk = 0; if( pid == 0 ) { pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); tmpname = calloc( B+1, sizeof( char ) ); } val = calloc( nin, sizeof( double ) ); // fprintf( stderr, "nin=%d, q=%d\n", nin, query ); if( rewritedata ) { scoresbk = scores; ninbk = nin; // fprintf( stderr, "\nformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." 
); if( picks == NULL ) for( i=0; i+===========+%d ", i ); slen = scores[i].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } else for( i=0; i+===========+%d ", i ); slen = scores[picks[i]].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } fclose( dfp ); } gappick0( tmpseq, seq[scores[query].numinseq] ); sprintf( tmpname, ">+==========+%d ", 0 ); slen = scores[query].orilen; qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." ); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); // fprintf( stderr, "calling fasta, nin=%d\n", nin ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } // fprintf( stderr, "fasta done\n" ); //exit( 1 ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); // fprintf( stderr, "reading fasta\n" ); if( scoremtx == -1 ) res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin ); else res = ReadFasta34m10_scoreonly( rfp, val, nin ); // fprintf( stderr, "done. 
val[0] = %f\n", val[0] ); fclose( rfp ); #if 0 for( i=0; i %s\0", nin, queryfile, datafile, resultfile ); else sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile ); res = system( com ); if( res ) ErrorExit( "error in blast" ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, val, nin ); fclose( rfp ); #if 0 for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'p': picksize = myatoi( *++argv ); fprintf( stderr, "picksize = %d\n", picksize ); --argc; goto nextoption; case 's': classsize = myatoi( *++argv ); fprintf( stderr, "groupsize = %d\n", classsize ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'T': tokyoripara = (double)atof( *++argv ); --argc; goto nextoption; case 'l': uselongest = 0; break; #if 1 case 'a': fmodel = 1; break; #endif case 'S': doalign = 'f'; break; case 'Z': fromaln = 1; break; case 
'U': doalign = 1; break; case 'x': reorder = 0; break; case 't': treeout = 1; break; case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'L': legacygapcost = 1; break; #if 0 case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 'a': alg = 'a'; break; #endif case 'R': alg = 'R'; break; case 'Q': alg = 'Q'; break; case 'A': alg = 'A'; break; case 'N': nevermemsave = 1; break; case 'M': alg = 'M'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'G': use_fft = 1; force_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; case 'J': tbutree = 0; break; case 'X': treemethod = 'X'; // mix break; case 'E': treemethod = 'E'; // upg (average) break; case 'q': treemethod = 'q'; // minimum break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static int maxl; static int tsize; static int nunknown = 0; int seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else nunknown++; } *grp = END_OF_VEC; return( grp-grpbk ); } int seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else nunknown++; } *grp = END_OF_VEC; return( grp-grpbk ); } void makecompositiontable_p( short *table, int *pointt ) { int point; while( 
( point = *pointt++ ) != END_OF_VEC ) table[point]++; } int commonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } #if 1 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen ) { int l, len1, len2; int clus1, clus2; float pscore, tscore; static int *fftlog; static char *indication1, *indication2; static double *effarr1 = NULL; static double *effarr2 = NULL; static char **mseq1, **mseq2; float dumfl = 0.0; int ffttry; int m1, m2; #if 0 int i, j; #endif if( effarr1 == NULL ) { fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); mseq1 = AllocateCharMtx( nseq, 0 ); mseq2 = 
AllocateCharMtx( nseq, 0 ); for( l=0; l 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // fprintf( stdout, "mseq1 = %s\n", mseq1[0] ); // fprintf( stdout, "mseq2 = %s\n", mseq2[0] ); if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = 0; commonAlloc1 = 0; commonAlloc2 = 0; } if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); else ffttry = 0; if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "\bf" ); if( alg == 'M' ) { fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } } else { fprintf( stderr, "\bd" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; case( 'Q' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap ); } else { 
// fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); return; } #endif #if 0 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol { int i, l; int nlim; int clus1, clus2; nlim = nalign-1; for( l=0; l 0 ) // sprintf( outputfile, "%su%d", outputfile, uniform ); sprintf( outputfile + strlen(outputfile), "u%d", uniform ); fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile ); outfp = fopen( outputfile, "w" ); free( outputfile ); if( outfp == NULL ) { fprintf( stderr, "Cannot open %s\n", outputfile ); exit( 1 ); } for( j=0; jG%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] ); fclose( outfp ); #endif #if TREE if( treeout ) { treelen = 0; tmptree = calloc( 100, sizeof( char ) ); for( j=0; j 1 ) { *(*tree+1) = '('; *(*tree+2) = '\0'; } else { *(*tree+1) = '\0'; } for( j=0; j 1 ) strcat( *tree, ")\n" ); else strcat( *tree, "\n" ); // fprintf( stdout, "*tree = %s\n", *tree ); } #endif for( j=0; jselfscore; belongto = 0; while( i-- ) { // fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score ); if( ptr->selfscore > selfscore0 ) { selfscore0 = ptr->selfscore; belongto = ptr-scores; } ptr++; } #if 1 if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; free( ptr ); } #endif } else { qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare ); belongto = (int)( 0.5 * nin ); // fprintf( stderr, "lengths = %d, %d, %d\n", 
scores->orilen, scores[belongto].orilen, scores[nin-1].orilen ); if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; free( ptr ); } } if( qinoya != scores->numinseq ) // if( 1 || qinoya != scores->numinseq ) { // fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt ); if( doalign ) { if( doalign == 'f' ) { blastresults = callfasta( seq, scores, nin, NULL, 0, 1 ); if( scores->selfscore != (int)blastresults[0] ) { fprintf( stderr, "\n\nWARNING1: selfscore\n" ); fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 ); fprintf( stderr, "scores->orilen = %d\n", scores->orilen ); fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] ); // if( abs( scores->selfscore - (int)blastresults[0] ) > 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? 
// exit( 1 ); } } else gappick0( mseq1[0], seq[scores->numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[0].pointt ); } selfscore0 = scores[0].selfscore; for( i=0; iorilen > scores[i].orilen ) { longer = (double)scores->orilen; shorter = (double)scores[i].orilen; } else { longer = (double)scores[i].orilen; // nai shorter = (double)scores->orilen; //nai } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1; if( scores[i].score < 0.0 ) scores[i].score = 0.0; } else { if( fromaln ) { // scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; } else { if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; // fprintf( stderr, "scores[i] = %f\n", scores[i].score ); // fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] ); // fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, 
qinoya == scores->numinseq?'o':'x' ); } } } else { scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac; if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST; } // if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen ); } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); //exit( 1 ); } // fprintf( stderr, "sorting .. " ); qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare ); // fprintf( stderr, "done.\n" ); maxdist = scores[nin-1].score; if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai. { if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) ) { maxdist = 0.0; } // fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 ); } // fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin ); if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score ); // kokoni if( nin < 2 || ... 
) picks = AllocateIntVec( nin+1 ); s_p_map = AllocateIntVec( nin+1 ); s_y_map = AllocateIntVec( nin+1 ); pickkouho = AllocateIntVec( nin+1 ); closeh = AllocateIntVec( nin+1 ); // nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu // fprintf( stderr, "selecting kouhos phase 2\n" ); // if( nkouho == 0 ) // { // fprintf( stderr, "selecting kouhos, phase 2\n" ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // } // fprintf( stderr, "\ndone\n\n" ); for( i=0; i 0 ) { // fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] ); // fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] ); picktmp = pickkouho[nkouho-1]; // fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho ); nkouho--; if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) ) { // fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq ); // fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] ); ; } else { *iptr++ = picktmp; npick++; // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq ); } } i = 1; while( npick0 ) { if( i ) { i = 0; rn = nkouho * 0.5; // fprintf( stderr, "rn = %d\n", rn ); } else { rn = rnd() * (nkouho); } picktmp = pickkouho[rn]; // fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho ); // fprintf( stderr, "#kouho before swap\n" ); // for( i=0; i 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? 
} } else gappick0( mseq1[0], seq[scores[picks[j]].numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[picks[j]].pointt ); } selfscore0 = scores[picks[j]].selfscore; pickmtx[j][0] = 0.0; for( i=j+1; i scores[picks[i]].orilen ) { longer = (double)scores[picks[j]].orilen; shorter = (double)scores[picks[i]].orilen; } else { longer = (double)scores[picks[i]].orilen; shorter = (double)scores[picks[j]].orilen; } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0; } else { if( fromaln ) { fprintf( stderr, "%d-%d/%d\r", j, i, npick ); pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; } else { // fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[picks[i]].numinseq] ); pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score ); } } } else { pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac; if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST; } } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); } dfromcp = AllocateDoubleMtx( 
npick, nin ); dfromc = AllocateDoubleMtx( npick, 0 ); for( i=0; i 2 ) { float avdist; float avdist1; float avdist2; float maxavdist; int splinter; int count; int dochokoho; splinter = 0; int *docholist; int *docholistbk; maxavdist = 0.0; for( i=0; i 1 ) { nyuko = 2; yukos[0] = picks[0]; yukos[1] = picks[1]; closeh[picks[0]] = yukos[0]; closeh[picks[1]] = yukos[1]; } else { nyuko = 1; yukos[0] = picks[0]; closeh[picks[0]] = yukos[0]; } #elif HUKINTOTREE if( npick > 2 ) { #if 0 float avdist; float maxavdist; int count; int splinter; maxavdist = 0.0; splinter=0; for( i=0; i, npick=%d members = \n", npick ); // for( i=0; i %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun ); #if 0 if( scores[picks[i]].orilen > scores[picks[j]].orilen ) { fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 ); tsukau[j] = 0; } else { fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 ); tsukau[i] = 0; } if( 0 && j == npick-1 ) tsukau[i] = 0; else tsukau[j] = 0; fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 ); #else tsukau[j] = 0; closeh[picks[j]] = closeh[picks[i]]; // fprintf( stderr, "%d => tsukawanai\n", j ); #endif } } } } for( ii=0,i=0; i yukomtx[i][j-i] ) minscoreinpick[i] = yukomtx[i][j-i]; } for( j=0; j yukomtx[j][i-j] ) minscoreinpick[i] = yukomtx[j][i-j]; } fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] ); } #endif #if TREE if( treeout ) { children = calloc( nyuko+1, sizeof( char * ) ); for( i=0; iselfscore ); // fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] ); uniform = -1; for( j=0; j scores[j].orilen ) { longer = scores[yukos[i]].orilen; shorter = scores[j].orilen; } else { shorter = scores[yukos[i]].orilen; longer = scores[j].orilen; } #if LENFAC // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); lenfac = 1.0 / ( shorter / longer * 
lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA ); // fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen ); #else lenfac = 1.0; #endif #if 0 // iihazu -> dame ii = s_y_map[j]; jj=s_y_map[yukos[i]]; if( ii != -1 && jj != -1 ) { if( dfromc[ii][yukos[jj]] != -0.5 ) { dfromc[i][j] = dfromc[ii][yukos[jj]]; } else { if( ii > jj ) { kk = jj; jj = ii; ii = kk; } dfromc[ii][yukos[jj]] = dfromc[i][j] = yukomtx[ii][jj-ii]; } } else #else if( dfromc[i][j] == -0.5 ) #endif { if( doalign ) { if( doalign == 'f' ) { dfromc[i][j] = ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1; if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0; } else { if( fromaln ) { dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } else { gappick0( mseq2[0], seq[scores[j].numinseq] ); dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } } } else { dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac; if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST; } } // fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] ); // if( i == 1 ) // fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] ); } // fprintf( stderr, "i=%d, freeing\n", i ); if( !doalign ) free( table1 ); if( doalign && doalign == 'f' ) free( blastresults ); } fprintf( stderr, " \r" ); for( i=0; iselfscore, scores->orilen, scores[nin-1].orilen, nin ); for( j=0; j%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 ); } else // if( belongto == -1 ) 
#else belongto = s_y_map[j]; if( belongto == -1 ) #endif { belongto = 0; // default ha horyu minscore = dfromc[0][j]; for( i=0; i ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 ) // if( rnd() < 0.5 ) // CHUUI !!!!! { // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] ); minscore = dfromc[i][j]; belongto = i; } } } #if 0 if( dfromc[belongto][j] > minscoreinpick[belongto] ) { fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); belongto = npick; } else fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); #endif // fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] ); // fprintf( stderr, "numin = %d\n", numin[belongto] ); outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) ); outs[belongto][numin[belongto]] = scores[j]; numin[belongto]++; } free( dfromcp ); FreeDoubleMtx( dfromc ); // fprintf( stderr, "##### npick = %d\n", npick ); // fprintf( stderr, "##### nyuko = %d\n", nyuko ); if( nyuko > 2 ) { fprintf( stderr, "upgma " ); // veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL, 1 ); fprintf( stderr, "\r \r" ); } else { topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) ); topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) ); topol[0][0][0] = 0; topol[0][0][1] = -1; topol[0][1][0] = 1; topol[0][1][1] = -1; } FreeFloatHalfMtx( yukomtx, npick ); #if 0 ii = nyuko-1; fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko ); for( j=0; j 1 ) { fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] ); for( j=0; jnuminseq ); } for( i=0; i= classsize || !aligned ) val = 0; else val = 1; if( nyuko > 1 ) { int *mem1p, *mem2p; int 
mem1size, mem2size; int v1 = 0, v2 = 0, v3 = 0; int nlim; int l; static int *mem1 = NULL; static int *mem2 = NULL; char **parttree = NULL; // by Mathog #if TREE if( treeout ) { parttree = (char **)calloc( nyuko, sizeof( char * ) ); for( i=0; inuminseq; } } *mptr = -1; mem2p = topol[l][1]; mptr = mem2; mem2size = 0; while( *mem2p != -1 ) { // fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] ); i = numin[*mem2p]; ptr = outs[*(mem2p++)]; mem2size += i; while( i-- ) { *mptr++ = (ptr++)->numinseq; } } *mptr = -1; qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare ); qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare ); // selhead( mem1, numin[0] ); // selhead( mem2, numin[1] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, "mem1 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem1[i]+1 ); if( mem1[i] == -1 ) break; } fprintf( stderr, "\n" ); fprintf( stderr, "mem2 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem2[i]+1 ); if( mem2[i] == -1 ) break; } fprintf( stderr, "\n" ); #endif #if 0 fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size ); fprintf( stderr, "before alignment\n" ); for( j=0; j v2 ) { v3 = v1; v1 = v2; v2 = v3; } // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 ); // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 ); // fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] ); sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] ); strcpy( parttree[v1], *tree ); // fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] ); // fprintf( stderr, "*tree = %s\n", *tree ); free( parttree[v2] ); parttree[v2] = NULL; } #endif #if 0 fprintf( stderr, "after alignment\n" ); for( j=0; j maxdepth ) maxdepth = *depthpt; (*depthpt)++; } } else { #if TREE if( treeout ) { sprintf( *tree, "%s", children[0] ); free( 
children[0] ); free( children ); } #endif } for( i=0; i njob ) tokyoripara = 0.0; alloclen = nlenmax * 2; name = AllocateCharMtx( njob, B+1 ); if( classsize == 1 ) seq = AllocateCharMtx( njob, 0 ); else seq = AllocateCharMtx( njob, alloclen+1 ); nlen = AllocateIntVec( njob ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); pointt = AllocateIntMtx( njob, 0 ); grpseq = AllocateIntVec( nlenmax + 1 ); order = (int *)calloc( njob + 1, sizeof( int ) ); whichgroup = (int *)calloc( njob, sizeof( int ) ); weight = (double *)calloc( njob, sizeof( double ) ); fprintf( stderr, "alloclen = %d in main\n", alloclen ); for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { if( seq_grp_nuc( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { if( seq_grp( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable( pointt[i], grpseq ); } // fprintf( stdout, ">%s\n", name[i] ); // fprintf( stdout, "%s\n", seq[i] ); } if( nunknown ) fprintf( stderr, "\nWARNING : %d unknown characters\n", nunknown ); // exit( 1 ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); scores = (Scores *)calloc( njob, sizeof( Scores ) ); // fprintf( stderr, "\nCalculating i-i scores ... 
\n" ); for( i=0; iselfscore = %d\n", i, scores[i].selfscore ); free( blastresults ); #else pscore = 0; if( scoremtx == -1 ) { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { if( *pt == 'u' ) *pt = 't'; aan = amino_n[(int)*pt]; if( aan<0 || aan >= 4 ) *pt = 'n'; if( *pt == 'n' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; } else { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { aan = amino_n[(int)*pt]; if( aan<0 || aan >= 20 ) *pt = 'X'; if( *pt == 'X' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; } #endif } else { pscore = 0; for( pt=seq[i]; *pt; pt++ ) { pscore += amino_dis[(int)*pt][(int)*pt]; } scores[i].selfscore = pscore; } // fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, pointt[i] ); scores[i].selfscore = commonsextet_p( table1, pointt[i] ); free( table1 ); } } if( tmpaminodis ) FreeIntMtx( tmpaminodis ); depth = 0; #if TREE if( treeout ) { tree = (char **)calloc( 1, sizeof( char *) ); *tree = NULL; // splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight ); completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) ); if( inputfile ) sprintf( treefile, "%s.tree", inputfile ); else sprintf( treefile, "splittbfast.tree" ); treefp = fopen( treefile, "w" ); fprintf( treefp, "%s\n", *tree ); fclose( treefp ); } else completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, 
tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #else completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #endif fprintf( stderr, "\nDone.\n\n" ); #if 1 groupnum = 0; groupid = -1; paramem = NULL; npara = 0; for( i=0; i 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 ); } else { paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) ); paramem[npara++] = pos; sprintf( tmpname, "Group-para %s", name[pos]+1 ); } tmpname[B-1] = 0; if( classsize > 1 && classsize <= njob ) strcpy( name[pos]+1, tmpname ); } if( paramem ) { paramem[npara] = -1; if( npara > 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } #else for( i=0; i 1 ) { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize ); fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum ); fprintf( stderr, "Note that the alignment is not completed.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); // } #endif if( 
!doalign ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } else { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "nseq = %d\n", njob ); fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize ); // fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" ); // fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" ); // fprintf( stderr, "please try \n" ); // fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" ); // fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" ); // fprintf( stderr, "and performs only intra-group alignments.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); // } #endif if( !doalign || fromaln ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } #if TREE if( treeout ) free( treefile ); #endif #if 0 fprintf( stdout, "weight =\n" ); for( i=0; ilen != 0; pat2++ ) // excl. 
len=0 { if( pat2->len + offset2 == offset1 ) { gmatch = diaf1 * pat2->freq; } } for( pat1=pat1bk+1; pat1->len != 0; pat1++ ) // excl. len=0 { for( pat2=pat2bk+1; pat2->len != 0; pat2++ ) // excl. len=0 { if( pat1->len + offset1 == pat2->len + offset2 ) { gmatch += pat1->freq * pat2->freq; // if( r ) fprintf( stderr, "match1!!, len=%d, gmatch=%f * %f\n", pat2->len, pat1->freq, pat2->freq ); } } } return( gmatch ); } #endif static float countnocountmatchx( Gappat *pat1, Gappat *pat2, int offset1, int offset2, int r ) { Gappat *pat1bk = pat1; Gappat *pat2bk = pat2; float val = 0.0; // pat1[][0] ha total gap. for( pat1=pat1bk+1; pat1->len != 0; pat1++ ) { for( pat2=pat2bk+1; pat2->len != 0; pat2++ ) { if( pat1->len + offset1 == pat2->len + offset2 ) { val += pat1->freq * pat2->freq; if( r ) fprintf( stderr, "y %d-%d, len=%d,%d, val = %f\n", (int)(pat1-pat1bk), (int)(pat2-pat2bk), pat1->len, pat2->len, val ); // 070405 // if( r ) fprintf( stderr, "y %d-%d, len=%d,%d, val = %f\n", pat1-pat1bk, pat2-pat2bk, pat1->len, pat2->len, val ); } } } if( r ) fprintf( stderr, "nocountmatch=%f\n", val ); return( val ); } #if 0 // by D.Mathog static float countnocountmatch( Gappat *pat1, Gappat *pat2, int r ) { // return( 0.0 ); Gappat *pat1bk = pat1; Gappat *pat2bk = pat2; float val = 0.0; // pat1[][0] ha total gap. 
for( pat1=pat1bk+1; pat1->len != 0; pat1++ ) { // if( r ) fprintf( stderr, "b %d-%d, len=%d,%d\n", pat1-pat1bk, pat2-pat2bk, pat1->len, pat2->len ); for( pat2=pat2bk+1; pat2->len != 0; pat2++ ) { if( pat1->len == pat2->len ) { // if( r ) fprintf( stderr, " x%d-%d, len=%d,%d\n", pat1-pat1bk, pat2-pat2bk, pat1->len, pat2->len ); val += pat1->freq * pat2->freq; // if( r ) fprintf( stderr, "y %d-%d, val = %f\n", pat1-pat1bk, pat2-pat2bk,val ); // if( r ) fprintf( stderr, "z tsugi, %d-%d, len=%d,%d\n", pat1-pat1bk+1, pat2-pat2bk+1, (pat1+1)->len, (pat2+1)->len ); } // if( r ) fprintf( stderr, "a %d-%d, len=%d,%d\n", pat1-pat1bk, pat2-pat2bk, pat1->len, pat2->len ); } } // fprintf( stderr, "nocountmatch=%f\n", val ); return( val ); } #endif static float countnocountx( Gappat *pat1, float diaf1, Gappat *pat2, int offset1, int r ) { // return( 0.0 ); float gmatch; Gappat *pat1bk = pat1; Gappat *pat2bk = pat2; gmatch = 0.0; for( pat2=pat2bk+1; pat2->len != 0; pat2++ ) // excl. len=0 { if( pat2->len == offset1 ) { gmatch = diaf1 * pat2->freq; // if( r ) fprintf( stderr, "match0!!, len=%d, gmatch=%f * %f\n", pat2->len, diaf1, pat2->freq ); } } for( pat1=pat1bk+1; pat1->len != 0; pat1++ ) // excl. len=0 { for( pat2=pat2bk+1; pat2->len != 0; pat2++ ) // excl. len=0 { if( pat1->len + offset1 == pat2->len ) { gmatch += pat1->freq * pat2->freq; // if( r ) fprintf( stderr, "match1!!, len=%d, gmatch=%f * %f\n", pat2->len, pat1->freq, pat2->freq ); } } } return( gmatch ); } #if 0 // by D.Mathog static float countnocount( Gappat *pat1, Gappat *pat2, int offset1, int offset2 ) //osoi { // return( 0.0 ); Gappat *pat1bk = pat1; Gappat *pat2bk = pat2; float val = 0.0; // pat1[][0] ha total gap. 
for( pat1=pat1bk+1; pat1->len != -1; pat1++ ) { for( pat2=pat2bk+1; pat2->len != -1; pat2++ ) { if( pat1->len+offset1 == pat2->len+offset2 ) { val += pat1->freq * pat2->freq; } } } // fprintf( stderr, "nocount=%f\n", val ); return( val ); } #endif #if 1 // tditeration float imp_match_out_scH( int i1, int j1 ) { // fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold ); // fprintf( stderr, "val = %f\n", impmtx[i1][j1] ); return( impmtx[i1][j1] ); } #endif static void imp_match_out_veadH( float *imp, int i1, int lgth2 ) { #if FASTMATCHCALC float *pt = impmtx[i1]; while( lgth2-- ) *imp++ += *pt++; #else int j; float *pt = impmtx[i1]; for( j=0; jstart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = pt - seq1[i] - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = pt - seq1[i] - 0; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = pt - seq1[i] - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = pt - seq2[j] - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = pt - seq2[j] - 0; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = pt - seq2[j] - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), 
start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } #if 1 k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); // fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } #else while( k1 <= end1 && k2 <= end2 ) { fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 ); if( !nocount1[k1] && !nocount2[k2] ) { impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold; fprintf( stderr, "marked\n" ); } else fprintf( stderr, "no count\n" ); k1++; k2++; } #endif tmpptr = tmpptr->next; } } } #if 0 if( clus1 == 1 && clus2 == 6 ) { fprintf( stderr, "\n" ); fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; 
} } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; float wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_scH( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; 
*--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS Gappat **gappat1; static TLS Gappat **gappat2; static TLS float *digf1; static TLS float *digf2; static TLS float *diaf1; static TLS float *diaf2; static TLS float *gapz1; static TLS float *gapz2; static TLS float *gapf1; static TLS float *gapf2; static TLS float *ogcp1g; static TLS float *ogcp2g; static TLS float *fgcp1g; static TLS float *fgcp2g; static TLS float *ogcp1; static TLS float *ogcp2; static TLS float *fgcp1; static TLS float *fgcp2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float fpenalty = (float)penalty; float tmppenal; float cumpenal; float *fgcp2pt; float *ogcp2pt; float fgcp1va; float ogcp1va; int maegap; #if 0 fprintf( stderr, "#### eff in SA+++align\n" ); fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "#### strlen( seq1[0] ) = %d\n", strlen( seq1[0] ) ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); free( gappat1 ); free( 
gappat2 ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gappat1 = (Gappat **)calloc( ll1+2, sizeof( Gappat * ) ); gappat2 = (Gappat **)calloc( ll2+2, sizeof( Gappat * ) ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); 
#endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { // if( g ) fprintf( stderr, "init-match penal1=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); // initverticalw[0] += g; // currentw[0] += g; // if( g ) fprintf( stderr, "init-match penal2=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); // initverticalw[0] += g; // currentw[0] += g; for( i=1; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif // tmppenal = fpenalty; tmppenal = diaf2[j] * ( 1.0 - gapf1[i] ) * fpenalty; if( gappat2[j][0].freq ) { tmppenal += ( gappat2[j][0].freq ) * ( 1.0 - gapf1[i] ) * fpenalty; // tmppenal -= ( countnocountx( gappat1[i], diaf1[i], gappat2[j], j-mpi-1, 0 ) ) * fpenalty; maegap = ijp[i-1][mpi]; maegap = 0; if( maegap == 0 ) { tmppenal -= ( countnocountx( gappat1[i], diaf1[i], gappat2[j], j-mpi-1, 0 ) ) * fpenalty; } #if 0 // attahouga yoi hazu else if( maegap < 0 ) // i jump { maegap = -maegap; tmppenal -= ( countnocountxx( gappat1[i], diaf1[i], gappat2[j], j-mpi-1+maegap, 0 ) ) * fpenalty; } else // j jump { tmppenal -= ( countnocountxx( gappat1[i], diaf1[i], gappat2[j], j-mpi-1, maegap ) ) * fpenalty; } #endif } if( (g=mi+tmppenal) > wm ) { // if( seq1[0][i] == 'A' && seq2[0][j] == 'A' ) fprintf( stderr, "jump i start=%f (i,j=%d,%d, *ijppt=%d, digf2[j]=%f, diaf2[j]=%f), %c-%c\n", g-mi, i, j, -(j-mpi), 
digf2[j], diaf2[j], seq1[0][i], seq2[0][j] ); wm = g; *ijppt = -( j - mpi ); } if( (g=*prept) >= mi ) { // fprintf( stderr, "jump i end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); mi = g; mpi = j-1; } else if( j != 1 ) { // mi += ( ogcp2g[j-0] + fgcp2g[j] ) * fpenalty * 0.5; // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", '=', '=', seq2[0][j-1], seq2[0][j], ogcp2g[j-0] * fpenalty*0.5, fgcp2g[j] * fpenalty*0.5 ); } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif // tmppenal = fpenalty; tmppenal = diaf1[i] * ( 1.0 - gapf2[j] ) * fpenalty; if( gappat1[i][0].freq ) { tmppenal += ( gappat1[i][0].freq ) * ( 1.0 - gapf2[j] ) * fpenalty; // tmppenal -= ( countnocountx( gappat2[j], diaf2[j], gappat1[i], i-*mpjpt-1, 1 ) ) * fpenalty; maegap = ijp[*mpjpt][j-1]; if( maegap == 0 ) { tmppenal -= ( countnocountx( gappat2[j], diaf2[j], gappat1[i], i-*mpjpt-1, 1 ) ) * fpenalty; } #if 0 // attahouga yoi hazu else if( maegap > 0 ) // j jump { tmppenal -= ( countnocountxx( gappat2[j], diaf2[j], gappat1[i], i-*mpjpt-1+maegap, 0 ) ) * fpenalty; } else // i jump { maegap = -maegap; tmppenal -= ( countnocountxx( gappat2[j], diaf2[j], gappat1[i], i-*mpjpt-1, maegap ) ) * fpenalty; } #endif } if( (g=*mjpt+tmppenal) > wm ) { // if( seq1[0][i] == 'S' && seq2[0][j] == 'S' ) fprintf( stderr, "jump j start at %d, %d, g=%f, %c-%c\n", i, j, g-*mjpt, seq1[0][i], seq2[0][j] ); wm = g; *ijppt = +( i - *mpjpt ); } if( (g=*prept) >= *mjpt ) { // fprintf( stderr, "jump j end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); *mjpt = g; *mpjpt = i-1; } else if( i != 1 ) { // m[j] += ( ogcp1g[i-0] + fgcp1g[i] ) * fpenalty * 0.5; // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", seq1[0][i-1], seq1[0][i], '=', '=', ogcp1g[i-0] * fpenalty*0.5, fgcp1g[i] * fpenalty*0.5 ); } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; } 
lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm; /* int ?????? */ float g; float *currentw, *previousw; float fpenalty = (float)penalty; float fpenalty_OP = (float)penalty_OP; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; int *ijpipt; int *ijpjpt; float *mjpt, *Mjpt, *prept, *curpt; int *mpjpt, *Mpjpt; #endif static float mi, *m; static float Mi, *largeM; static int **ijpi; static int **ijpj; static int mpi, *mp; static int Mpi, *Mp; static float *w1, *w2; static float *match; static float *initverticalw; /* kufuu sureba iranai */ static float *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static char **mseq; static float **cpmx1; static float **cpmx2; static int **intwork; static float **floatwork; static int orlgth1 = 0, orlgth2 = 0; float tbk; int tbki, tbkj; wm = 0.0; if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 <= 0 || lgth2 <= 0 ) { fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; // *ijpipt = i - 1; // iranai *ijpjpt = mpi; } if( (g=*prept) >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty) > wm ) { wm = g; *ijpipt = *mpjpt; *ijpjpt = j - 1; //IRU! } if( (g=*prept) >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 1 g = tbk + fpenalty_OP; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } if( Mi > tbk ) { tbk = Mi; //error desu. 
tbki = i-1; tbkj = Mpi; } if( *Mjpt > tbk ) { tbk = *Mjpt; tbki = *Mpjpt; tbkj = j-1; } if( *prept > *Mjpt ) { *Mjpt = *prept; *Mpjpt = i-1; } if( *prept > Mi ) { Mi = *prept; Mpi = j-1; } #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error } #if 0 for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm = %f\n", wm ); #endif return( wm ); } mafft-7.123-without-extensions/core/Ralignmm.c0000640000076500007650000010227712225721317020402 0ustar katohkatoh#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 1 #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 static TLS float **impmtx = NULL; #if 1 // tditeration to naiveRscore_imp de tsukawareru. 
float imp_match_out_scR( int i1, int j1 ) { // fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold ); // fprintf( stderr, "val = %f\n", impmtx[i1][j1] ); return( impmtx[i1][j1] ); } #endif static void imp_match_out_veadR( float *imp, int i1, int lgth2 ) { #if FASTMATCHCALC float *pt = impmtx[i1]; while( lgth2-- ) *imp++ += *pt++; #else int j; float *pt = impmtx[i1]; for( j=0; jstart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = pt - seq1[i] - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = pt - seq1[i] - 0; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = pt - seq1[i] - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = pt - seq2[j] - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = pt - seq2[j] - 0; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = pt - seq2[j] - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, 
end2 = %d\n", start1, end1, start2, end2 ); } #if 1 k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); // fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } #else while( k1 <= end1 && k2 <= end2 ) { fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 ); if( !nocount1[k1] && !nocount2[k2] ) { impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold; fprintf( stderr, "marked\n" ); } else fprintf( stderr, "no count\n" ); k1++; k2++; } #endif tmpptr = tmpptr->next; } } } #if 0 if( clus1 == 1 && clus2 == 6 ) { fprintf( stderr, "\n" ); fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; float *scarr; scarr = calloc( nalphabets, sizeof( float ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * 
cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijp, int icyc, int jcyc ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; float wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_scR( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? 
*/ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; int *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS float mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS float *w1, *w2; static TLS float *match; static TLS float *initverticalw; /* kufuu sureba iranai */ static TLS float *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS float *digf1; static TLS float *digf2; static TLS float *diaf1; static TLS float *diaf2; static TLS float *gapz1; static TLS float *gapz2; static TLS float *gapf1; static TLS float *gapf2; static TLS float *ogcp1g; static TLS float *ogcp2g; static TLS float *fgcp1g; static TLS float *fgcp2g; static TLS float *ogcp1; static TLS float *ogcp2; static TLS float *fgcp1; static TLS float *fgcp2; static TLS float **cpmx1; static TLS float **cpmx2; static TLS int **intwork; static TLS float **floatwork; static TLS int orlgth1 = 0, orlgth2 = 0; float fpenalty = (float)penalty; float tmppenal; float cumpenal; float *fgcp2pt; float *ogcp2pt; float fgcp1va; float ogcp1va; float kyokaipenal; #if 0 fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] ); #endif if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 == 0 || lgth2 == 0 ) { fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); 
FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( 
commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif kyokaipenal = 0.0; if( outgap == 1 ) { g = 0.0; g += ogcp1g[0] * ( 1.0-ogcp2g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal1=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += ogcp2g[0] * ( 1.0-ogcp1g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal2=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); g += fgcp1g[0] * ( 1.0-fgcp2g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal1=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); g += fgcp2g[0] * ( 1.0-fgcp1g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal2=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); kyokaipenal = g; initverticalw[0] += g; currentw[0] += g; cumpenal = 0.0; for( i=1; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif // tmppenal = ( (1.0-gapz1[i+1])*(1.0-fgcp2g[j]+ogcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf1[i])*(1.0-fgcp2g[j]) + gapf1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada tmppenal = ( (1.0-gapz1[i+1])*(1.0-fgcp2g[j]+ogcp2g[j]) + gapz1[i+1]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf1[i])*(1.0-fgcp2g[j]+ogcp2g[j]) + gapf1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i])*(1.0-diaf2[j]) + gapf1[i] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i])*fgcp2g[j] + gapf1[i] ) * 0.5 * fpenalty; // tmppenal = *fgcp2pt-fpenalty*0.5*gapf1[i]; // tmppenal = *fgcp2pt; if( (g=mi+tmppenal) > wm ) { // fprintf( 
stderr, "jump i start=%f (j=%d, fgcp2g[j]=%f, digf2[j]=%f, diaf2[j]=%f), %c-%c\n", g-mi, j, fgcp2g[j], digf2[j], diaf2[j], seq1[0][i], seq2[0][j] ); wm = g; *ijppt = -( j - mpi ); } // tmppenal = ( (1.0-gapz1[i])*(1.0-ogcp2g[j]+fgcp2g[j]-digf2[j]) ) * 0.5 * fpenalty; // mada tmppenal = ( (1.0-gapz1[i])*(1.0-ogcp2g[j]+fgcp2g[j]) + gapz1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf1[i-1])*(1.0-ogcp2g[j]+fgcp2g[j]) + gapf1[i-1]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = *ogcp2pt-fpenalty*0.5*(gapf2[j-1]+gapf1[i-1]); // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i-1])*(1.0-diaf2[j]) + gapf1[i-1] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i-1])*ogcp2g[j] + gapf1[i-1] ) * 0.5 * fpenalty; // tmppenal = *prept+*ogcp2pt; if( (g=*prept+tmppenal) >= mi ) { // fprintf( stderr, "jump i end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); mi = g; mpi = j-1; } else if( j != 1 ) { mi += ( ogcp2g[j-0] + fgcp2g[j] ) * fpenalty * 0.5; // CHUUI honto ha iru // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", '=', '=', seq2[0][j-1], seq2[0][j], ogcp2g[j-0] * fpenalty*0.5, fgcp2g[j] * fpenalty*0.5 ); } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif // tmppenal = ( (1.0-gapz2[j+1])*(1.0-fgcp1g[i]+ogcp1g[i]-digf1[i]) ) * 0.5 * fpenalty; // mada tmppenal = ( (1.0-gapz2[j+1])*(1.0-fgcp1g[i]+ogcp1g[i]) + gapz2[j+1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j])*(1.0-fgcp1g[i]+ogcp1g[i]) + gapf2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j])*(1.0-fgcp1g[i]) + gapf2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j])*(1.0-diaf1[i]) + gapf2[j] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j])*fgcp1g[i] + gapf2[j] ) * 0.5 * fpenalty; // tmppenal = fgcp1va-fpenalty*0.5*gapf2[j]; // tmppenal = fgcp1va; if( (g=*mjpt+tmppenal) > wm ) { // 
if( seq1[0][i] == 'Y' && seq2[0][j] == 'B' ) // fprintf( stderr, "jump j start=%f, %c-%c\n", g-*mjpt, seq1[0][i], seq2[0][j] ); wm = g; *ijppt = +( i - *mpjpt ); } // tmppenal = ( (1.0-gapz2[j])*(1.0-ogcp1g[i]+fgcp1g[i]-digf1[i]) ) * 0.5 * fpenalty; // mada tmppenal = ( (1.0-gapz2[j])*(1.0-ogcp1g[i]+fgcp1g[i]) + gapz2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j-1])*(1.0-ogcp1g[i]+fgcp1g[i]) + gapf2[j-1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j-1])*(1.0-ogcp1g[i]) + gapf2[j-1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ogcp1va-fpenalty*0.5*(gapf1[i-1]+gapf2[j-1]); // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j-1]) * (1.0-diaf1[i]) + gapf2[j-1] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j-1]) * ogcp1g[i] + gapf2[j-1] ) * 0.5 * fpenalty; // tmppenal = 0.5 * fpenalty - ( (1.0-gapf2[j-1]) * (ogcp1g[i]) + gapf2[j-1] ) * ( 0.5 * fpenalty ); // tmppenal = ogcp1va-fpenalty*0.5*gapf2[j-1]; // tmppenal = ogcp1va; if( (g=*prept+tmppenal) >= *mjpt ) { // if( seq1[0][i] == 'T' && seq2[0][j] == 'T' ) // fprintf( stderr, "jump j end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); *mjpt = g; *mpjpt = i-1; } else if( i != 1 ) { m[j] += ( ogcp1g[i-0] + fgcp1g[i] ) * fpenalty * 0.5; // CHUUI honto ha iru // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", seq1[0][i-1], seq1[0][i], '=', '=', ogcp1g[i-0] * fpenalty*0.5, fgcp1g[i] * fpenalty*0.5 ); } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i test.fftns2 # FFT-NS-2 % mafft --maxiterate 100 sample > test.fftnsi # FFT-NS-i % 
mafft --globalpair sample > test.gins1 # G-INS-1 % mafft --globalpair --maxiterate 100 sample > test.ginsi # G-INS-i % mafft --localpair sample > test.lins1 # L-INS-1 % mafft --localpair --maxiterate 100 sample > test.linsi # L-INS-i % diff test.fftns2 sample.fftns2 % diff test.fftnsi sample.fftnsi % diff test.gins1 sample.gins1 % diff test.ginsi sample.ginsi % diff test.lins1 sample.lins1 If you have the './extensions' directory, % mafft-qinsi samplerna > test.qinsi # Q-INS-i % mafft-xinsi samplerna > test.xinsi # X-INS-i % diff test.qinsi samplerna.qinsi % diff test.xinsi samplerna.xinsi If you use the multithread version, the results of iterative refinement methods (*-*-i) are not always identical. Try this test with the single-thread mode (--thread 0). 4. INPUT FORMAT fasta format. The type of input sequences (nucleotide or amino acid) is automatically recognized based on the frequency of A, T, G, C, U and N. 5. USAGE % /usr/local/bin/mafft input > output See also http://mafft.cbrc.jp/alignment/software/ 6. UNINSTALL # rm -r /usr/local/libexec/mafft # rm /usr/local/bin/mafft # rm /usr/local/bin/fftns # rm /usr/local/bin/fftnsi # rm /usr/local/bin/nwns # rm /usr/local/bin/nwnsi # rm /usr/local/bin/linsi # rm /usr/local/bin/ginsi # rm /usr/local/bin/mafft-* # rm /usr/local/share/man/man1/mafft* 7. LICENSE See the './license' file. 
If you have the extensions, see also the './license.extensions' file, mafft-7.123-without-extensions/test/0000750000076500007650000000000012227121003016471 5ustar katohkatohmafft-7.123-without-extensions/test/sample.linsi0000640000076500007650000007220012226665654021044 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] --------------------MNGTE-G---------------------DNFYVPFSNKTG --------------------------------LARSPYEYPQY----------------- -------YLAEPW---------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLN YILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLV VLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIPE GMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEKEVTRMVVLMVIGFL VCWVPYASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMI TTLCC-----GKNPLGDDE--SGASTSK-TEVSSVS-TSPVSPA---------------- ------------------------------------------------------------ ------------------------ > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] --------------------MNGTE-G---------------------PNFYVPFSNITG --------------------------------VVRSPFEQPQY----------------- -------YLAEPW---------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLN YILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLV VLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIPE GMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------TTQKAEKEVTRMVIIMVIFFL ICWLPYASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCML TSLCC-----GKNPLGDDE--ASATASK-TETSQVA-PA--------------------- ------------------------------------------------------------ ------------------------ > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 
--------------------MNGTE-G---------------------INFYVPMSNKTG --------------------------------VVRSPFEYPQY----------------- -------YLAEPW---------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLN YILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLV VLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMPE GMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------TTQKAEKEVTRMVILMVLGFM LAWTPYAVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMI TTICC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA---------------- ------------------------------------------------------------ ------------------------ > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish --------------------MNGTE-G---------------------KNFYVPMSNRTG --------------------------------LVRSPFEYPQY----------------- -------YLAEPW---------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLN FILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLV VLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIPE GMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEREVTKMVILMVFGFL IAWTPYATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCML TTIFC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA--------------------- ------------------------------------------------------------ ------------------------ > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish --------------------MNGTE-G---------------------NNFYVPLSNRTG --------------------------------LVRSPFEYPQY----------------- -------YLAEPW---------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLN FILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLV VLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIPE 
GIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEREVTKMVILMVLGFL VAWTPYATVAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCML TTLFC-----GKNPLGDEE-SSTVSTSK-TEVSSVS-PA--------------------- ------------------------------------------------------------ ------------------------ > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- --------------------MKQVP-E-------------------FHEDFYIPIPLDIN ------------------------------NLSAYSPFLVPQD----------------- -------HLGNQG---------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLN YILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLA VVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLF-GWS-----RYIPE GLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEREVTKMVVVMVLGFL VCWAPYASFSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM KMVC------GKN-IEEDE--ASTSSQV-TQVSSVA-PEK-------------------- ------------------------------------------------------------ ------------------------ > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] --------------------MRKMS-E---------------------EEFYL-----FK ------------------------------NISSVGPWDGPQY----------------- -------HIAPVW---------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLN YILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLA FLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFF-GWS-----RFIPE GLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------TTQKAEREVSRMVVVMVGSFC VCYVPYAAFAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM 
KMVC------GKA-MTDES--DTCSSQK-TEVSTVS-STQVGPN---------------- ------------------------------------------------------------ ------------------------ > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA IISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPH GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEKEVTRMVVVMVLAFC ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------ > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWS-LQRLAGRHPQDSYEDSTQ-S---------------------SIFTYTNSN--- --------------------------------STRGPFEGPNY----------------- -------HIAPRW---------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLN WILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA IISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPH GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEKEVTRMVVVMVLAFC FCWGPYAFFACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL QLF-------GKK-VDDGS--ELSSASK-TEVSSV---SSVSPA---------------- ------------------------------------------------------------ ------------------------ > 10== Z68193 1 human Red Opsin <>[] MAQQWS-LQRLAGRHPQDSYEDSTQ-S---------------------SIFTYTNSN--- --------------------------------STRGPFEGPNY----------------- -------HIAPRW---------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLN 
WILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA IISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIF-GWS-----RYWPH GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEKEVTRMVVVMIFAYC VCWGPYTFFACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL QLF-------GKK-VDDGS--ELSSASK-TEVSSV---SSVSPA---------------- ------------------------------------------------------------ ------------------------ > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD--DDTTR-G---------------------SVFTYTNTN--- --------------------------------NTRGPFEGPNY----------------- -------HIAPRW---------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLN WILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLA IISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIF-GWS-----RYWPH GLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEREVSRMVVVMIVAFC ICWGPYASFVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM QLF-------GKK-VDDGS--EASTTSR-TEVSSVS-NSSVAPA---------------- ------------------------------------------------------------ ------------------------ > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE--EDTTR-D---------------------SVFTYTNSN--- --------------------------------NTRGPFEGPNY----------------- -------HIAPRW---------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLN WILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLA IISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIF-GWS-----RYWPH GLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------STQKAEKEVSRMVVVMIVAYC 
FCWGPYTFFACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL QLF-------GKK-VDDGS--EVST-SR-TEVSSVS-NSSVSPA---------------- ------------------------------------------------------------ ------------------------ > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] ------------------------M-S---------------------SNSSQAPPN--- --------------------------------GTPGPFDGPQW----------------- ------PYQAPQS---------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLN YILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLA ILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLL-GWS-----SYVPE GLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------TTQRAEREVTRMVIVMVMAFL LCWLPYSTFALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL EMLCCG--YQPQR-TGKAS--PGTPGPH-ADVTAAGLRNKVMPAHP----V--------- ------------------------------------------------------------ ------------------------ > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] --------------------MESGNVS---------------------SSLF-------- --------------------------------GNVSTALRPEA--RLSA---ETRLLGWN VPPEELRHIPEHWLTYPEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSN ILVINLAFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNA FIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPE GYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQA KKMNVESL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RSNVDKNKETAEIRIAKAAITICFLFF CSWTPYGVMSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ KRCPWL--ALNEK-APESS--AVASTST-TQEPQQT---------------TAA------ ------------------------------------------------------------ ------------------------ > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 --------------------MEYHNVS---------------------SVL--------- 
--------------------------------GNVSSVLRPDA--RLSA---ESRLLGWN VPPDELRHIPEHWLIYPEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSN ILVINLAFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNA FIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPE GYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQA KKMNVDSL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RSNVDKSKEAAEIRIAKAAITICFLFF ASWTPYGVMSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ KRCPWL--AISEK-APESR--AAISTST-TQEQQQT---------------TAA------ ------------------------------------------------------------ ------------------------ > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] --------------------MEPLC----------------------------------- --------------------------------NASEPPLRPEA--R-SSGNGDLQFLGWN VPPDQIQYIPEHWLTQLEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSN MFVLNLAVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNA AIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPE GYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQA KKMNVESL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RSNVDKSKETAEIRIAKAAITICFLFF VSWTPYGVMSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ KRCPWL--GVNEK-SGEIS--SAQSTTT-QEQQQTT---------------AA------- ------------------------------------------------------------ ------------------------ > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 --------------------MDALC----------------------------------- --------------------------------NASEPPLRPEA--RMSSGSDELQFLGWN VPPDQIQYIPEHWLTQLEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSN MFVLNLAVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNA AIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPE GYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQA 
KKMNVESL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RSNVDKSKETAEIRIAKAAITICFLFF VSWTPYGVMSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ KRCPWL--GVNEK-SGEAS--SAQSTTT-QEQTQQT---------------SAA------ ------------------------------------------------------------ ------------------------ > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------MTNAT-G---------------------PQMAY-----YG --------------------------------AASMDFGYPE---GVSIVD--------F VRPEIKPYVHQHWYNYPPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPAN ILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLC MISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFF-GWG-----NYILE GILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQA KKMNVSTL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-NEADAQRAEIRIAKTALVNVSLWF ICWTPYALISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT QHLPWF--CVHET-ETKSN--DDSQSNS-TVAQDKA------------------------ ------------------------------------------------------------ ------------------------ > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------MANVT-G---------------------PQMAF-----YG --------------------------------SGAATFGYPE---GMTVAD--------F VPDRVKHMVLDHWYNYPPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPAN MLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLC MISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFF-GWG-----SYTLE GILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQA KKMNVTNL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-NEAETQRAEIRIAKTALVNVSLWF ICWTPYAAITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT QHLPWF--CVHEK-DPNDV--EENQSSN-TQTQEKS------------------------ 
------------------------------------------------------------ ------------------------ > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] MES--------------FAVAAAQL-G---------------------PHFA-------- ----------------------------------PLS--------NGSVVD--------K VTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPAN LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC MISLDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPE GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA KKMNVKSL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-SEDAEKSAEGKLAKVALVTITLWF MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK EKCPCC--VFGKV-DDGKS--SDAQSQA-TASEAES------KA---------------- ------------------------------------------------------------ ------------------------ > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] MES--------------FAVAAAQL-G---------------------PHFA-------- ----------------------------------PLS--------NGSVVD--------K VTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPAN LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC MISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAF-GWS-----RYVPE GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA KKMNVKSL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-SEDAEKSAEGKLAKVALVTITLWF MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK EKCPCC--VFGKV-DDGKS--SDAQSQA-TASEAES------KA---------------- ------------------------------------------------------------ ------------------------ > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MDS--------------FAAVATQL-G---------------------PQFA-------- ----------------------------------APS--------NGSVVD--------K VTPDMAHLISPYWDQFPAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPAN 
LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC MISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVF-GWS-----RYVPE GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA KKMNVKSL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-SEDADKSAEGKLAKVALVTISLWF MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK EKCPCC--VFGKV-DDGKS--SEAQSQA-TTSEAES------KA---------------- ------------------------------------------------------------ ------------------------ > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHL---------PETPFDLAHS-G---------------------PRFQ-------- ----------------------------------AQSSG------NGSVLD--------N VLPDMAHLVNPYWSRFAPMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPAN LLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMC MIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLI-GWS-----AYVPE GNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQA KKMNVKSL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-SEDCDKSAEGKLAKVALTTISLWF MAWTPYLVICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK EKCPMC--VFGNT-DEPKP--DAPASDTETTSEADS------KA---------------- ------------------------------------------------------------ ------------------------ > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLL---------PEPPLAMALL-G---------------------PRFE-------- ----------------------------------AQTGG------NRSVLD--------N VLPDMAPLVNPHWSRFAPMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPAN LLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMC MIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLI-GWS-----SYVPE GNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQA KKMNVKSL---------------------------------------------------- ------------------------------------------------------------ 
---------------------------------RS-SEDCDKSAENKLAKVALTTISLWF MAWTPYLIICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK EKCPMC--VCGTT-DEPKP--DAPPSDTETTSEAES------KD---------------- ------------------------------------------------------------ ------------------------ > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------MIAVS-G---------------------PSYE-------- ----------------------------------AFSYGGQARFNNQTVVD--------K VPPDMLHLIDANWYQYPPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSN LFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMT MIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMF-GWN-----RYVPE GNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQA KKMNVASL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RS-SENQNTSAECKLAKVALMTISLWF MAWTPYLVINFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF AKFPSL--AC-AA-EPSSD--AVSTTSG-TTTVTDN------EKS------NA------- ------------------------------------------------------------ ------------------------ > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] ----------------------MAN-Q---------------------LSYS-------- ----------------------------------SLGWPYQP---NASVVD--------T MPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTN LLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMV MITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFF-GWS-----RYVPE GNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQA KKMNVASL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RANADQQKQSAECRLAKVAMMTVGLWF MAWTPYLIISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY QRFPSL--ACGSG-ESGSD--VKSEASA-TTTMEEK------PKI------PEA------ ------------------------------------------------------------ ------------------------ > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] 
----------------------MVE-S---------------------TTLV-------- ----------------------------------NQTWWY-----NPTV----------- -------DIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPAN MFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMA MISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWG-----AYVPE GILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMA KRLNAKEL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RK-AQ-AGASAEMKLAKISMVIITQFM LSWSPYAIIALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ TTFPWLLTCCQFD-EKECE--DANDAEE-EVVASER----GGESR------DAAQMKEMM AMMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQ GAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] -----------------------MG-R---------------------DLRD-------- ----------------------------------NETWWY-----NPSI----------- -------VVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPAN MFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMA MISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWG-----AYTLE GVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMA KRLNAKEL---------------------------------------------------- ------------------------------------------------------------ ---------------------------------RK-AQ-AGANAEMRLAKISIVIVSQFL LSWSPYAVVALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS QTFPWVLTCCQFD-DKETE--DDKDAET-EIPAGES--SDAAPSA------DAAQMKEMM AMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQ GAPP------AAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ ---------------------MPHLLSGFLEVTASPA---PTW----------------D APPDNVSGCGEQI----NYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSN YLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLC 
VISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVND DKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG --------------FPRVQPESVISLNG-------------------------------- ------------------------------VVKLQKE-------------------VEEC AN------------------LSRLLKH------ER-KNISIFKREQKAATTLGIIVGAFT VCWLPFFLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR SLLQCQ--YRNIN-RKLSA----AGMHE-ALKLAER------PERSEFVLQNSDHCGK-- ------------------------------------------------------------ -------------------KGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ ---------------------MPHLLSGFLEVTASPA---PTW----------------D APPDNVSGCGEQI----NYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSN YLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLC VISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVND DKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG --------------FPRVQPESVISLNG-------------------------------- ------------------------------VVKLQKE-------------------VEEC AN------------------LSRLLKH------ER-KNISIFKREQKAATTLGIIVGAFT VCWLPFFLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR SLLQCQ--YRNIN-RKLSA----AGMHE-ALKLAER------PERSEFVLQNSDHCGK-- ------------------------------------------------------------ -------------------KGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] --------------------MDVLSPG-------------------------------QG --------------------------N---NTTSPPA---PFE----------------- -TGGNTTGIS-------DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKV EKTGADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA--------- ------NGAVRQGD---------------------------------DGAALEVIEVHRV GNSKEHLPLPSEAG--PTPCAPASFERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI LCWLPFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK 
KIIKCK--FCRQ------------------------------------------------ ------------------------------------------------------------ ------------------------ > 33=p A35181 serotonin receptor class 1A - rat --------------------MDVFSFG-------------------------------QG --------------------------N---NTTASQE---PFG----------------- -TGGNVTSIS-------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKV EKKGAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT--------- ------NGAVRQGD---------------------------------DEATLEVIEVHRV GNSKEHLPLPSESG--SNSYAPACLERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI LCWLPFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK KIIKCK--FCRR------------------------------------------------ ------------------------------------------------------------ ------------------------ > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] --------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---------- -TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ KILFGK--YRRGH----------------------------------------------- ------------------------------------------------------------ -------------------R---- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail --------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---------- 
-TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ KILFGK--YRRGH----------------------------------------------- ------------------------------------------------------------ -------------------R---- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi --------------------MEGAE-GQEELD-------------WEALYLRLP------ -------------------------LQ---NCSWNSTGWEPNW--NVTVV---------- -PNTTWWQASAPFDT--PAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAAN NLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLV AIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVSE DLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGAT ARGGVG--------PPPV------------------------------------------ ----PAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSC ANGLEADPPTTGYGAVAAAYYPSLVRR------KP-KEAADSKRERKAAKTLAIITGAFV ACWLPFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ RLLCGR--RVRRR----------------------------------------------- ------------------------------------------------------------ -------------------RAPQ- mafft-7.123-without-extensions/test/sample.fftns20000640000076500007650000007077012226665654021142 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] -------------------MNGTE------------------------GDNF-------- YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK----- ----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN 
ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------ ---------------------------------------------------AAAAQQ--- ------------------------------------------------------------ --------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT- HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC---------GKN PLGD-DE--SGASTSKTEVSSVS-TSPV-------------------------------- --------------------------------------------SPA------------- ------ > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] -------------------MNGTE------------------------GPNF-------- YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ----- ----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------ ---------------------------------------------------AAAQQQ--- ------------------------------------------------------------ --------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT- HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC---------GKN PLGD-DE--ASATASKTE------TSQV-------------------------------- --------------------------------------------APA------------- ------ > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 -------------------MNGTE------------------------GINF-------- YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK----- ----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE------------------------ ---------------------------------------------------AAAQQQ--- ------------------------------------------------------------ --------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT- NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC---------GKN PFGD-EDVSSTVSQSKTEVSSVS-SSQV-------------------------------- 
--------------------------------------------SPA------------- ------ > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish -------------------MNGTE------------------------GKNF-------- YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- ----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA------------------------ ---------------------------------------------------AAAQQQ--- ------------------------------------------------------------ --------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF- NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC---------GKN PLGD-DE-SSTVSTSKTEVSS------V-------------------------------- --------------------------------------------SPA------------- ------ > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish -------------------MNGTE------------------------GNNF-------- YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- ----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA------------------------ ---------------------------------------------------AAAQQQ--- ------------------------------------------------------------ --------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF- NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC---------GKN PLGD-EE-SSTVSTSKTEVSS------V-------------------------------- --------------------------------------------SPA------------- ------ > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- -------------------MKQVPEF----------------------HEDF-------- YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI----- ----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S PLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FT 
FKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNN ESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL------------------------ ---------------------------------------------------AAKAQA--- ------------------------------------------------------------ --------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS- HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC---------GKN -IEE-DE--ASTSSQVTQVSS------V-------------------------------- --------------------------------------------APEK------------ ------ > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] -------------------MRKMS------------------------EEEF-------- YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA----- ----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V FPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FR FSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRS ESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA------------------------ ---------------------------------------------------VAAQQQ--- ------------------------------------------------------------ --------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN- NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC---------GKA -MTD-ES--DTCSSQKTEVSTVS-STQV-------------------------------- --------------------------------------------GPN------------- ------ > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------DLAETVIA-S TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ ---------------------------------------------------VAKQQK--- ------------------------------------------------------------ --------------ESESTQK------AEKEVTRMVVVMVLAFC---------------- ------------------------------------------------------------ 
------------------------------------------------------------ ------------------------------------------------------------ ------ > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] -------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- ----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S TISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VR FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ ---------------------------------------------------VAKQQK--- ------------------------------------------------------------ --------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA- NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK -VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- --------------------------------------------SPA------------- ------ > 10== Z68193 1 human Red Opsin <>[] -------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- ----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR FDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------ ---------------------------------------------------VAKQQK--- ------------------------------------------------------------ --------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA- NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK -VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- --------------------------------------------SPA------------- ------ > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] -------------------MTEAWNV----------AVFAARRSRDD-DDTT-------- RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV----- ----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S TISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IK 
FDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGC QSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA------------------------ ---------------------------------------------------VAAQQK--- ------------------------------------------------------------ --------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA- NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF----------GKK -VDD-GS--EASTTSRTEVSSVS-NSSV-------------------------------- --------------------------------------------APA------------- ------ > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] -------------------MA-AWEA----------AFAARRRHEE--EDTT-------- RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV----- ----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S TISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IK FDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGV QSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA------------------------ ---------------------------------------------------VAAQQK--- ------------------------------------------------------------ --------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA- NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF----------GKK -VDD-GS--EVST-SRTEVSSVS-NSSV-------------------------------- --------------------------------------------SPA------------- ------ > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] -------------------MS---------------------------SNSS-------- QAP--------PNG-TPGPF----------------DGPQW------PYQAPQST----- ----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S SVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQ FQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN-- NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA------------------------ ---------------------------------------------------AAAQQK--- ------------------------------------------------------------ --------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT- HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY-----QPQR 
-TGK-AS--PGTPGPHADVTAAGLRNKV-------------------------------- --------------------------------------------MPAHP---V------- ------ > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] ----------MESGNVSSSLFGNVST----------ALRPEARL----SA---------- -ETRLLGW--------NVPP----------------EELR--------HIPEHWLTYPEP PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T PIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--K MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDT RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ ---------------------------------------------------LRDQAKKM- --------------------------------NVESL----------------------- -----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF- GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL--------N EKAP-ES-SAVASTSTTQEP-QQ-TTAA-------------------------------- ------------------------------------------------------------ ------ > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 ----------MEYHNVSSVL-GNVSS----------VLRPDARL----SA---------- -ESRLLGW--------NVPP----------------DELR--------HIPEHWLIYPEP PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T PIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--K MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDT RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ ---------------------------------------------------LRDQAKKM- --------------------------------NVDSL----------------------- -----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF- GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI--------S EKAP-ES-RAAISTSTTQEQ-QQ-TTAA-------------------------------- ------------------------------------------------------------ ------ > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ----------ME------PLCNASEP----------PLRPEAR-----SSGN-------- GDLQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP 
PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A PIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT RLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------ ---------------------------------------------------LREQAKKM- --------------------------------NVESL----------------------- -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV--------N EKSG-EI-SSAQST-TTQEQ-QQ-TTAA-------------------------------- ------------------------------------------------------------ ------ > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 ----------MD------ALCNASEP----------PLRPEARM----SSGS-------- DELQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A PIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT RLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------ ---------------------------------------------------LREQAKKM- --------------------------------NVESL----------------------- -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV--------N EKSG-EA-SSAQST-TTQEQTQQ-TSAA-------------------------------- ------------------------------------------------------------ ------ > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 -------------------MTNATGP----------QMAYYGAA----SMD--------- -----FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNYPPV NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V PFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PK LTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNT FSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------ ---------------------------------------------------MRAQAKKM- --------------------------------NVSTL----------------------- 
-----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM- GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HET ETKS-ND-DSQSNSTVAQDKA--------------------------------------- ------------------------------------------------------------ ------ > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 -------------------MANVTGP----------QMAFYGSG----AAT--------- -----FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNYPPV NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F PPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PK LTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNT ITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------ ---------------------------------------------------MRAQAKKM- --------------------------------NVTNL----------------------- -----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL- GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEK DPND-VE-ENQSSNTQTQEKS--------------------------------------- ------------------------------------------------------------ ------ > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ----------ME---SFAVAAAQLGP----------HFAPLS------------------ ----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ ---------------------------------------------------MREQAKKM- --------------------------------NVKSL----------------------- -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV DDGK-SS-DAQSQATASEAESKA------------------------------------- ------------------------------------------------------------ ------ > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ----------ME---SFAVAAAQLGP----------HFAPLS------------------ 
----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ ---------------------------------------------------MREQAKKM- --------------------------------NVKSL----------------------- -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV DDGK-SS-DAQSQATASEAESKA------------------------------------- ------------------------------------------------------------ ------ > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' ----------MD---SFAAVATQLGP----------QFAAPS------------------ ----------NGSVVDKVTP----------------DMAH--------LISPYWDQFPAM DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ ---------------------------------------------------MREQAKKM- --------------------------------NVKSL----------------------- -----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF- KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV DDGK-SS-EAQSQATTSEAESKA------------------------------------- ------------------------------------------------------------ ------ > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] -----MERSHLP---ETPFDLAHSGP----------RFQAQSSG---------------- ----------NGSVLDNVLP----------------DMAH--------LVNPYWSRFAPM DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S PVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP MTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNP RSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------ ---------------------------------------------------MREQAKKM- 
--------------------------------NVKSL----------------------- -----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF- KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNT DEPKPDA-PASDTETTSEADSKA------------------------------------- ------------------------------------------------------------ ------ > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 -----MERSLLP---EPPLAMALLGP----------RFEAQTGG---------------- ----------NRSVLDNVLP----------------DMAP--------LVNPHWSRFAPM DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S PVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP MTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNP RSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------ ---------------------------------------------------MRDQAKKM- --------------------------------NVKSL----------------------- -----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF- KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTT DEPKPDA-PPSDTETTSEAESKD------------------------------------- ------------------------------------------------------------ ------ > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] -------------------MIAVSGP----------SYEAFSYG----GQA--------- ----RF---NNQTVVDKVPP----------------DMLH--------LIDANWYQYPPL NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S PPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KP LSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLS ASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------ ---------------------------------------------------MREQAKKM- --------------------------------NVASL----------------------- -----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF- NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AA EPSS-DA-VSTTSGTTTVTDNEK-SNA--------------------------------- ------------------------------------------------------------ ------ > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] 
---------------------MANQL----------SYSSLGWP----YQP--------- ----------NASVVDTMPK----------------EMLY--------MIHEHWYAFPPM NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M PTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-AP LTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSS ASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------ ---------------------------------------------------LREQAKKM- --------------------------------NVASL----------------------- -----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF- SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSG ESGS-DV-KSEASATTTMEEKPK-IPEA-------------------------------- ------------------------------------------------------------ ------ > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ------------------------------------MVESTTLV----NQT--------- -----WWY--NPTVD----------------------------------IHPHWAKFDPI PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF PLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKK MSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPST RSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------ ---------------------------------------------------MAAMAKRL- --------------------------------NAKEL----------------------- -----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF- GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKEC EDAN-DA-EEEVVASER--GGES-RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQG---VDNQAYQA ------ > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] -------------------------------------MGRDLRD----NET--------- -----WWY--NPSIV----------------------------------VHPHWREFDQV PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF PLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKK MSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTT RSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------ 
---------------------------------------------------MAAMAKRL- --------------------------------NAKEL----------------------- -----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF- GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKET EDDK-DA-ETEIPAGESSDAAPS-ADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQG---VDNQAYQA ------ > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ -MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- -GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- ----------------------------------P--------GFPR----VQPES---- ---VISL-----------------NGVVKLQ--------KEVEECAN------------- -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR--------- -----NINRKLSAAGMHEALKLA------------------------------------- -------------------------------------------ERPERSEFVLQNSDHCG KKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ -MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- -GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- ----------------------------------P--------GFPR----VQPES---- ---VISL-----------------NGVVKLQ--------KEVEECAN------------- -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------- -----NINRKLSAAGMHEALKLA------------------------------------- 
-------------------------------------------ERPERSEFVLQNSDHCG KKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] ----------MD-------VLSPG------------QGNNTTSPPAPFETGG-------- ----------NTTGISDVTV---------------------------------------- --SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- -GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ ---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR- --------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG- -PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- -----RQ----------------------------------------------------- ------------------------------------------------------------ ------ > 33=p A35181 serotonin receptor class 1A - rat ----------MD-------VFSFG------------QGNNTTASQEPFGTGG-------- ----------NVTSISDVTF---------------------------------------- --SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- -GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ ---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG- --------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG- -SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- -----RR----------------------------------------------------- ------------------------------------------------------------ ------ > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- ----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR 
RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- -GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- -----RGHR--------------------------------------------------- ------------------------------------------------------------ ------ > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- ----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- -GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- -----RGHR--------------------------------------------------- ------------------------------------------------------------ ------ > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi -MEGAEGQEELD-------WEAL-------YLRLP--LQNCSWNSTGWEPNW-------- ----------NVTVVPNTTW---------WQASAPFDTPAALVRAAAK------------ --------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV--------- -GYQIFATASSFYVPVLIILILYWRIYQTARKRIR------------------------- --------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGI AAAVVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGA VAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT- CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV--------- 
-----RRRRA-------------------------------------------------- ---------------------------------------------PQ------------- ------ mafft-7.123-without-extensions/test/sample.parttree0000640000076500007650000007224412226665654021564 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MNG--------------------------------------------------------- -TE--GDNFYVPF-----------------SNKTGLARSPYEYPQ---------Y-YLAE PWK---------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANL FMVLFG-FTVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVI CKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDY YTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAAQQ---------------------------------ESASTQKAEKEVTRMVVLMVIG FLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM ITTLC----C---GKNPLGD-DE--SGASTSKTEV------------------------- --------------------------------------------------------SSVS -------TSPVSP-A---------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MNG--------------------------------------------------------- -TE--GPNFYVPF-----------------SNITGVVRSPFEQPQ---------Y-YLAE PWQ---------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADL FMVFGG-FTTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVV CKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDY YTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------ESATTQKAEKEVTRMVIIMVIF FLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM LTSLC----C---GKNPLGD-DE--ASATASKTE-------------------------- ------------------------------------------------------------ -------TSQVAP-A---------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 
MNG--------------------------------------------------------- -TE--GINFYVPM-----------------SNKTGVVRSPFEYPQ---------Y-YLAE PWK---------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADL FMACFG-FTVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVV CKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDY YTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------ESATTQKAEKEVTRMVILMVLG FMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM ITTIC----C---GKNPFGD-EDVSSTVSQSKTEV------------------------- --------------------------------------------------------SSVS -------SSQVSP-A---------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MNG--------------------------------------------------------- -TE--GKNFYVPM-----------------SNRTGLVRSPFEYPQ---------Y-YLAE PWQ---------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGT IMVCFG-FTVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV CKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDY YTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------DSASTQKAEREVTKMVILMVFG FLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM LTTIF----C---GKNPLGD-DE-SSTVSTSKTEV------------------------- --------------------------------------------------------SS-- ----------VSP-A---------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MNG--------------------------------------------------------- -TE--GNNFYVPL-----------------SNRTGLVRSPFEYPQ---------Y-YLAE PWQ---------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGA IMVCFG-FTVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV CKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDY 
YTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------DSASTQKAEREVTKMVILMVLG FLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCM LTTLF----C---GKNPLGD-EE-SSTVSTSKTEV------------------------- --------------------------------------------------------SS-- ----------VSP-A---------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MKQ--------------------------------------------------------- -VPEFHEDFYIPIPL-------------DINNLS--AYSPFLVPQ---------D-HLGN QGI---------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANL FVAIFG-SPLSFYSF-FN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVI CKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDW YTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL--------------- ------------------------------------------------------------ -----------------------------------------------------------A AKAQA---------------------------------DSASTQKAEREVTKMVVVMVLG FLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCM M-KMV----C---GKN-IEE-DE--ASTSSQVTQV------------------------- --------------------------------------------------------SS-- ----------VAPEK---------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MRK--------------------------------------------------------- -MS--EEEFYL------------------FKNISSV--GPWDGPQ---------Y-HIAP VWA---------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGF LLCIFS-VFPVFVAS-CN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVI CKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDW YTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AAQQQ---------------------------------ESATTQKAEREVSRMVVVMVGS FCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACI 
M-KMV----C---GKA-MTD-ES--DTCSSQKTEV------------------------- --------------------------------------------------------STVS -------STQVGP-N---------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------DL AETVIA-STISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVV CKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMVLA FC---------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLA---------------------------------------GRHPQDSYED STQ--SSIFTYTN-----------------SNST---RGPFEGPN---------Y-HIAP RWV---------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL AETVIA-STISVVNQ-VY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVV CKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMVLA FCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCI LQLF---------GKK-VDD-GS--ELSSASKTEV------------------------- --------------------------------------------------------SSV- --------SSVSP-A---------- > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLA---------------------------------------GRHPQDSYED STQ--SSIFTYTN-----------------SNST---RGPFEGPN---------Y-HIAP RWV---------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL 
AETVIA-STISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVV CKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMIFA YCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCI LQLF---------GKK-VDD-GS--ELSSASKTEV------------------------- --------------------------------------------------------SSV- --------SSVSP-A---------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFA---------------------------------------ARRSRDD-DD TTR--GSVFTYTN-----------------TNNT---RGPFEGPN---------Y-HIAP RWV---------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDL VETLVA-STISVFNQ-IF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVV CKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AAQQK---------------------------------ESESTQKAEREVSRMVVVMIVA FCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCI MQLF---------GKK-VDD-GS--EASTTSRTEV------------------------- --------------------------------------------------------SSVS -------NSSVAP-A---------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MAA-WEAAFAA---------------------------------------RRRHEE--ED TTR--DSVFTYTN-----------------SNNT---RGPFEGPN---------Y-HIAP RWV---------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADL GETVIA-STISVINQ-IS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVV CKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V 
AAQQK---------------------------------ESESTQKAEKEVSRMVVVMIVA YCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCI LQLF---------GKK-VDD-GS--EVST-SRTEV------------------------- --------------------------------------------------------SSVS -------NSSVSP-A---------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS--------------------------------------------------------SN SSQ--AP-----------------------PNGT---PGPFDGPQ---------WPYQAP QST---------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADL LVTLCG-SSVSLSNN-IN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVV CKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNW YTGGSNN--NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQK---------------------------------EADTTQRAEREVTRMVIVMVMA FLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCL LEMLC----CGYQPQR-TGK-AS--PGTPGPHADV------------------------- --------------------------------------------------------TAAG ------LRNKVMP-AH-------PV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNVS-----------------------------------------------SSLFGN VSTALRPEARLSA---ETRLL----GW----------NVPPEELR-----------HIPE HWLTYPEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDF MMMVK--TPIFIYNS-FH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVI TRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDY LT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L RDQAKK--------------------MNVESL---RSNVDKNKETAEIRIAKAAITICFL FFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMEL QKRCP----WLALNEKAPE--SS-AVASTSTTQEP------------------------- ------------------------------------------------------------ --------QQTTA-A---------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 
MEYHNVS-----------------------------------------------SVL-GN VSSVLRPDARLSA---ESRLL----GW----------NVPPDELR-----------HIPE HWLIYPEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDF MMMIK--TPIFIYNS-FH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVI TRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDY LT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L RDQAKK--------------------MNVDSL---RSNVDKSKEAAEIRIAKAAITICFL FFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMEL QKRCP----WLAISEKAPE--SR-AAISTSTTQEQ------------------------- ------------------------------------------------------------ --------QQTTA-A---------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME-----------------------------------------------------PLCNA SEPPLRPEAR-SSGNGDLQFL----GW----------NVPPDQIQ-----------YIPE HWLTQLEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL IMCLK--APIF--NS-FH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVI TKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDY LS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLEL QKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ------------------------- ------------------------------------------------------------ --------QQTTA-A---------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD-----------------------------------------------------ALCNA SEPPLRPEARMSSGSDELQFL----GW----------NVPPDQIQ-----------YIPE HWLTQLEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL IMCLK--APIFIYNS-FH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVI TKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDY 
LS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMEL QKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ------------------------- ------------------------------------------------------------ -------TQQTSA-A---------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT---------------------------------------------------------- --NATGPQMAYYGAASMD-FG-------YPEGVSIVDFVRPEIKP-----------YVHQ HWYNYPPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDL IMLTTN-VPFFTYNC-FSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNII CNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDY LT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA--------------- ------------------------------------------------------------ -----------------------------------------------------------M RAQAKK--------------------MNVSTL---RS-NEADAQRAEIRIAKTALVNVSL WFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAI TQHLP----WFCVHETETKS-ND-DSQSNSTVAQ-------------------------- ------------------------------------------------------------ -----------DK-A---------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA---------------------------------------------------------- --NVTGPQMAFYGSGAAT-FG-------YPEGMTVADFVPDRVKH-----------MVLD HWYNYPPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDL IMLTTN-FPPFCYNC-FSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNII CNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDY FT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA--------------- ------------------------------------------------------------ -----------------------------------------------------------M RAQAKK--------------------MNVTNL---RS-NEAETQRAEIRIAKTALVNVSL WFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAI 
TQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ-------------------------- ------------------------------------------------------------ -----------EK-S---------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ME------------------------------------------------------SFAV AAAQLGPHFAPLS------------------NGSVVDKVTPDMAH-----------LISP YWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------------- --------------------------------------------------------E--- -----------SK-A---------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ME------------------------------------------------------SFAV AAAQLGPHFAPLS------------------NGSVVDKVTPDMAH-----------LISP YWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------------- --------------------------------------------------------E--- -----------SK-A---------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MD------------------------------------------------------SFAA VATQLGPQFAAPS------------------NGSVVDKVTPDMAH-----------LISP 
YWDQFPAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDADKSAEGKLAKVALVTISL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA------------------------- --------------------------------------------------------E--- -----------SK-A---------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHL-------------------------------------------------PETPF DLAHSGPRFQAQSSG----------------NGSVLDNVLPDMAH-----------LVNP YWSRFAPMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF CMMASQ-SPVMIINF-YY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVI VKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDY MT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDCDKSAEGKLAKVALTTISL WFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVL KEKCP----MCVFGNTDEPKPDA-PASDTETTSEA------------------------- --------------------------------------------------------D--- -----------SK-A---------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLL-------------------------------------------------PEPPL AMALLGPRFEAQTGG----------------NRSVLDNVLPDMAP-----------LVNP HWSRFAPMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF CMMASQ-SPVMIINF-YY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVI VKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDY MT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA--------------- ------------------------------------------------------------ 
-----------------------------------------------------------M RDQAKK--------------------MNVKSL---RS-SEDCDKSAENKLAKVALTTISL WFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVL KEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA------------------------- --------------------------------------------------------E--- -----------SK-D---------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI---------------------------------------------------------- --AVSGPSYEAFSYGGQARF----------NNQTVVDKVPPDMLH-----------LIDA NWYQYPPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNF LMMFCM-SPPMVINC-YY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVI VKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDY FN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVASL---RS-SENQNTSAECKLAKVALMTISL WFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAAL FAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT------------------------- --------------------------------------------------------DNEK -----------SN-A---------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M----------------------------------------------------------- ---ANQLSYSSLGWPYQP-------------NASVVDTMPKEMLY-----------MIHE HWYAFPPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDF CMMAFM-MPTMTSNC-FA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVI VRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDY LT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVASL---RANADQQKQSAECRLAKVAMMTVGL WFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAAL YQRFP----SLACGSGESGS-DV-KSEASATTTME------------------------- --------------------------------------------------------EKPK ----------IPE-A---------- > 27== X07797 1 Octopus 
dofleini rhodopsin <>[FEBS232(1),69-72'88] MVES-------------------------------------------------------- -------------------TTLVNQTWWY--NPTVD---------------------IHP HWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDL SFSAINGFPLKTISA-FM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVI GRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDY LS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE--------------- ------------------------------------------------------------ -----------------------------------------------------------M AAMAKR--------------------LNAKEL---R--KAQAGASAEMKLAKISMVIITQ FMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAI QTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQ QAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQ GAPPQGVDNQAYQ-A---------- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGRD-------------------------------------------------------- -------------------LR-DNETWWY--NPSIV---------------------VHP HWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDF TFSLVNGFPLMTISC-FL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVI GRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDY IS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE--------------- ------------------------------------------------------------ -----------------------------------------------------------M AAMAKR--------------------LNAKEL---R--KAQAGANAEMRLAKISIVIVSQ FLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAI SQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQ QAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPP AAPPQGVDNQAYQ-A---------- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M--------------------------------------------------PHLLSGFLE VTASPAPTW----------------------------DAPPDNVS--GCGEQINY----- --------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAV-MPFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGI 
TRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS- -------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------------ -----PGF---------------------------------PRVQPESVI---------- ------------SLNGVVK----------------------LQKEVEECAN--------L SRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIVGA FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTS RSLLQ--------CQYRNIN----RKLSAAGMHEA-------------------LKLA-- --------------------------------------------------------ERPE ------RSEFVLQNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M--------------------------------------------------PHLLSGFLE VTASPAPTW----------------------------DAPPDNVS--GCGEQINY----- --------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAV-MPFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGI TRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS- -------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------------ -----PGF---------------------------------PRVQPESVI---------- ------------SLNGVVK----------------------LQKEVEECAN--------L SRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIVGA FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTY RSLLQ--------CQYRNIN----RKLSAAGMHEA-------------------LKLA-- --------------------------------------------------------ERPE ------RSEFVLQNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] M-DVLSPGQ--------GNNTTSPPAPFETGGNTTGI----------------------- --SDVTVSY--------------------------------------------------- ------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS- -------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------- ------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWR LGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP----- ASFERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF 
KKIIK--------CKFCR------------------------------------------ ------------------------------------------------------------ ------------------------Q > 33=p A35181 serotonin receptor class 1A - rat M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI----------------------- --SDVTFSY--------------------------------------------------- ------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS- -------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------- ------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWR RCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP----- ACLERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF KKIIK--------CKFCR------------------------------------------ ------------------------------------------------------------ ------------------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L VTSDFNDSYGLTG-----QFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY----- --------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL MVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS- -------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------ ----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKILF--------GKYRRG----------------------------------------- ------------------------------------------------------------ -----------------------HR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L VTSDFNDSYGLTG-----QFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY----- 
--------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL MVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS- -------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------ ----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKILF--------GKYRRG----------------------------------------- ------------------------------------------------------------ -----------------------HR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N VTVVPNTTW---------------------WQASAPFDTPAALVR--------------- ------------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADL LVACLV-MPLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAV TN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVVS- -------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR--------------- ------------------------------RRGATARGGVGPPPVP-------------- -------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNN TSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIITGA FVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF QRLLC--------GRRVRRR----R----------------------------------- ------------------------------------------------------------ ----------------------APQ mafft-7.123-without-extensions/test/sample.dpparttree0000640000076500007650000007160412226665654022107 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MNG--------------------------------------------------------- -TE--GDNFYVPF-----------------SNKTGLARSPYEYPQ---------Y-YLAE PWK---------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANL FMVLFGF-TVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVI CKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDY 
YTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAAQQ---------------------------------ESASTQKAEKEVTRMVVLMVIG FLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM ITTLCC-------GKNPLGD-DE---SGASTSKT------------------------EV SSVS-------------------------------------------------------- ------TSPVSP-A--- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MNG--------------------------------------------------------- -TE--GPNFYVPF-----------------SNITGVVRSPFEQPQ---------Y-YLAE PWQ---------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADL FMVFGGF-TTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVV CKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDY YTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------ESATTQKAEKEVTRMVIIMVIF FLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM LTSLCC-------GKNPLGD-DE---ASATASKT------------------------E- ------------------------------------------------------------ ------TSQVAP-A--- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MNG--------------------------------------------------------- -TE--GINFYVPM-----------------SNKTGVVRSPFEYPQ---------Y-YLAE PWK---------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADL FMACFGF-TVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVV CKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDY YTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------ESATTQKAEKEVTRMVILMVLG FMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM ITTICC-------GKNPFGD-EDV-SSTVSQSKT------------------------EV 
SSVS-------------------------------------------------------- ------SSQVSP-A--- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MNG--------------------------------------------------------- -TE--GKNFYVPM-----------------SNRTGLVRSPFEYPQ---------Y-YLAE PWQ---------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGT IMVCFGF-TVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV CKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDY YTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------DSASTQKAEREVTKMVILMVFG FLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM LTTIFC-------GKNPLGD-DE--SSTVSTSKT------------------------EV SS---------------------------------------------------------- ---------VSP-A--- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MNG--------------------------------------------------------- -TE--GNNFYVPL-----------------SNRTGLVRSPFEYPQ---------Y-YLAE PWQ---------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGA IMVCFGF-TVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV CKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDY YTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------A AAQQQ---------------------------------DSASTQKAEREVTKMVILMVLG FLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCM LTTLFC-------GKNPLGD-EE--SSTVSTSKT------------------------EV SS---------------------------------------------------------- ---------VSP-A--- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MKQ--------------------------------------------------------- -VPEFHEDFYIPIP-------------LDINNLS--AYSPFLVPQ---------D-HLGN QGI---------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANL 
FVAIFGS-PLSFYSF-FN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVI CKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDW YTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL--------------- ------------------------------------------------------------ -----------------------------------------------------------A AKAQA---------------------------------DSASTQKAEREVTKMVVVMVLG FLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCM M-KMVC-------GKN-IEE-DE---ASTSSQVT------------------------QV SS---------------------------------------------------------- ---------VAPEK--- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MRK--------------------------------------------------------- -MS--EEEFYL------------------FKNISSV--GPWDGPQ---------Y-HIAP VWA---------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGF LLCIFSV-FPVFVAS-CN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVI CKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDW YTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AAQQQ---------------------------------ESATTQKAEREVSRMVVVMVGS FCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACI M-KMVC-------GKA-MTD-ES---DTCSSQKT------------------------EV STVS-------------------------------------------------------- ------STQVGP-N--- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------DL AETVIAS-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVV CKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMVLA 
FC---------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ----------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLA---------------------------------------GRHPQDSYED STQ--SSIFTYTN-----------------SNST---RGPFEGPN---------Y-HIAP RWV---------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL AETVIAS-TISVVNQ-VY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVV CKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMVLA FCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCI LQLF---------GKK-VDD-GS---ELSSASKT------------------------EV SSV--------------------------------------------------------- -------SSVSP-A--- > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLA---------------------------------------GRHPQDSYED STQ--SSIFTYTN-----------------SNST---RGPFEGPN---------Y-HIAP RWV---------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL AETVIAS-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVV CKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AKQQK---------------------------------ESESTQKAEKEVTRMVVVMIFA YCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCI LQLF---------GKK-VDD-GS---ELSSASKT------------------------EV SSV--------------------------------------------------------- -------SSVSP-A--- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFA---------------------------------------ARRSRDD-DD TTR--GSVFTYTN-----------------TNNT---RGPFEGPN---------Y-HIAP 
RWV---------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDL VETLVAS-TISVFNQ-IF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVV CKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AAQQK---------------------------------ESESTQKAEREVSRMVVVMIVA FCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCI MQLF---------GKK-VDD-GS---EASTTSRT------------------------EV SSVS-------------------------------------------------------- ------NSSVAP-A--- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MAA-WEAAFAA---------------------------------------RRRHEE--ED TTR--DSVFTYTN-----------------SNNT---RGPFEGPN---------Y-HIAP RWV---------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADL GETVIAS-TISVINQ-IS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVV CKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDV FSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------V AAQQK---------------------------------ESESTQKAEKEVSRMVVVMIVA YCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCI LQLF---------GKK-VDD-GS---EVST-SRT------------------------EV SSVS-------------------------------------------------------- ------NSSVSP-A--- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS--------------------------------------------------------SN SSQ--AP-----------------------PNGT---PGPFDGPQ---------WPYQAP QST---------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADL LVTLCGS-SVSLSNN-IN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVV CKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNW YTGGSNN--NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA--------------- ------------------------------------------------------------ -----------------------------------------------------------A 
AAQQK---------------------------------EADTTQRAEREVTRMVIVMVMA FLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCL LEMLCC----GYQPQR-TGK-AS---PGTPGPHA------------------------DV TAAG-------------------------------------------------------- -----LRNKVMP-AHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNV-----------------------------------------------SSSLFGN VSTALRPEARLSA---------ETRLLGW--------NVPPEELR-----------HIPE HWLTYPEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDF -MMMVKT-PIFIYNS-FH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVI TRPMEGK--MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDY LT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L RDQAKK--------------------MNVESL---RSNVDKNKETAEIRIAKAAITICFL FFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMEL QKRCPW----LALNEKAPES--SAVASTSTTQEP------------------------QQ T----------------------------------------------------------- ----------TA-A--- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNV-----------------------------------------------SSVL-GN VSSVLRPDARLSA---------ESRLLGW--------NVPPDELR-----------HIPE HWLIYPEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDF -MMMIKT-PIFIYNS-FH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVI TRPMEGK--MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDY LT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L RDQAKK--------------------MNVDSL---RSNVDKSKEAAEIRIAKAAITICFL FFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMEL QKRCPW----LAISEKAPES--RAAISTSTTQEQ------------------------QQ T----------------------------------------------------------- ----------TA-A--- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] 
ME-----------------------------------------------------PLCNA SEPPLRPEAR-SSG------NGDLQFLGW--------NVPPDQIQ-----------YIPE HWLTQLEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL -IMCLKA-PIF--NS-FH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVI TKPMNRN--MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDY LS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLEL QKRCPW----LGVNEKSGEI--SSAQST-TTQEQ------------------------QQ T----------------------------------------------------------- ----------TA-A--- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD-----------------------------------------------------ALCNA SEPPLRPEARMSSG------SDELQFLGW--------NVPPDQIQ-----------YIPE HWLTQLEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL -IMCLKA-PIFIYNS-FH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVI TKPMNRN--MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDY LS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMEL QKRCPW----LGVNEKSGEA--SSAQST-TTQEQ-----------------------TQQ T----------------------------------------------------------- ----------SA-A--- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT---------------------------------------------------------- --NATGPQMAYYGA------ASMD-F-GYPEGVSIVDFVRPEIKP-----------YVHQ HWYNYPPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDL IMLTTNV-PFFTYNC-FSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNII CNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDY 
LT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA--------------- ------------------------------------------------------------ -----------------------------------------------------------M RAQAKK--------------------MNVSTL---RS-NEADAQRAEIRIAKTALVNVSL WFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAI TQHLPW----FCVHETETKS-NDDSQSNSTVAQ--------------------------- ------------------------------------------------------------ ----------DK-A--- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA---------------------------------------------------------- --NVTGPQMAFYGS------GAAT-F-GYPEGMTVADFVPDRVKH-----------MVLD HWYNYPPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDL IMLTTNF-PPFCYNC-FSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNII CNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDY FT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA--------------- ------------------------------------------------------------ -----------------------------------------------------------M RAQAKK--------------------MNVTNL---RS-NEAETQRAEIRIAKTALVNVSL WFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAI TQHLPW----FCVHEKDPND-VEENQSSNTQTQ--------------------------- ------------------------------------------------------------ ----------EK-S--- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ME------------------------------------------------------SFAV AAAQLGPHFAPLS------------------NGSVVDKVTPDMAH-----------LISP YWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITNT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPC----CVFGKVDDGK-SSDAQSQATASEA------------------------E- 
------------------------------------------------------------ ----------SK-A--- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ME------------------------------------------------------SFAV AAAQLGPHFAPLS------------------NGSVVDKVTPDMAH-----------LISP YWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITNT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPC----CVFGKVDDGK-SSDAQSQATASEA------------------------E- ------------------------------------------------------------ ----------SK-A--- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MD------------------------------------------------------SFAA VATQLGPQFAAPS------------------NGSVVDKVTPDMAH-----------LISP YWDQFPAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITNT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVI VKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDY LE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDADKSAEGKLAKVALVTISL WFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPC----CVFGKVDDGK-SSEAQSQATTSEA------------------------E- ------------------------------------------------------------ ----------SK-A--- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHL-------------------------------------------------PETPF DLAHSGPRFQAQSS------G----------NGSVLDNVLPDMAH-----------LVNP YWSRFAPMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF 
CMMASQS-PVMIINF-YY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVI VKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDY MT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M REQAKK--------------------MNVKSL---RS-SEDCDKSAEGKLAKVALTTISL WFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVL KEKCPM----CVFGNTDEPKPDAPASDTETTSEA------------------------D- ------------------------------------------------------------ ----------SK-A--- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLL-------------------------------------------------PEPPL AMALLGPRFEAQTG------G----------NRSVLDNVLPDMAP-----------LVNP HWSRFAPMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF CMMASQS-PVMIINF-YY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVI VKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDY MT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA--------------- ------------------------------------------------------------ -----------------------------------------------------------M RDQAKK--------------------MNVKSL---RS-SEDCDKSAENKLAKVALTTISL WFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVL KEKCPM----CVCGTTDEPKPDAPPSDTETTSEA------------------------E- ------------------------------------------------------------ ----------SK-D--- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI---------------------------------------------------------- --AVSGPSYEAFSY------GGQARF----NNQTVVDKVPPDMLH-----------LIDA NWYQYPPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNF LMMFCMS-PPMVINC-YY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVI VKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDY FN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN--------------- ------------------------------------------------------------ -----------------------------------------------------------M 
REQAKK--------------------MNVASL---RS-SENQNTSAECKLAKVALMTISL WFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAAL FAKFPS----LAC-AAEPSS-DAVSTTSGTTTVT------------------------DN EK---------------------------------------------------------- ----------SN-A--- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M----------------------------------------------------------- ---ANQLSYSSLGW------PYQP-------NASVVDTMPKEMLY-----------MIHE HWYAFPPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDF CMMAFMM-PTMTSNC-FA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVI VRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDY LT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ--------------- ------------------------------------------------------------ -----------------------------------------------------------L REQAKK--------------------MNVASL---RANADQQKQSAECRLAKVAMMTVGL WFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAAL YQRFPS----LACGSGESGS-DVKSEASATTTME------------------------EK PK---------------------------------------------------------- ---------IPE-A--- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MVESTTL--------------------------------------------VNQ------ ---------------------------TWWYNPTVD---------------------IHP HWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDL SFSAINGFPLKTISA-FM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVI GRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDY LS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE--------------- ------------------------------------------------------------ -----------------------------------------------------------M AAMAKR--------------------LNAKEL---RK--AQAGASAEMKLAKISMVIITQ FMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAI QTTFPWLLTCCQFDEKECED-ANDAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQQ AAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQG APPQGVDNQAYQ-A--- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] 
MGRDLR---------------------------------------------DNE------ ---------------------------TWWYNPSIV---------------------VHP HWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDF TFSLVNGFPLMTISC-FL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVI GRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDY IS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE--------------- ------------------------------------------------------------ -----------------------------------------------------------M AAMAKR--------------------LNAKEL---RK--AQAGANAEMRLAKISIVIVSQ FLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAI SQTFPWVLTCCQFDDKETED-DKDAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQ AAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPA APPQGVDNQAYQ-A--- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M--------------------------------------------------PHLLSGFLE VTASPAPTW----------------------------DAPPDNVS--GCGEQINY----- --------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAVM-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGI TRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS- -------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------------ -----PGF---------------------------------PRVQPESVI---------- ------------SLNGVVK----------------------LQKEVEECAN--------L SRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIVGA FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTS RSLLQC--QYRNINRK---------LSAAGMHEA---------------LKLAERP--ER SEFVLQ------------------------------------------------------ -----NSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M--------------------------------------------------PHLLSGFLE VTASPAPTW----------------------------DAPPDNVS--GCGEQINY----- --------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAVM-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGI TRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS- 
-------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------------ -----PGF---------------------------------PRVQPESVI---------- ------------SLNGVVK----------------------LQKEVEECAN--------L SRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIVGA FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTY RSLLQC--QYRNINRK---------LSAAGMHEA---------------LKLAERP--ER SEFVLQ------------------------------------------------------ -----NSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] M-DVLSPGQ--------GNNTTSPPAPFETGGNTTGI----------------------- --SDVTVSY--------------------------------------------------- ------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLVL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS- -------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------- ------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWR LGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP----- ASFERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF KKIIKC--KFCR------------------------------------------------ ------------------------------------------------------------ ----------------Q > 33=p A35181 serotonin receptor class 1A - rat M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI----------------------- --SDVTFSY--------------------------------------------------- ------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLVL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS- -------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------- ------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWR RCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP----- ACLERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF KKIIKC--KFCR------------------------------------------------ 
------------------------------------------------------------ ----------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L VTSDFNDSYGLTGQFINGSHSSRSR-----DNASAN-DTSATNMTDDRYWSLTVY----- --------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL MVAVLVM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS- -------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------ ----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKILFG--KYRRG----------------------------------------------- ------------------------------------------------------------ ---------------HR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L VTSDFNDSYGLTGQFINGSHSSRSR-----DNASAN-DTSATNMTDDRYWSLTVY----- --------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL MVAVLVM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS- -------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------ ----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKILFG--KYRRG----------------------------------------------- ------------------------------------------------------------ ---------------HR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N VTVVPNTTW---------------------WQASAPFDTPAALVR--------------- ------------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADL 
LVACLVM-PLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAV TN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVVS- -------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR--------------- ------------------------------RRGATARGGVGPPPVP-------------- -------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNN TSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIITGA FVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF QRLLCG--RRVRRRR--------------------------------------------- ------------------------------------------------------------ --------------APQ mafft-7.123-without-extensions/test/samplerna.xinsi0000640000076500007650000000357412226665654021571 0ustar katohkatoh>AJ006331.1_1230 c--------------------------------ca-------------------uggcgu uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcga--------------------- ----------------------------------------------------------gu >Z84287.1_1250 u-------------------------------ucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgaga--------------------- ----------------------------------------------------------gu >AF064490.1_2296 u----------------------------------------------------------- ------gagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 u-------------------------------ucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc 
ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgaga--------------------- ----------------------------------------------------------gu >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg mafft-7.123-without-extensions/test/samplerna0000640000076500007650000000273612226665654020437 0ustar katohkatoh>AJ006331.1_1230 ccauggcguuaguau gagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagu >Z84287.1_1250 uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu >AF064490.1_2296 ugagu gucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc 
ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg mafft-7.123-without-extensions/test/sample.fftnsi0000640000076500007650000007061412226665654021226 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] --------------------MNGTE--------------------------GDNF----- ---YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK-- -------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF G-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN -FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPN FNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESASTQKAEKEVTRMVVLMVIGFLVCWVPYASV AFYIFTHQGS---DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC----- --GKNPLGD-DE--SGASTSKTEVSSVS--TSPV-------------------------- ----------------------------------------------SPA----------- --- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] --------------------MNGTE--------------------------GPNF----- ---YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ-- -------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG G-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN -FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPE VNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESATTQKAEKEVTRMVIIMVIFFLICWLPYASV AMYIFTHQGS---NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC----- --GKNPLGD-DE--ASATASKTE-------TSQV-------------------------- ----------------------------------------------APA----------- --- > 3== M92038 1 chicken green sensitive 
cone opsin [PNAS89,5932-5936'9 --------------------MNGTE--------------------------GINF----- ---YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK-- -------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF G-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN -FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPD YHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----REAAAQQQ--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESATTQKAEKEVTRMVILMVLGFMLAWTPYAVV AFWIFTNKGA---DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC----- --GKNPFGD-EDVSSTVSQSKTEVSSVS--SSQV-------------------------- ----------------------------------------------SPA----------- --- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish --------------------MNGTE--------------------------GKNF----- ---YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-- -------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF G-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS -FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPD YNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------DSASTQKAEREVTKMVILMVFGFLIAWTPYATV AAWIFFNKGA---DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC----- --GKNPLGD-DE-SSTVSTSKTEVSS-------V-------------------------- ----------------------------------------------SPA----------- --- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish --------------------MNGTE--------------------------GNNF----- ---YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-- -------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF G-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS -FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPE YNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ--------------- 
------------------------------------------------------------ ------------------------------------------------------------ ---------------------------DSASTQKAEREVTKMVILMVLGFLVAWTPYATV AAWIFFNKGA---AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC----- --GKNPLGD-EE-SSTVSTSKTEVSS-------V-------------------------- ----------------------------------------------SPA----------- --- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- --------------------MKQVPEF------------------------HEDF----- ---YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI-- -------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIF G-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN -FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNK YNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KLAAKAQA--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------DSASTQKAEREVTKMVVVMVLGFLVCWAPYASF SLWIVSHRGE---EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC----- --GKN-IEE-DE--ASTSSQVTQVSS-------V-------------------------- ----------------------------------------------APEK---------- --- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] --------------------MRKMS--------------------------EEEF----- ---YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA-- -------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIF S-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN -FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTK YRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESATTQKAEREVSRMVVVMVGSFCVCYVPYAAF AMYMVNNRNH---GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC----- --GKA-MTD-ES--DTCSSQKTEVSTVS--STQV-------------------------- ----------------------------------------------GPN----------- --- > 8=opsin, greensensitive 
human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------------------------DLAETVI A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN -VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESESTQKAEKEVTRMVVVMVLAFC--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] --------------------MAQQWSL------------QRLAGRHPQDSYEDST----- ---QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV-- -------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI A-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN -VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFF ACFAAANPGY---PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF------ --GKK-VDD-GS--ELSSASKTEVSSV----SSV-------------------------- ----------------------------------------------SPA----------- --- > 10== Z68193 1 human Red Opsin <>[] --------------------MAQQWSL------------QRLAGRHPQDSYEDST----- ---QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV-- -------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN -VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY PGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK--------------- 
------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFF ACFAAANPGY---AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF------ --GKK-VDD-GS--ELSSASKTEVSSV----SSV-------------------------- ----------------------------------------------SPA----------- --- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] --------------------MTEAWNV------------AVFAARRSRDD-DDTT----- ---RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV-- -------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLV A-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN -IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVE LGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESESTQKAEREVSRMVVVMIVAFCICWGPYASF VSFAAANPGY---AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF------ --GKK-VDD-GS--EASTTSRTEVSSVS--NSSV-------------------------- ----------------------------------------------APA----------- --- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] --------------------MA-AWEA------------AFAARRRHEE--EDTT----- ---RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV-- -------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVI A-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN -IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSD PGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RAVAAQQK--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------ESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFF ACFAAANPGY---AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF------ --GKK-VDD-GS--EVST-SRTEVSSVS--NSSV-------------------------- ----------------------------------------------SPA----------- --- > 13== S75720 1 chicken P-opsin 
<>[Science267(5203),1502-1506'95] --------------------MSSNSSQA-----------------------PPNG----- ------------------TPGPF----------------DGPQW------PYQAPQST-- -------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLC G-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD -FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGS- -NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RAAAAQQK--------------- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------EADTTQRAEREVTRMVIVMVMAFLLCWLPYSTF ALVVATHKGI---IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCGYQPQ RTGKA--------SPGTPGPHADVTAAGL-RNKV-------------------------- ----------------------------------------------MPAHPV-------- --- > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] -----MESGNVSS-----SLFGNVSTA-------------LRPEARL----SAET----- ------RLLGW--------NVPP----------------EELR--------HIPEHWLTY PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK --TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG --KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DN FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVESL---------------- ------------------RS------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGVM SLIGAFGDKT---LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL-- ----NEKAP-ES-SAVASTSTTQEPQQ---TTAA-------------------------- ------------------------------------------------------------ --- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 -----MEYHNVS------SVLGNVSSV-------------LRPDARL----SAES----- ------RLLGW--------NVPP----------------DELR--------HIPEHWLIY PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK --TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG --KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DN 
FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVDSL---------------- ------------------RS------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGVM SLIGAFGDKT---LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI-- ----SEKAP-ES-RAAISTSTTQEQQQ---TTAA-------------------------- ------------------------------------------------------------ --- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] -----MEP------------LCNASEP------------PLRPEAR--SSGNGDL----- ------QFLGW--------NVPP----------------DQIQ--------YIPEHWLTQ LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK --APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR --NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DN FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVESL---------------- ------------------RS------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM SLIGAFGDKS---LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV-- ----NEKSG-EI-SSAQSTTTQEQQQ----TTAA-------------------------- ------------------------------------------------------------ --- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 -----MDA------------LCNASEP------------PLRPEARM-SSGSDEL----- ------QFLGW--------NVPP----------------DQIQ--------YIPEHWLTQ LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK --APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR --NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DN FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVESL---------------- ------------------RS------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM SLIGAFGDKS---LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV-- ----NEKSG-EA-SSAQSTTTQEQTQQ---TSAA-------------------------- 
------------------------------------------------------------ --- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------MTNATGP------------QMAYYGAA----SMD------ --------FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNY PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTT N-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG -PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QD FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVSTL---------------- ------------------RS-------NEADAQRAEIRIAKTALVNVSLWFICWTPYALI SLKGVMGDTS---GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV-- --HETETKS-ND-DSQSNSTVAQDKA---------------------------------- ------------------------------------------------------------ --- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------MANVTGP------------QMAFYGSG----AAT------ --------FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNY PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTT N-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG -PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RD MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVTNL---------------- ------------------RS-------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAI TIQGLLGNAE---GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV-- --HEKDPND-VE-ENQSSNTQTQEKS---------------------------------- ------------------------------------------------------------ --- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] -----MESFA---------VAAAQLGP------------HFAPLS--------------- -------------NGSVVDKVTP----------------DMAH--------LISPYWNQF PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG 
-RPMTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RD WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVKSL---------------- ------------------RS-------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVI NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF-- --GKVDDGK-SS-DAQSQATASEAESKA-------------------------------- ------------------------------------------------------------ --- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] -----MESFA---------VAAAQLGP------------HFAPLS--------------- -------------NGSVVDKVTP----------------DMAH--------LISPYWNQF PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG -RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RD WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVKSL---------------- ------------------RS-------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVI NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF-- --GKVDDGK-SS-DAQSQATASEAESKA-------------------------------- ------------------------------------------------------------ --- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' -----MDSFA---------AVATQLGP------------QFAAPS--------------- -------------NGSVVDKVTP----------------DMAH--------LISPYWDQF PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG -RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RD WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVKSL---------------- ------------------RS-------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVI NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF-- 
--GKVDDGK-SS-EAQSQATTSEAESKA-------------------------------- ------------------------------------------------------------ --- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHLPETP---------FDLAHSGP------------RFQAQSSG------------- -------------NGSVLDNVLP----------------DMAH--------LVNPYWSRF APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS Q-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING -TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RM WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVKSL---------------- ------------------RS-------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVI CYFGLFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF-- --GNTDEPKPDA-PASDTETTSEADSKA-------------------------------- ------------------------------------------------------------ --- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLPEPP---------LAMALLGP------------RFEAQTGG------------- -------------NRSVLDNVLP----------------DMAP--------LVNPHWSRF APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS Q-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING -TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQ WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVKSL---------------- ------------------RS-------SEDCDKSAENKLAKVALTTISLWFMAWTPYLII CYFGLFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC-- --GTTDEPKPDA-PPSDTETTSEAESKD-------------------------------- ------------------------------------------------------------ --- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------MIAVSGP------------SYEAFSYG---GQARF----- ------------NNQTVVDKVPP----------------DMLH--------LIDANWYQY PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFC 
M-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG -KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RG LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVASL---------------- ------------------RS-------SENQNTSAECKLAKVALMTISLWFMAWTPYLVI NFSGIFNL-V---KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-- ---AAEPSS-DA-VSTTSGTTTVTDNEK--SNA--------------------------- ------------------------------------------------------------ --- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] --------------------MAN----------------QLSYSSLG------------- --------WPYQPNASVVDTMPK----------------EMLY--------MIHEHWYAF PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAF M-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA -APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KD WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK-------------- ------------------------------------------------------------ --------------------------------------MNVASL---------------- ------------------RA------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLII SWAGVFSSGT---RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC-- --GSGESGS-DV-KSEASATTTMEEKPK-------------------------------- ----------------------------------------------IPEA---------- --- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] --------------------MVESTTL-----------------------VNQTW----- ----------W-YNPTV----------------------------------DIHPHWAKF DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAI NGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAA SKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TD PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR-------------- ------------------------------------------------------------ --------------------------------------LNAKEL---------------- ------------------R--------KAQAGASAEMKLAKISMVIITQFMLSWSPYAII 
ALLAQFGPAE---WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCC QFDEKECED-AN-DAEEEVVASERGGES--RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQ GY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQA YQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] --------------------MGRDLRD------------------------NETW----- ----------W-YNPSI----------------------------------VVHPHWREF DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLV NGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAA SKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RD STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR-------------- ------------------------------------------------------------ --------------------------------------LNAKEL---------------- ------------------R--------KAQAGANAEMRLAKISIVIVSQFLLSWSPYAVV ALLAQFGPLE---WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCC QFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQ GYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQA YQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] --------------------M--------------------------------------- -----PHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN------ --YGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF------ ----GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------- ----------------------PGF-----------------PRVQPESVISL------- --------NGVVKLQKEV--------------------EECANLSRLLKH---------- -----------------ER--------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLL STARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYRNI --NRKLSAA-GMHEALKLAERPERSEFVL-QNSDHCGK---------------------- ----------------------------KGHDT--------------------------- --- > 31=p A47425 serotonin receptor 5HT-7 - rat --------------------M--------------------------------------- 
-----PHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN------ --YGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF------ ----GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------- ----------------------PGF-----------------PRVQPESVISL------- --------NGVVKLQKEV--------------------EECANLSRLLKH---------- -----------------ER--------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLL STARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNI --NRKLSAA-GMHEALKLAERPERSEFVL-QNSDHCGK---------------------- ----------------------------KGHDT--------------------------- --- > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] -----MDVLSPGQ-------GNNTTSPPA----------PFETGG--------------- -------------NTTGISDVTV------------------------------------- -----SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------ ----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------- -------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNW RLGVESKAGGALCANGAVRQG-----------------DDGAALEVIEVHRVGNSKEHLP LPSEAGPTPCA--PASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV ALVLPFCESS-C-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRQ ------------------------------------------------------------ ------------------------------------------------------------ --- > 33=p A35181 serotonin receptor class 1A - rat -----MDVFSFGQ-------GNNTTASQE----------PFGTGG--------------- -------------NVTSISDVTF------------------------------------- -----SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------ ----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------- -------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDW 
RRCAENRAVGTPCTNGAVRQG-----------------DDEATLEVIEVHRVGNSKEHLP LPSESGSNSYA--PACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV ALVLPFCESS-C-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRR ------------------------------------------------------------ ------------------------------------------------------------ --- > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] -----MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG----- -------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------ ----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN-- -------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR-- ------------------------------------------------------------ -------------------------------------------------RGHR------- --- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail -----MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG----- -------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------ ----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN-- -------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR-- ------------------------------------------------------------ -------------------------------------------------RGHR------- --- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi 
--------------------MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNW----- -------------NVTVVPNTTW---------WQASAPFDTPAAL--------------- -----VRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL V-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDY IHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV------ ----GYQIFATASSFYVPVLIILILYWRIY-------QTARKRIRR-------------- -------------------------------RRGATARGGVGPPPVPAGG-ALVAGGG-- -------SGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSP---EKQSCANGLEADP PTTGYGAVAAAYYPSLVRRKP------KEAADSKRERKAAKTLAIITGAFVACWLPFFVL AILVPTCDCE----VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRVRR R----------------------------------------------------------- ---------------------------------------------RAPQ----------- --- mafft-7.123-without-extensions/test/sample.ginsi0000640000076500007650000007121012226665654021037 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MN-------------------------GTE-------GDNFYVP---------------- -------------------------FSNKTG------LARSPYEYPQY-YLAEPW----- -----------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLN LAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIE RYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCS CGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPY ASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL--- --CCGKNPLGDDE-SG-ASTSKTEVSSVST------------------------------ ------------------------------------------------------------ ----SPVSPA > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MN-------------------------GTE-------GPNFYVP---------------- -------------------------FSNITG------VVRSPFEQPQY-YLAEPW----- -----------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLN LAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIE 
RYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCS CGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPY ASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL--- --CCGKNPLGDDE-AS-ATASKTET----------------------------------- ------------------------------------------------------------ ----SQVAPA > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MN-------------------------GTE-------GINFYVP---------------- -------------------------MSNKTG------VVRSPFEYPQY-YLAEPW----- -----------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVN LAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIE RYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCS CGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPY AVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI--- --CCGKNPFGDEDVSSTVSQSKTEVSSVSS------------------------------ ------------------------------------------------------------ ----SQVSPA > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MN-------------------------GTE-------GKNFYVP---------------- -------------------------MSNRTG------LVRSPFEYPQY-YLAEPW----- -----------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVN LAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIE RYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCS CGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEREVTKMVILMVFGFLIAWTPY ATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI--- 
--FCGKNPLGDDE-SSTVSTSKTEVSS--------------------------------- ------------------------------------------------------------ ------VSPA > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MN-------------------------GTE-------GNNFYVP---------------- -------------------------LSNRTG------LVRSPFEYPQY-YLAEPW----- -----------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVN LAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIE RYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCS CGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEREVTKMVILMVLGFLVAWTPY ATVAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL--- --FCGKNPLGDEE-SSTVSTSKTEVSS--------------------------------- ------------------------------------------------------------ ------VSPA > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MK-------------------------QVPE-----FHEDFYIP---------------- -------------------IPLD--INNLSAY--------SPFLVPQD-HLGNQG----- -----------------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVN LSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFE RWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCS CGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEREVTKMVVVMVLGFLVCWAPY ASFSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV--- ---CGKN-IEEDE-AS-TSSQVTQVSSVAP------------------------------ ------------------------------------------------------------ --------EK > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MR-------------------------KMS-------EEEFYL----------------- -------------------------FKNISSV--------GPWDGPQY-HIAPVW----- -----------------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVN 
VSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFE RYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCS CGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------TTQKAEREVSRMVVVMVGSFCVCYVPY AAFAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV--- ---CGKA-MTDES-DT-CSSQKTEVSTVSS------------------------------ ------------------------------------------------------------ ----TQVGPN > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE RWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTS CGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEKEVTRMVVVMVLAFC------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MA------QQWSLQRLAGRHPQDSYEDSTQ-------SSIFTYT---------------- -------------------------NSNSTR---------GPFEGPNY-HIAPRW----- -----------------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVN LAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE RWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTS CGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEKEVTRMVVVMVLAFCFCWGPY 
AFFACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF--- ----GKKVDDGSE-LS--SASKTEVSSV-------------------------------- ------------------------------------------------------------ ----SSVSPA > 10== Z68193 1 human Red Opsin <>[] MA------QQWSLQRLAGRHPQDSYEDSTQ-------SSIFTYT---------------- -------------------------NSNSTR---------GPFEGPNY-HIAPRW----- -----------------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVN LAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE RWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTS CGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEKEVTRMVVVMIFAYCVCWGPY TFFACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF--- ----GKKVDDGSE-LS--SASKTEVSSV-------------------------------- ------------------------------------------------------------ ----SSVSPA > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MT------EAWNVAVFAARRSRDD-DDTTR-------GSVFTYT---------------- -------------------------NTNNTR---------GPFEGPNY-HIAPRW----- -----------------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVN LAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWE RWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTS CGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEREVSRMVVVMIVAFCICWGPY ASFVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF--- ----GKKVDDGSE-AS--TTSRTEVSSVSN------------------------------ ------------------------------------------------------------ ----SSVAPA > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-------AWE-AAFAARRRHEE-EDTTR-------DSVFTYT---------------- -------------------------NSNNTR---------GPFEGPNY-HIAPRW----- 
-----------------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVN LAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWE RWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTS CGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------STQKAEKEVSRMVVVMIVAYCFCWGPY TFFACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF--- ----GKKVDDGSE-VS---TSRTEVSSVSN------------------------------ ------------------------------------------------------------ ----SSVSPA > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS-------------------------SNS-------SQA-------------------- -------------------------PPNGTP---------GPFDGPQWPYQAPQS----- -----------------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVN LAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALE RYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTS CGPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD----- ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------TTQRAEREVTRMVIVMVMAFLLCWLPY STFALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML--- --CCGYQPQRTGKASPGTPGPHADVTAAGLRNKV-------------------------- ------------------------------------------------------------ ----MPAHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNVSS------------SLFGNVSTAL-------RPE-------------------- --------ARLSAE-------------TRLL------GWNVPPEELR--HIPEHWLTYPE -------------PPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVIN LAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYD RFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTS CTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNV ES---------------------------------------------------------- ------------------------------------------------------------ 
-------------------LRS-------NVDKNKETAEIRIAKAAITICFLFFCSWTPY GVMSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWL --ALNEKAPESSA----VASTST---TQEP------------------------------ ------------------------------------------------------------ ----QQTTAA > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNVSS------------VL-GNVSSVL-------RPD-------------------- --------ARLSAE-------------SRLL------GWNVPPDELR--HIPEHWLIYPE -------------PPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVIN LAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYD RYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTS CTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNV DS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS-------NVDKSKEAAEIRIAKAAITICFLFFASWTPY GVMSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWL --AISEKAPESRA----AISTST---TQEQ------------------------------ ------------------------------------------------------------ ----QQTTAA > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME------------------PLCNASEPPL-------RPE-------------------- --------AR-SSGNGD----------LQFL------GWNVPPDQIQ--YIPEHWLTQLE -------------PPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLN LAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYD RYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTS CSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNV ES---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPY GVMSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWL --GVNEKSGEISS----AQSTTT---QEQ------------------------------- ------------------------------------------------------------ ----QQTTAA > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 
MD------------------ALCNASEPPL-------RPE-------------------- --------ARMSSGSDE----------LQFL------GWNVPPDQIQ--YIPEHWLTQLE -------------PPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLN LAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYD RYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTS CSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNV ES---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPY GVMSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWL --GVNEKSGEASS----AQSTTT---QEQT------------------------------ ------------------------------------------------------------ ----QQTSAA > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT-------------------------NAT-------GPQMAYYG--------------- -------AASMDFGYPE----------GVSI------VDFVRPEIKP--YVHQHWYNYPP -------------VNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVN LALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFD RYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDS CSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNV ST---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------NEADAQRAEIRIAKTALVNVSLWFICWTPY ALISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWF --CVHETETKSND----DSQSNS---TVAQ------------------------------ ------------------------------------------------------------ -------DKA > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA-------------------------NVT-------GPQMAFYG--------------- -------SGAATFGYPE----------GMTV------ADFVPDRVKH--MVLDHWYNYPP -------------VNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVN LALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFD RYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDS CSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNV 
TN---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------NEAETQRAEIRIAKTALVNVSLWFICWTPY AAITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWF --CVHEKDPNDVE----ENQSSN---TQTQ------------------------------ ------------------------------------------------------------ -------EKS > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] MESFAV-------------------AAAQL-------GPHF------------------- --------APLS---------------NGSV------VDKVTPDMAH--LISPYWNQFPA -------------MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVIN LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD RYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTS CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV KS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPY LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC --VFGKVDDGKSS----DAQSQA-TASEAE------------------------------ ------------------------------------------------------------ -------SKA > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] MESFAV-------------------AAAQL-------GPHF------------------- --------APLS---------------NGSV------VDKVTPDMAH--LISPYWNQFPA -------------MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVIN LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD RYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTS CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV KS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPY LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC --VFGKVDDGKSS----DAQSQA-TASEAE------------------------------ ------------------------------------------------------------ -------SKA > 22== X65877 1 
Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MDSFAA-------------------VATQL-------GPQF------------------- --------AAPS---------------NGSV------VDKVTPDMAH--LISPYWDQFPA -------------MDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVIN LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD RYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTS CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV KS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SEDADKSAEGKLAKVALVTISLWFMAWTPY LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC --VFGKVDDGKSS----EAQSQA-TTSEAE------------------------------ ------------------------------------------------------------ -------SKA > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHLPETPF--------------DLAHS-------GPRF------------------- --------QAQSSG-------------NGSV------LDNVLPDMAH--LVNPYWSRFAP -------------MDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLN LAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFD RYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTA CSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNV KS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SEDCDKSAEGKLAKVALTTISLWFMAWTPY LVICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMC --VFGNTDEPKPD----APASDTETTSEAD------------------------------ ------------------------------------------------------------ -------SKA > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLPEPPL--------------AMALL-------GPRF------------------- --------EAQTGG-------------NRSV------LDNVLPDMAP--LVNPHWSRFAP -------------MDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLN LAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFD RYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTA 
CSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNV KS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SEDCDKSAENKLAKVALTTISLWFMAWTPY LIICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMC --VCGTTDEPKPD----APPSDTETTSEAE------------------------------ ------------------------------------------------------------ -------SKD > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI-------------------------AVS-------GPSY------------------- --------EAFSYGGQAR-------FNNQTV------VDKVPPDMLH--LIDANWYQYPP -------------LNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVIN LAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFD RYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTA CGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNV AS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRS--------SENQNTSAECKLAKVALMTISLWFMAWTPY LVINFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSL --ACAA--EPSSD----AVSTTSGTTTVTD------------------------------ ------------------------------------------------------------ ----NEKSNA > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] MAN----------------------------------QLSY------------------- --------SSLGWPYQP----------NASV------VDTMPKEMLY--MIHEHWYAFPP -------------MNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVN LAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLD RYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTS CTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNV AS---------------------------------------------------------- ------------------------------------------------------------ -------------------LRA-------NADQQKQSAECRLAKVAMMTVGLWFMAWTPY LIISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSL --ACGSGESGSDV----KSEASA-TTTMEEK----------------------------- 
------------------------------------------------------------ ----PKIPEA > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MV-------------------------ESTT-----L----------------------- --------VNQTWWY------------NPTV------------------DIHPHWAKFDP -------------IPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIIN LAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISID RYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTS CSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNA KE---------------------------------------------------------- ------------------------------------------------------------ -------------------LRK---------AQAGASAEMKLAKISMVIITQFMLSWSPY AIIALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWL LTCCQFDEKECED----ANDAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQ P---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPP QGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MG-------------------------RDLR----------------------------- --------DNETWWY------------NPSI------------------VVHPHWREFDQ -------------VPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIIN LAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISID RYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCN CSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNA KE---------------------------------------------------------- ------------------------------------------------------------ -------------------LRK---------AQAGANAEMRLAKISIVIVSQFLLSWSPY AVVALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWV LTCCQFDDKETED----DKDAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAYP PQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AAPP QGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- -------PHLLS---------------------GFLEVTASP---------APTW---DA 
PPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVS LALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISID RYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCL ISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG----- ---------FPRVQPESVISLNG------------------------------------- -------------------------VVKLQKE-------------------VEECAN--- ---------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWLPF FLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL--- --QCQYRNINRKL----SAAGMHEALKLAER--------------------PERSEFVLQ NSDH-------------------------------------------------------- --CGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- -------PHLLS---------------------GFLEVTASP---------APTW---DA PPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVS LALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISID RYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCL ISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG----- ---------FPRVQPESVISLNG------------------------------------- -------------------------VVKLQKE-------------------VEECAN--- ---------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWLPF FLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL--- --QCQYRNINRKL----SAAGMHEALKLAER--------------------PERSEFVLQ NSDH-------------------------------------------------------- --CGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLSPG----------------------------------------------------- -------------------------QGNNTT------SPPAPF------ETGGNTTGISD -------------VTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGA DTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA-------------- -NGAVRQGD---------------------------------DGAALEVIEVHRVGNSKE 
HLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF FIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII--- --KCKF------------------------------------------------------ ------------------------------------------------------------ --CR-----Q > 33=p A35181 serotonin receptor class 1A - rat MDVFSFG----------------------------------------------------- -------------------------QGNNTT------ASQEPF------GTGGNVTSISD -------------VTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGA GTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT-------------- -NGAVRQGD---------------------------------DEATLEVIEVHRVGNSKE HLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF FIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII--- --KCKF------------------------------------------------------ ------------------------------------------------------------ --CR-----R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV -------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK -------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL--- --FGKY------------------------------------------------------ ------------------------------------------------------------ -----RRGHR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ 
FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV -------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK -------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL--- --FGKY------------------------------------------------------ ------------------------------------------------------------ -----RRGHR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi ME---------------------------------------------------------- ---GAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVP---------NTTWWQASA PFDTP--------AALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILS LAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALD RYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCV VSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGV GP-------PP-----------------------------------------------VP AGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLE ADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWLPF FVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL--- --CGRR------------------------------------------------------ ------------------------------------------------------------ --VRRRRAPQ mafft-7.123-without-extensions/test/sample0000640000076500007650000004035012226665654017730 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL 
YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS 
RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN > 8=opsin, greensensitive human (fragment) S07060 DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS VSPA > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS VSPA > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ 
VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS SVAPA > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS PA > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE KAPESSAVASTSTTQEPQQTTAA > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK APESRAAISTSTTQEQQQTTAA > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] 
MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE ISSAQSTTTQEQQQTTAA > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG EASSAQSTTTQEQTQQTSAA > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET KSNDDSQSNSTVAQDKA > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP NDVEENQSSNTQTQEKS > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC 
GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA QSQATASEAESKA > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE AQSQATTSEAESKA > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD EPKPDAPASDTETTSEADSKA > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD EPKPDAPPSDTETTSEAESKD > 25== 
U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV STTSGTTTVTDNEKSNA > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE ASATTTMEEKPKIPEA > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY PPPPQGAPPQGAPPAAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 
30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC RQ > 33=p A35181 serotonin receptor class 1A - rat MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC RR > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] 
MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL NPIIYTIFSPEFRSAFQKILFGKYRRGHR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL NPIIYTIFSPEFRSAFQKILFGKYRRGHR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT VFSPEFRHAFQRLLCGRRVRRRRAPQ mafft-7.123-without-extensions/test/samplerna.qinsi0000640000076500007650000000357412226665654021562 0ustar katohkatoh>AJ006331.1_1230 ---------------------------------------------------ccauggcgu uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc 
ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagu------------------- ------------------------------------------------------------ >Z84287.1_1250 -------------------------------uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------ >AF064490.1_2296 ------------------------------------------------------------ -----ugagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 -------------------------------uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------ >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg mafft-7.123-without-extensions/test/sample.gins10000640000076500007650000007231012226665654020751 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] --------------------MN-------------------------GTE-------GDN 
FYVP-----------------------------------------FSNKTG--------- --LARSPYEYPQY-YLAEPW----------------------KYSALAAYMFFLILVGFP VNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTM CSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC- AAPPLVG-WS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFF CYGRLLCTVKEAAAAQQESA---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEKEVTRMVVLMVIG FLVCWVPYASVAFYIFT-HQGS-D-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM ITTL-----CCGKNPLGDDE-SG-ASTSKTEVSSVST----------------------- ------------------------------------------------------------ ------------SPV-------SP-A > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] --------------------MN-------------------------GTE-------GPN FYVP-----------------------------------------FSNITG--------- --VVRSPFEQPQY-YLAEPW----------------------QFSMLAAYMFLLIVLGFP INFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTG CNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC- AAPPLVG-WS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFF CYGQLVFTVKEAAAQQQESA---------------------------------------- ------------------------------------------------------------ -----------------------------------------TTQKAEKEVTRMVIIMVIF FLICWLPYASVAMYIFT-HQGS-N-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM LTSL-----CCGKNPLGDDE-AS-ATASKTE-----T----------------------- ------------------------------------------------------------ ------------SQV-------AP-A > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 --------------------MN-------------------------GTE-------GIN FYVP-----------------------------------------MSNKTG--------- --VVRSPFEYPQY-YLAEPW----------------------KYRLVCCYIFFLISTGLP INLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVG CAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC- AAPPLFG-WS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFF 
SYGRLICKVREAAAQQQESA---------------------------------------- ------------------------------------------------------------ -----------------------------------------TTQKAEKEVTRMVILMVLG FMLAWTPYAVVAFWIFT-NKGA-D-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM ITTI-----CCGKNPFGDEDVSSTVSQSKTEVSSVSS----------------------- ------------------------------------------------------------ ------------SQV-------SP-A > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish --------------------MN-------------------------GTE-------GKN FYVP-----------------------------------------MSNRTG--------- --LVRSPFEYPQY-YLAEPW----------------------QFKILALYLFFLMSMGLP INGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTG CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC- AAPPLFG-WS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFF TYGRLVCTVKAAAAQQQDSA---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEREVTKMVILMVFG FLIAWTPYATVAAWIFF-NKGA-D-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM LTTI-----FCGKNPLGDDE-SSTVSTSKTEVSS-------------------------- ------------------------------------------------------------ --------------V-------SP-A > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish --------------------MN-------------------------GTE-------GNN FYVP-----------------------------------------LSNRTG--------- --LVRSPFEYPQY-YLAEPW----------------------QFKLLAVYMFFLICLGLP INGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTG CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC- AAPPLVG-WS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFF TYGRLVCTVKAAAAQQQDSA---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEREVTKMVILMVLG FLVAWTPYATVAAWIFF-NKGA-A-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCM LTTL-----FCGKNPLGDEE-SSTVSTSKTEVSS-------------------------- 
------------------------------------------------------------ --------------V-------SP-A > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- --------------------MK-------------------------QVPE-----FHED FYIP-----------------------------------------IPLDINN-------- -LSAYSPFLVPQD-HLGNQG----------------------IFMAMSVFMFFIFIGGAS INILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATA CKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA- SLPPLFG-WS-----RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVF CYGQLLITLKLAAKAQADSA---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEREVTKMVVVMVLG FLVCWAPYASFSLWIVS-HRGE-E-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCM M-KM-----VCGKN-IEEDE-AS-TSSQVTQVSS-------------------------- ------------------------------------------------------------ --------------V-------APEK > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] --------------------MR-------------------------KMS-------EEE FYL------------------------------------------FKNISS--------- --V--GPWDGPQY-HIAPVW----------------------AFYLQAAFMGTVFLIGFP LNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHV CALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV- SIPPFFG-WS-----RFIPEGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICF SYTQLLRALKAVAAQQQESA---------------------------------------- ------------------------------------------------------------ -----------------------------------------TTQKAEREVSRMVVVMVGS FCVCYVPYAAFAMYMVN-NRNH-G-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACI M-KM-----VCGKA-MTDES-DT-CSSQKTEVSTVSS----------------------- ------------------------------------------------------------ ------------TQV-------GP-N > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ 
-----------------------------DLAETVIA-STISIVNQVS-GYFV--LGHPM CVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW- TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVL CYLQVWLAIRAVAKQQKESE---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEKEVTRMVVVMVLA FC---------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWS-LQRLAGRHPQDSYED-------------------------STQ-------SSI FTYT-----------------------------------------NSNSTR--------- -----GPFEGPNY-HIAPRW----------------------VYHLTSVWMIFVVIASVF TNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPM CVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW- TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVL CYLQVWLAIRAVAKQQKESE---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEKEVTRMVVVMVLA FCFCWGPYAFFACFAAA-NPGY-P-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCI LQLF-------GKKVDDGSE-LS--SASKTEVSSV------------------------- ------------------------------------------------------------ ------------SSV-------SP-A > 10== Z68193 1 human Red Opsin <>[] MAQQWS-LQRLAGRHPQDSYED-------------------------STQ-------SSI FTYT-----------------------------------------NSNSTR--------- -----GPFEGPNY-HIAPRW----------------------VYHLTSVWMIFVVTASVF TNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPM CVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW- TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIML CYLQVWLAIRAVAKQQKESE---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEKEVTRMVVVMIFA 
YCVCWGPYTFFACFAAA-NPGY-A-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCI LQLF-------GKKVDDGSE-LS--SASKTEVSSV------------------------- ------------------------------------------------------------ ------------SSV-------SP-A > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD--DD-------------------------TTR-------GSV FTYT-----------------------------------------NTNNTR--------- -----GPFEGPNY-HIAPRW----------------------VYNLVSFFMIIVVIASCF TNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPL CVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW- SAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIV CYLQVWMAIRAVAAQQKESE---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEREVSRMVVVMIVA FCICWGPYASFVSFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCI MQLF-------GKKVDDGSE-AS--TTSRTEVSSVSN----------------------- ------------------------------------------------------------ ------------SSV-------AP-A > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE--ED-------------------------TTR-------DSV FTYT-----------------------------------------NSNNTR--------- -----GPFEGPNY-HIAPRW----------------------VYNLTSVWMIFVVAASVF TNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPM CVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW- TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIIL CYLQVWLAIRAVAAQQKESE---------------------------------------- ------------------------------------------------------------ -----------------------------------------STQKAEKEVSRMVVVMIVA YCFCWGPYTFFACFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCI LQLF-------GKKVDDGSE-VS--T-SRTEVSSVSN----------------------- ------------------------------------------------------------ ------------SSV-------SP-A > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] -------------------------------------------------M-------SSN 
SSQA-----------------------------------------PPNGTP--------- -----GPFDGPQWPYQAPQS----------------------TYVGVAVLMGTVVACASV VNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRM CELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW- SAPPLLG-WS-----SYVPEGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILF SYTNLLLTLRAAAAQQKEAD---------------------------------------- ------------------------------------------------------------ -----------------------------------------TTQRAEREVTRMVIVMVMA FLLCWLPYSTFALVVAT-HKGI-I-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCL LEML-----CCGYQPQRTGK-AS--PGTPGPHADVTA----------------------- ------------------------------------------------------------ ------------AGLRNKVMPAHP-V > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] --------------------MESGNVS------------SSLFGNVSTAL-------RPE -----------------------ARLSA---E---------------TRLL--------- --GWNVPPEELR--HIPEHWLTYPE-------------PPESMNYLLGTLYIFFTLMSML GNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLG CQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW- VVACYTETWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITY YYSQIVGHVFSHEKALRDQAKKM------------------NVESLRS------------ ------------------------------------------------------------ -------------------------------------NVDKNKETAEIRIAKAAITICFL FFCSWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACACKMVACIDPFVYAISHPRYRMEL QKRCPWL--ALNEKAPESSA----VASTST---TQEP----------------------- ------------------------------------------------------------ ------------QQT-------TA-A > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 --------------------MEYHNVS------------SVL-GNVSSVL-------RPD -----------------------ARLSA---E---------------SRLL--------- --GWNVPPDELR--HIPEHWLIYPE-------------PPESMNYLLGTLYIFFTVISMI GNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLG CQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW- VVACYTESWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITY 
YYSQIVGHVFSHEKALRDQAKKM------------------NVDSLRS------------ ------------------------------------------------------------ -------------------------------------NVDKSKEAAEIRIAKAAITICFL FFASWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACTCKMVACIDPFVYAISHPRYRMEL QKRCPWL--AISEKAPESRA----AISTST---TQEQ----------------------- ------------------------------------------------------------ ------------QQT-------TA-A > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] --------------------ME------------------PLCNASEPPL-------RPE -----------------------AR-SSGNGD---------------LQFL--------- --GWNVPPDQIQ--YIPEHWLTQLE-------------PPASMHYMLGVFYIFLFCASTV GNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTW CQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW- VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILY YYSQIVGHVFSHEKALREQAKKM------------------NVESLRS------------ ------------------------------------------------------------ -------------------------------------NVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS-L-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLEL QKRCPWL--GVNEKSGEISS----AQSTTT---QEQ------------------------ ------------------------------------------------------------ ------------QQT-------TA-A > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 --------------------MD------------------ALCNASEPPL-------RPE -----------------------ARMSSGSDE---------------LQFL--------- --GWNVPPDQIQ--YIPEHWLTQLE-------------PPASMHYMLGVFYIFLFFASTL GNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTW CQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW- VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILY YYSQIVGHVFNHEKALREQAKKM------------------NVESLRS------------ ------------------------------------------------------------ -------------------------------------NVDKSKETAEIRIAKAAITICFL FFVSWTPYGVMSLIGAF-GDKS-L-LTPGATMIPACTCKLVACIEPFVYAISHPRYRMEL QKRCPWL--GVNEKSGEASS----AQSTTT---QEQT----------------------- 
------------------------------------------------------------ ------------QQT-------SA-A > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------MT-------------------------NAT-------GPQ MAYYG-----------------AASMDFGYPE---------------GVSI--------- --VDFVRPEIKP--YVHQHWYNYPP-------------VNPMWHYLLGVIYLFLGTVSIF GNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQY CEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC- ALPPFFG-WG-----NYILEGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVF SYVFIVKAIFAHEAAMRAQAKKM------------------NVSTLRS------------ ------------------------------------------------------------ --------------------------------------NEADAQRAEIRIAKTALVNVSL WFICWTPYALISLKGVM-GDTS-G-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAI TQHLPWF--CVHETETKSND----DSQSNS---TVAQ----------------------- ------------------------------------------------------------ ----------------------DK-A > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------MA-------------------------NVT-------GPQ MAFYG-----------------SGAATFGYPE---------------GMTV--------- --ADFVPDRVKH--MVLDHWYNYPP-------------VNPMWHYLLGVVYLFLGVISIA GNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTY CEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW- SLPPFFG-WG-----SYTLEGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVF SYVFIVKAIFAHEAAMRAQAKKM------------------NVTNLRS------------ ------------------------------------------------------------ --------------------------------------NEAETQRAEIRIAKTALVNVSL WFICWTPYAAITIQGLL-GNAE-G-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAI TQHLPWF--CVHEKDPNDVE----ENQSSN---TQTQ----------------------- ------------------------------------------------------------ ----------------------EK-S > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] --------------------ME-------------------SFAVAAAQL-------GPH FA----------------------PLS--------------------NGSV--------- --VDKVTPDMAH--LISPYWNQFPA-------------MDPIWAKILTAYMIMIGMISWC 
GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKM----------- ----------------YVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------ ------------------------------------------------------------ --------------------------------------SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPCC--VFGKVDDGKSS----DAQSQA-TASEAE----------------------- ------------------------------------------------------------ ----------------------SK-A > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] --------------------ME-------------------SFAVAAAQL-------GPH FA----------------------PLS--------------------NGSV--------- --VDKVTPDMAH--LISPYWNQFPA-------------MDPIWAKILTAYMIMIGMISWC GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW- CLAPAFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------ ------------------------------------------------------------ --------------------------------------SEDAEKSAEGKLAKVALVTITL WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPCC--VFGKVDDGKSS----DAQSQA-TASEAE----------------------- ------------------------------------------------------------ ----------------------SK-A > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' --------------------MD-------------------SFAAVATQL-------GPQ FA----------------------APS--------------------NGSV--------- --VDKVTPDMAH--LISPYWDQFPA-------------MDPIWAKILTAYMIIIGMISWC GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWC CLAPVFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------ ------------------------------------------------------------ 
--------------------------------------SEDADKSAEGKLAKVALVTISL WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL KEKCPCC--VFGKVDDGKSS----EAQSQA-TTSEAE----------------------- ------------------------------------------------------------ ----------------------SK-A > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] --------------------MERSHL--------------PETPFDLAHS-------GPR FQ----------------------AQSSG------------------NGSV--------- --LDNVLPDMAH--LVNPYWSRFAP-------------MDPMMSKILGLFTLAIMIISCC GNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLW CDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW- TVMPLIG-WS-----AYVPEGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICY SYWFIIAAVAAHEKAMREQAKKM------------------NVKSLRS------------ ------------------------------------------------------------ --------------------------------------SEDCDKSAEGKLAKVALTTISL WFMAWTPYLVICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVL KEKCPMC--VFGNTDEPKPD----APASDTETTSEAD----------------------- ------------------------------------------------------------ ----------------------SK-A > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 --------------------MERSLL--------------PEPPLAMALL-------GPR FE----------------------AQTGG------------------NRSV--------- --LDNVLPDMAP--LVNPHWSRFAP-------------MDPTMSKILGLFTLVILIISCC GNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLW CDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW- TIMPLIG-WS-----SYVPEGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICY SYWFIIATVAAHEKAMRDQAKKM------------------NVKSLRS------------ ------------------------------------------------------------ --------------------------------------SEDCDKSAENKLAKVALTTISL WFMAWTPYLIICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVL KEKCPMC--VCGTTDEPKPD----APPSDTETTSEAE----------------------- ------------------------------------------------------------ ----------------------SK-D > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] 
--------------------MI-------------------------AVS-------GPS YE----------------------AFSYGGQA------------RFNNQTV--------- --VDKVPPDMLH--LIDANWYQYPP-------------LNPMWHGILGFVIGMLGFVSAM GNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLF CQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW- TIAPMFG-WN-----RYVPEGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIY SYWFIIQAVAAHEKNMREQAKKM------------------NVASLRS------------ ------------------------------------------------------------ --------------------------------------SENQNTSAECKLAKVALMTISL WFMAWTPYLVINFSGIF-NL-V-K-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAAL FAKFPSL--AC-AAEPSSDA----VSTTSG-TTTVTDN---------------------- ------------------------------------------------------------ ------------EK--------SN-A > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] --------------------M---------------------------AN-------QLS YS----------------------SLGWPYQP---------------NASV--------- --VDTMPKEMLY--MIHEHWYAFPP-------------MNPLWYSILGVAMIILGIICVL GNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFM CEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW- TILPFFG-WS-----RYVPEGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIY CYFFIVHAVAEHEKQLREQAKKM------------------NVASLRA------------ ------------------------------------------------------------ -------------------------------------NADQQKQSAECRLAKVAMMTVGL WFMAWTPYLIISWAGVF-SSGT-R-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAAL YQRFPSL--ACGSGESGSDV----KSEASA-TTTMEEK---------------------- ------------------------------------------------------------ ------------PKI-------PE-A > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] --------------------MV-------------------------EST-------TL- -----------------------VNQTWWY-----------------NPTV--------- --------------DIHPHWAKFDP-------------IPDAVYYSVGIFIGVVGIIGIL GNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVA CQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW- 
SVGPVFN-WG-----AYVPEGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAF CYFNIVMSVSNHEKEMAAMAKRL------------------NAKELRK------------ ------------------------------------------------------------ --------------------------------------AQ-AGASAEMKLAKISMVIITQ FMLSWSPYAIIALLAQF-GPAE-W-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAI QTTFPWLLTCCQFDEKECED----ANDAEE-EVVASER--GGESRDAAQMKEMMAMMQKM QAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQV EAPQGAPPQGVDNQA-------YQ-A > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] --------------------MG-------------------------RDL-------R-- -----------------------DNETWWY-----------------NPSI--------- --------------VVHPHWREFDQ-------------VPDAVYYSLGIFIGICGIIGCG GNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAA CKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW- AIGPIFG-WG-----AYTLEGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFF CYFNIVMSVSNHEKEMAAMAKRL------------------NAKELRK------------ ------------------------------------------------------------ --------------------------------------AQ-AGANAEMRLAKISIVIVSQ FLLSWSPYAVVALLAQF-GPLE-W-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAI SQTFPWVLTCCQFDDKETED----DKDAET-EIPAGESSDAAPSADAAQMKEMMAMMQKM QQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP-- ----AAPPQGVDNQA-------YQ-A > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ ----------------------------------------------------MPHLLSGF LEVTAS---------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIA GNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFF CNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI- TLPPLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLF MYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG------------ ----------------------------------------VVKLQKE------------- -----VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGIIVGA 
FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTS RSLL-----QCQYRNINRKL----SAAGMHEALKLAER---------------------P ERSEFVLQNS-------------------------------------------------- --------DHCGKKG-------HD-T > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ ----------------------------------------------------MPHLLSGF LEVTAS---------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIA GNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFF CNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI- TLPPLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLF MYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG------------ ----------------------------------------VVKLQKE------------- -----VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGIIVGA FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTY RSLL-----QCQYRNINRKL----SAAGMHEALKLAER---------------------P ERSEFVLQNS-------------------------------------------------- --------DHCGKKG-------HD-T > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] --------------------MDVLSPG--------------------------------- ---------------------------------------------QGNNT---------- -TSPPAPF------ETGGNTTGISD-------------VTVSYQVITSLLLGTLIFCAVL GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI- SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV LYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLG VESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS---- --------------FERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF KKII-----KCKF----------------------------------------------- ------------------------------------------------------------ ----------CR-------------Q > 33=p A35181 serotonin receptor class 1A - rat --------------------MDVFSFG--------------------------------- 
---------------------------------------------QGNNT---------- -TASQEPF------GTGGNVTSISD-------------VTFSYQVITSLLLGTLIFCAVL GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI- SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV LYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRC AENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC---- --------------LERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT FILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF KKII-----KCKF----------------------------------------------- ------------------------------------------------------------ ----------CR-------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] --------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT---------- -SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI- SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKIL-----FGKY----------------------------------------------- ------------------------------------------------------------ ----------RR----------GH-R > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail --------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT---------- -SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI- SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI 
IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF QKIL-----FGKY----------------------------------------------- ------------------------------------------------------------ ----------RR----------GH-R > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi --------------------M--------------------------------------- ----------------------EGAEGQEELDWEALYLRLPLQNCSWNSTGWEPN----- WNVTVV---------PNTTWWQASAPFDT--------PAALVRAAAKAVVLGLLILATVV GNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPEL CDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV- CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPVLIILI LYWRIYQTARKRIRR------RRGATARGGVGPPPVPAG--------------------- ------------GALVAGGGSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNT SPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGA FVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF QRLL-----CGRR----------------------------------------------- ------------------------------------------------------------ ----------VRRRR-------AP-Q mafft-7.123-without-extensions/test/sample.lins10000640000076500007650000007033012226665654020756 0ustar katohkatoh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] -------------------------MNGTE--G-------------------DNFYVPFS NKTG--------------------------------LARSPYEYPQY------------- ---YLAEPW---------KYSA-----LAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPL NYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSL VVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIP EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES A----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H 
QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC------GKNPLGD DE-SG-ASTSK-TEVSSVS--TSPVSPA-------------------------------- ----------------------------------------------------------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] -------------------------MNGTE--G-------------------PNFYVPFS NITG--------------------------------VVRSPFEQPQY------------- ---YLAEPW---------QFSM-----LAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPL NYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSL VVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIP EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES A----------------------------------------------------------- ------------------------------------------------------------ ----------------------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC------GKNPLGD DE-AS-ATASK-TETSQVA--PA------------------------------------- ----------------------------------------------------------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 -------------------------MNGTE--G-------------------INFYVPMS NKTG--------------------------------VVRSPFEYPQY------------- ---YLAEPW---------KYRL-----VCCYIFFLISTGLPINLLTLLVTFKHKKLRQPL NYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSL VVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMP EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES A----------------------------------------------------------- ------------------------------------------------------------ ----------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC------GKNPFGD EDVSSTVSQSK-TEVSSVS--SSQVSPA-------------------------------- ----------------------------------------------------------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish -------------------------MNGTE--G-------------------KNFYVPMS NRTG--------------------------------LVRSPFEYPQY------------- 
---YLAEPW---------QFKI-----LALYLFFLMSMGLPINGLTLVVTAQHKKLRQPL NFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSL VVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIP EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS A----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC------GKNPLGD DE-SSTVSTSK-TEVSSVS--PA------------------------------------- ----------------------------------------------------------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish -------------------------MNGTE--G-------------------NNFYVPLS NRTG--------------------------------LVRSPFEYPQY------------- ---YLAEPW---------QFKL-----LAVYMFFLICLGLPINGLTLICTAQHKKLRQPL NFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSL VVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIP EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS A----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC------GKNPLGD EE-SSTVSTSK-TEVSSVS--PA------------------------------------- ----------------------------------------------------------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- -------------------------MKQVPEFH-------------------EDFYIPIP LDIN------------------------------NLSAYSPFLVPQD------------- ---HLGNQG---------IFMA-----MSVFMFFIFIGGASINILTILCTIQFKKLRSHL NYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSL AVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLF-GWS-----RYIP EGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADS A----------------------------------------------------------- ------------------------------------------------------------ 
----------------------STQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC------GKN-IEE DE-AS-TSSQV-TQVSSVA--PEK------------------------------------ ----------------------------------------------------------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] -------------------------MRKMS--E-------------------EEFYL--- --FK------------------------------NISSVGPWDGPQY------------- ---HIAPVW---------AFYL-----QAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPL NYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSL AFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFF-GWS-----RFIP EGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQES A----------------------------------------------------------- ------------------------------------------------------------ ----------------------TTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC------GKA-MTD ES-DT-CSSQK-TEVSTVS--STQVGPN-------------------------------- ----------------------------------------------------------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSL AIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWP HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKES E----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEKEVTRMVVVMVLAFC----------------- ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWS-LQRLAGRHPQDS-----YEDSTQ--S-------------------SIFTYTNS N-----------------------------------STRGPFEGPNY------------- 
---HIAPRW---------VYHL-----TSVWMIFVVIASVFTNGLVLAATMKFKKLRHPL NWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSL AIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWP HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKES E----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF-------GKK-VDD GS-EL-SSASK-TEVSSV----SSVSPA-------------------------------- ----------------------------------------------------------- > 10== Z68193 1 human Red Opsin <>[] MAQQWS-LQRLAGRHPQDS-----YEDSTQ--S-------------------SIFTYTNS N-----------------------------------STRGPFEGPNY------------- ---HIAPRW---------VYHL-----TSVWMIFVVTASVFTNGLVLAATMKFKKLRHPL NWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSL AIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIF-GWS-----RYWP HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKES E----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF-------GKK-VDD GS-EL-SSASK-TEVSSV----SSVSPA-------------------------------- ----------------------------------------------------------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD-------DDTTR--G-------------------SVFTYTNT N-----------------------------------NTRGPFEGPNY------------- ---HIAPRW---------VYNL-----VSFFMIIVVIASCFTNGLVLVATAKFKKLRHPL NWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSL AIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIF-GWS-----RYWP HGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKES E----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N 
PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF-------GKK-VDD GS-EA-STTSR-TEVSSVS--NSSVAPA-------------------------------- ----------------------------------------------------------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE-------EDTTR--D-------------------SVFTYTNS N-----------------------------------NTRGPFEGPNY------------- ---HIAPRW---------VYNL-----TSVWMIFVVAASVFTNGLVLVATWKFKKLRHPL NWILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSL AIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIF-GWS-----RYWP HGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKES E----------------------------------------------------------- ------------------------------------------------------------ ----------------------STQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF-------GKK-VDD GS-EV-ST-SR-TEVSSVS--NSSVSPA-------------------------------- ----------------------------------------------------------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] -----------------------------M--S-------------------SNSSQAPP N-----------------------------------GTPGPFDGPQW------------- --PYQAPQS---------TYVG-----VAVLMGTVVACASVVNGLVIVVSICYKKLRSPL NYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSL AILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLL-GWS-----SYVP EGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEA D----------------------------------------------------------- ------------------------------------------------------------ ----------------------TTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCG--YQPQR-TGK AS-PG-TPGPH-ADVTAAG-LRNKVMPAHPV----------------------------- ----------------------------------------------------------- > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] ----------------MESGNVSSSLFGNV--S-------------------TALR---- ---P--------------------------------EARLSA---E---TRLLGWNVPPE 
ELRHIPEHWLTYPEPPESMNYL-----LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPS NILVINLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATN AFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVP EGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQ AKKM------------------NVESLRS------------------------------- ------------------------------------------------------------ ------------------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ-KRCPWL--ALNEK-APE SS-AV-ASTST-TQEPQQT----TAA---------------------------------- ----------------------------------------------------------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 ----------------MEYHNVSSVL-GNV--S-------------------SVLR---- ---P--------------------------------DARLSA---E---SRLLGWNVPPD ELRHIPEHWLIYPEPPESMNYL-----LGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPS NILVINLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATN AFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVP EGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQ AKKM------------------NVDSLRS------------------------------- ------------------------------------------------------------ ------------------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ-KRCPWL--AISEK-APE SR-AA-ISTST-TQEQQQT----TAA---------------------------------- ----------------------------------------------------------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ----------------------MEPLCNAS--E-------------------PPLR---- ---P--------------------------------EAR-SSGNGD---LQFLGWNVPPD QIQYIPEHWLTQLEPPASMHYM-----LGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPS NMFVLNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTN AAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVP EGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQ AKKM------------------NVESLRS------------------------------- ------------------------------------------------------------ 
------------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ-KRCPWL--GVNEK-SGE IS-SA-QSTTT-QEQQQTT----AA----------------------------------- ----------------------------------------------------------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 ----------------------MDALCNAS--E-------------------PPLR---- ---P--------------------------------EARMSSGSDE---LQFLGWNVPPD QIQYIPEHWLTQLEPPASMHYM-----LGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPS NMFVLNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTN AAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVP EGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQ AKKM------------------NVESLRS------------------------------- ------------------------------------------------------------ ------------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ-KRCPWL--GVNEK-SGE AS-SA-QSTTT-QEQTQQT----SAA---------------------------------- ----------------------------------------------------------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 -------------------------MTNAT--G-------------------PQMAY--- --YG--------------------------------AASMDFGYPE---GVSIVDFVRPE IKPYVHQHWYNYPPVNPMWHYL-----LGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPA NILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLL CMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFF-GWG-----NYIL EGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQ AKKM------------------NVSTLRS------------------------------- ------------------------------------------------------------ -------------------NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM-G DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT-QHLPWF--CVHET-ETK SN-DD-SQSNS-TVAQDKA----------------------------------------- ----------------------------------------------------------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 -------------------------MANVT--G-------------------PQMAF--- 
--YG--------------------------------SGAATFGYPE---GMTVADFVPDR VKHMVLDHWYNYPPVNPMWHYL-----LGVVYLFLGVISIAGNGLVIYLYMKSQALKTPA NMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTL CMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFF-GWG-----SYTL EGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQ AKKM------------------NVTNLRS------------------------------- ------------------------------------------------------------ -------------------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT-QHLPWF--CVHEK-DPN DV-EE-NQSSN-TQTQEKS----------------------------------------- ----------------------------------------------------------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ----------------MES---FAVAAAQL--G-------------------PHFA---- --------------------------------------PLS--------NGSVVDKVTPD MAHLISPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPA NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM CMISLDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVP EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ AKKM------------------NVKSLRS------------------------------- ------------------------------------------------------------ -------------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG KS-SD-AQSQA-TASEAES----KA----------------------------------- ----------------------------------------------------------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ----------------MES---FAVAAAQL--G-------------------PHFA---- --------------------------------------PLS--------NGSVVDKVTPD MAHLISPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPA NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM CMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAF-GWS-----RYVP EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ AKKM------------------NVKSLRS------------------------------- 
------------------------------------------------------------ -------------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG KS-SD-AQSQA-TASEAES----KA----------------------------------- ----------------------------------------------------------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' ----------------MDS---FAAVATQL--G-------------------PQFA---- --------------------------------------APS--------NGSVVDKVTPD MAHLISPYWDQFPAMDPIWAKI-----LTAYMIIIGMISWCGNGVVIYIFATTKSLRTPA NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM CMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVF-GWS-----RYVP EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ AKKM------------------NVKSLRS------------------------------- ------------------------------------------------------------ -------------------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG KS-SE-AQSQA-TTSEAES----KA----------------------------------- ----------------------------------------------------------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] -----------MERSHLPE---TPFDLAHS--G-------------------PRFQ---- --------------------------------------AQSSG------NGSVLDNVLPD MAHLVNPYWSRFAPMDPMMSKI-----LGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPA NLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSM CMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLI-GWS-----AYVP EGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQ AKKM------------------NVKSLRS------------------------------- ------------------------------------------------------------ -------------------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK-EKCPMC--VFGNT-DEP KP-DA-PASDTETTSEADS----KA----------------------------------- ----------------------------------------------------------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 
-----------MERSLLPE---PPLAMALL--G-------------------PRFE---- --------------------------------------AQTGG------NRSVLDNVLPD MAPLVNPHWSRFAPMDPTMSKI-----LGLFTLVILIISCCGNGVVVYIFGGTKSLRTPA NLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSM CMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLI-GWS-----SYVP EGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQ AKKM------------------NVKSLRS------------------------------- ------------------------------------------------------------ -------------------SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF-K I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK-EKCPMC--VCGTT-DEP KP-DA-PPSDTETTSEAES----KD----------------------------------- ----------------------------------------------------------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] -------------------------MIAVS--G-------------------PSYE---- --------------------------------------AFSYGGQARFNNQTVVDKVPPD MLHLIDANWYQYPPLNPMWHGI-----LGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPS NLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTM TMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMF-GWN-----RYVP EGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQ AKKM------------------NVASLRS------------------------------- ------------------------------------------------------------ -------------------SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF-N L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF-AKFPSL--AC-AA-EPS SD-AV-STTSG-TTTVTDN----EKSNA-------------------------------- ----------------------------------------------------------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] ---------------------------MAN--Q-------------------LSYS---- --------------------------------------SLGWPYQP---NASVVDTMPKE MLYMIHEHWYAFPPMNPLWYSI-----LGVAMIILGIICVLGNGMVIYLMMTTKSLRTPT NLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSM VMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFF-GWS-----RYVP EGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQ 
AKKM------------------NVASLRA------------------------------- ------------------------------------------------------------ ------------------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY-QRFPSL--ACGSG-ESG SD-VK-SEASA-TTTMEEK----PKIPEA------------------------------- ----------------------------------------------------------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ---------------------------MVE--S-------------------TTLV---- --------------------------------------NQTWWY-----NPTV------- ---DIHPHWAKFDPIPDAVYYS-----VGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPA NMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTM AMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWG-----AYVP EGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAM AKRL------------------NAKELRK------------------------------- ------------------------------------------------------------ -------------------AQ-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ-TTFPWLLTCCQFD-EKE CE-DA-NDAEE-EVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGY PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] ----------------------------MG--R-------------------DLRD---- --------------------------------------NETWWY-----NPSI------- ---VVHPHWREFDQVPDAVYYS-----LGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPA NMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTM AMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWG-----AYTL EGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAM AKRL------------------NAKELRK------------------------------- ------------------------------------------------------------ -------------------AQ-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS-QTFPWVLTCCQFD-DKE TE-DD-KDAET-EIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGY PPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AAPPQGVDNQAYQA > 29== L21195 1 human serotonin 
5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ -------------------------MPHLLSGFLEVTASPA---PTW--DA------PPD NVSGCGEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPS NYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTL CVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVN DDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF- -------------PGFPRVQPESVISLNG------------------------------- ---------------------VVKLQKE------------------VEECAN-------- ----------LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR-SLLQCQ--YRNIN-RKL SA-AGMHEALKLAERPERSEFVLQNSDHCGKK---------------------------- --------------------------------------------------GHDT----- > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ -------------------------MPHLLSGFLEVTASPA---PTW--DA------PPD NVSGCGEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPS NYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTL CVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVN DDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF- -------------PGFPRVQPESVISLNG------------------------------- ---------------------VVKLQKE------------------VEECAN-------- ----------LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR-SLLQCQ--YRNIN-RKL SA-AGMHEALKLAERPERSEFVLQNSDHCGKK---------------------------- --------------------------------------------------GHDT----- > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] -------------------------MDVLS-PG--------------------------- ------------------------------------QGNNT--------TSPPAPFETGG NTTGIS-------DVTVSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD 
---ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKK VEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLGVESKAGGALCANGAVRQGD DGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS------------------FERKN ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRQ----- ------------------------------------------------------------ ----------------------------------------------------------- > 33=p A35181 serotonin receptor class 1A - rat -------------------------MDVFS-FG--------------------------- ------------------------------------QGNNT--------TASQEPFGTGG NVTSIS-------DVTFSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD ---ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRK VEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRCAENRAVGTPCTNGAVRQGD DEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC------------------LERKN ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRR----- ------------------------------------------------------------ ----------------------------------------------------------- > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] ---------------MANFTFGDLALDVAR-MGGLASTPS---GLRS-----TGLTTPGL SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R-- ------------------------------------------------------------ 
----------------------------------------------------------- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail ---------------MANFTFGDLALDVAR-MGGLASTPS---GLRS-----TGLTTPGL SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R-- ------------------------------------------------------------ ----------------------------------------------------------- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi ----------------MEGAEGQEELDWEA-LY---------------------LRLP-- -----------------------------LQ---NCSWNSTGWEPNW--NVTVV---PNT TWWQAS-------APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAA NNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHL VAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVS EDLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRR---- --RRGATARGGVGPPPVPAG---------------------------------GALVAGG GSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPT TGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-C DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ-RLLCGR--RVRRR-RA- ---------------PQ------------------------------------------- ----------------------------------------------------------- mafft-7.123-without-extensions/scripts/0000750000076500007650000000000012227117132017211 5ustar katohkatoh