mafft-7.505-without-extensions/0000755000175000017500000000000014224477544016070 5ustar nileshnileshmafft-7.505-without-extensions/core/0000755000175000017500000000000014224501721017001 5ustar nileshnileshmafft-7.505-without-extensions/core/tbfast.c0000644000175000017500000031336114224501721020437 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SHISHAGONYU 0 // for debug #define REPORTCOSTS 0 static int treein; static int topin; static int treeout; static int distout; static int noalign; static int multidist; static int subalignment; static int subalignmentoffset; static int keeplength; static int ndeleted; static int mapout; static int smoothing; static int callpairlocalalign; static int outputhat23; static int nthreadtb; typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _msacompactdistmtxthread_arg // single thread demo tsukau { int njob; int thread_no; int *selfscore; double **partmtx; char **seq; int **skiptable; double *mindist; int *mindistfrom; int *jobpospt; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } msacompactdistmtxthread_arg_t; #ifdef enablemultithread typedef struct _distancematrixthread_arg { int njob; int thread_no; int *selfscore; double **iscore; char **seq; int **skiptable; Jobtable *jobpospt; pthread_mutex_t *mutex; } distancematrixthread_arg_t; typedef struct _treebasethread_arg { int thread_no; int *nrunpt; int njob; int *nlen; int *jobpospt; int ***topol; Treedep *dep; char **aseq; double *effarr; int *alloclenpt; LocalHom **localhomtable; RNApair ***singlerna; double *effarr_kozo; int *fftlog; char *mergeoralign; int *targetmap; int *uselh; pthread_mutex_t *mutex; pthread_cond_t *treecond; } treebasethread_arg_t; #endif static void arguments( int argc, char *argv[], int *pac, char **pav, int *tac, char **tav ) // 2 kai yobaremasu. 
{ int c; int i; nthread = 1; nthreadtb = 1; nthreadpair = 1; outnumber = 0; scoreout = 0; spscoreout = 0; treein = 0; topin = 0; rnaprediction = 'm'; rnakozo = 0; nevermemsave = 0; inputfile = NULL; addfile = NULL; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; // chuui force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; sueff_global = 0.1; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; // dorp = NOTSPECIFIED; ppenalty_dist = NOTSPECIFIED; ppenalty = NOTSPECIFIED; penalty_shift_factor = 1000.0; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; multidist = 0; subalignment = 0; subalignmentoffset = 0; legacygapcost = 0; specificityconsideration = 0.0; keeplength = 0; mapout = 0; smoothing = 0; specifictarget = 0; callpairlocalalign = 0; outputhat23 = 0; nwildcard = 0; nadd = 0; if( pac ) { pav[0] = "tbfast-pair"; *pac = 1; tav[0] = "tbfast"; *tac = 1; for( i=0; i 0 && (*++argv)[0] == '-' ) { // reporterr( "(*argv)[0] = %s\n", (*argv) ); while ( ( c = *++argv[0] ) ) { // reporterr( "c=%c\n", c ); switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); // fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'V': ppenalty_dist = (int)( atof( *++argv ) * 
1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthreadpair = nthread = myatoi( *++argv ); // fprintf( stderr, "nthread = %d\n", nthread ); --argc; #ifndef enablemultithread nthread = 0; #endif goto nextoption; case 's': specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; case 'R': rnaprediction = 'r'; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case 't': treeout = 1; break; case '^': treeout = 2; break; case 'T': noalign = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'L': legacygapcost = 1; break; #if 
1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'S' : scoreout = 1; // for checking parallel calculation break; #else case 'S' : spscoreout = 1; // 2014/Dec/30, sp score break; #endif case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; case 's': treemethod = 's'; break; #endif case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); // fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': treemethod = 'E'; break; case 'q': treemethod = 'q'; break; case 'n' : outnumber = 1; break; #if 0 case 'a': alg = 'a'; break; case 'H': alg = 'H'; break; case 'Q': alg = 'Q'; break; #endif case '@': alg = 'd'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'N': nevermemsave = 1; break; case 'B': // hitsuyou! memopt -M -B no tame break; case 'F': use_fft = 1; break; case 'G': force_fft = 1; use_fft = 1; break; case 'U': treein = 1; break; #if 0 case 'V': topin = 1; break; #endif case 'u': tbrweight = 0; weight = 0; break; case 'v': tbrweight = 3; break; case 'd': multidist = 1; break; #if 0 case 'd': disp = 1; break; #endif /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; #endif case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'W': minimumweight = atof( *++argv ); // fprintf( stderr, "minimumweight = %f\n", minimumweight ); --argc; goto nextoption; #if 0 case 'Z': checkC = 1; break; #endif case 'Y': keeplength = 1; break; case 'z': mapout = 2; break; case 'Z': mapout = 1; break; case 'p': smoothing = 1; break; case '=': specifictarget = 1; break; case ':': nwildcard = 1; break; case '+': outputhat23 = myatoi( *++argv ); reporterr( "outputhat23=%d\n", outputhat23 ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } // reporterr( "argc=%d\n", argc ); if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "argc=%d, tbfast options: Check source file !\n", argc ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } #if 0 static void *distancematrixthread2( void *arg ) { distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; double *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; i++; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); ssi = selfscore[i]; if( i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); for( j=i+1; jnjob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; char **seq = targ->seq; int **skiptable = targ->skiptable; double *mindist = targ->mindist; int *mindistfrom = 
targ->mindistfrom; int *jobpospt = targ->jobpospt; double tmpdist, preference, tmpdistx, tmpdisty; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == njob-1 ) { return( NULL ); } *jobpospt = i+1; } if( i % 100 == 0 ) { if( nthreadpair ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); else fprintf( stderr, "\r% 5d / %d", i, njob ); } for( j=i+1; jnjob; int thread_no = targ->thread_no; double *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; j = i + 1; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } } jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); if( bunbo == 0.0 ) iscore[i][j-i] = 2.0; // 2013/Oct/17 else // iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17 iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast if( iscore[i][j-i] > 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17 } } #else static void *distancematrixthread( void *arg ) // v7.2 ijou deha tsukawanaihazu { distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable 
*jobpospt = targ->jobpospt; int ssi, ssj, bunbo; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; // (jobpospt-i)++ dato, shuuryou hantei no mae ni ++ surunode, tomaranakunaru. if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i += 1; pthread_mutex_unlock( targ->mutex ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); ssi = selfscore[i]; for( j=i+1; j 10.0 ) iscore[i][j-i] = 10.0; // 2015/Mar/17 } } } #endif static void *treebasethread( void *arg ) // seed && compacttree==3 niha taioushinai. { treebasethread_arg_t *targ = (treebasethread_arg_t *)arg; int *nrunpt = targ->nrunpt; int thread_no = targ->thread_no; int njob = targ->njob; int *nlen = targ->nlen; int *jobpospt = targ->jobpospt; int ***topol = targ->topol; Treedep *dep = targ->dep; char **aseq = targ->aseq; double *effarr = targ->effarr; int *alloclen = targ->alloclenpt; LocalHom **localhomtable = targ->localhomtable; RNApair ***singlerna = targ->singlerna; double *effarr_kozo = targ->effarr_kozo; int *fftlog = targ->fftlog; int *targetmap = targ->targetmap; int *uselh = targ->uselh; char *mergeoralign = targ->mergeoralign; char **mseq1, **mseq2; char **localcopy; int i, j, l; int len1, len2; int clus1, clus2; double pscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; char *swaplist = NULL; int m1, m2; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; RNApair ***grouprna1 = NULL, ***grouprna2 = NULL; double **dynamicmtx; int **localmem = NULL; int posinmem; #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif if( compacttree == 3 ) { reporterr( "bug. 
treebasethread() is no longer used when compacttree==3.\n" ); exit( 1 ); } mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); localcopy = calloc( njob, sizeof( char * ) ); dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); localmem = AllocateIntMtx( 2, njob+1 ); // memhist mitaiou if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); #if 0 reporterr( "before allocating localhomshrink (--thread >0), constraint=%d, njob=%d\n", constraint, njob ); use_getrusage(); #endif swaplist = NULL; // if( constraint ) if( constraint && compacttree != 3 ) { if( specifictarget ) swaplist = calloc( njob, sizeof( char ) ); // use_getrusage(); localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) ); for( i=0; i main thread if( constraint ) calcimportance( njob, effarr, aseq, localhomtable ); #endif // writePre( njob, name, nlen, aseq, 0 ); // for( l=0; lmutex ); l = *jobpospt; if( l == njob-1 ) { pthread_mutex_unlock( targ->mutex ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? 
free( mseq1 ); free( mseq2 ); free( localcopy ); free( effarr1 ); free( effarr2 ); free( effarr1_kozo ); free( effarr2_kozo ); free( indication1 ); free( indication2 ); FreeDoubleMtx( dynamicmtx ); FreeIntMtx( localmem ); if( rnakozo && rnaprediction == 'm' ) { if( grouprna1 ) free( grouprna1 ); // nakami ha? if( grouprna2 ) free( grouprna2 ); // nakami ha? grouprna1 = grouprna2 = NULL; } if( constraint && compacttree != 3 ) { if( localhomshrink ) // nen no tame { for( i=0; itreecond, targ->mutex ); } if( dep[l].child1 != -1 ) { while( dep[dep[l].child1].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } // while( *nrunpt >= nthread ) // pthread_cond_wait( targ->treecond, targ->mutex ); // iranai no?? (*nrunpt)++; // pthread_mutex_unlock( targ->mutex ); if( mergeoralign[l] == 'n' ) { // fprintf( stderr, "SKIP!\n" ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); // free( topol[l][0] ); // free( topol[l][1] ); // free( topol[l] ); pthread_mutex_unlock( targ->mutex ); // free( localmem[0] ); // free( localmem[1] ); continue; } m1 = topol[l][0][0]; m2 = topol[l][1][0]; // fprintf( stderr, "\ndistfromtip = %f\n", dep[l].distfromtip ); // makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip - 0.5 ); makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip ); // pthread_mutex_lock( targ->mutex ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen <= len1 + len2 ) { fprintf( stderr, "\nReallocating (by thread %d) ..", thread_no ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); fprintf( stderr, "done. 
*alloclen = %d\n", *alloclen ); } localmem[0][0] = -1; posinmem=topolorderz( localmem[0], topol, dep, l, 0 ) - localmem[0]; localmem[1][0] = -1; posinmem=topolorderz( localmem[1], topol, dep, l, 1 ) - localmem[1]; for( i=0; (j=localmem[0][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } for( i=0; (j=localmem[1][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); } pthread_mutex_unlock( targ->mutex ); if( effarr_kozo ) { clus1 = fastconjuction_noname_kozo( localmem[0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); clus2 = fastconjuction_noname_kozo( localmem[1], localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 ); } #if 0 else if( specifictarget ) { clus1 = fastconjuction_target( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1, minimumweight, targetmap ); clus2 = fastconjuction_target( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2, minimumweight, targetmap ); } #endif else { clus1 = fastconjuction_noname( localmem[0], localcopy, mseq1, effarr1, effarr, indication1, minimumweight, NULL ); clus2 = fastconjuction_noname( localmem[1], localcopy, mseq2, effarr2, effarr, indication2, minimumweight, NULL ); } #if 1 if( l < 1000 || l % 100 == 0 ) fprintf( stderr, "\rSTEP % 5d /%d (thread %4d) ", l+1, njob-1, thread_no ); #else fprintf( stderr, "STEP %d /%d (thread %d) \n", l+1, njob-1, thread_no ); fprintf( stderr, "group1 = %.66s", indication1 ); if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, ", child1 = %d\n", dep[l].child0 ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." 
); fprintf( stderr, ", child2 = %d\n", dep[l].child1 ); fprintf( stderr, "Group1's lengths = " ); for( i=0; i 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } // fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, localmem[0], localmem[1], uselh, NULL, NULL, (compacttree==3)?l:-1, 0 ); // seedinlh, nfiles ni ha taiou shiteinai if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxhist mitaiou } if( alg == 'd' ) { imp_match_init_strictD( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, localmem[0], localmem[1], uselh, NULL, NULL, 
(compacttree==3)?l:-1, 0 ); if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } else if( alg == 'Q' ) { fprintf( stderr, "Not supported\n" ); exit( 1 ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, " f\b\b" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxhist mitaiou break; case( 'A' ): pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxhist mitaiou break; case( 'd' ): pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // 
writePre( njob, name, nlen, localcopy, 0 ); if( disp ) display( localcopy, njob ); pthread_mutex_lock( targ->mutex ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); for( i=0; (j=localmem[0][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); for( i=0; (j=localmem[1][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); pthread_mutex_unlock( targ->mutex ); for( i=0; (j=localmem[0][i])!=-1; i++ ) free( localcopy[j] ); for( i=0; (j=localmem[1][i])!=-1; i++ ) free( localcopy[j] ); // free( topol[l][0] ); // free( topol[l][1] ); // free( topol[l] ); #if REPORTCOSTS if( l < 1000 || l % 100 == 0 ) { use_getrusage(); reporterr( "real = %f min\n", (float)(time(NULL) - starttime)/60.0 ); reporterr( "user = %f min\n", (float)(clock()-startclock)/CLOCKS_PER_SEC/60); } #endif } } #endif void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo, int *targetmap, int *targetmapr, int ntarget, int *uselh, int nseed, int *nfilesfornode ) { int i, l, m; int len1nocommongap, len2nocommongap; int len1, len2; int clus1, clus2; double pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; int *seedinlh1 = NULL; int *seedinlh2 = NULL; char *swaplist = NULL; int *fftlog; int m1, m2; int *gaplen; int *gapmap; int *alreadyaligned; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; RNApair ***grouprna1 = NULL, ***grouprna2 = NULL; static double **dynamicmtx; int gapmaplen; int **localmem = NULL; int posinmem; int nfiles; double ***cpmxhist = NULL; int **memhist = NULL; double ***cpmxchild0 = NULL; double ***cpmxchild1 = NULL; double orieff1, orieff2; #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif if( rnakozo && rnaprediction == 
'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } fftlog = AllocateIntVec( njob ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); gaplen = AllocateIntVec( *alloclen+10 ); gapmap = AllocateIntVec( *alloclen+10 ); alreadyaligned = AllocateIntVec( njob ); dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); localmem = calloc( sizeof( int * ), 2 ); cpmxhist = (double ***)calloc( njob-1, sizeof( double ** ) ); for( i=0; i0, nseed > 0 { localhomshrink = (LocalHom ***)calloc( nseed, sizeof( LocalHom ** ) ); for( i=0; i 0, compacttree == 3 && nseed == 0 { seedinlh1 = NULL; // nakutemo seedinlh2 = NULL; // nakutemo localhomshrink = NULL; // nakutemo } effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru. effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru. for( i=0; i 0 { dontcalcimportance_half( nseed, effarr, aseq, localhomtable ); //CHUUI } // writePre( njob, name, nlen, aseq, 0 ); tscore = 0.0; for( l=0; l 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." 
); fprintf( stderr, "\n" ); #endif #if REPORTCOSTS if( l < 1000 || l % 100 == 0 ) reporterr( "\nclus1=%d, clus2=%d\n", clus1, clus2 ); #endif // for( i=0; i 0 && compacttree == 3 && nseed > 0 { fastshrinklocalhom_half_seed( localmem[0], localmem[1], nseed, seedinlh1, seedinlh2, localhomtable, localhomshrink ); for( i=0; i 30000 || len2 > 30000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } // fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, localmem[0], localmem[1], uselh, seedinlh1, seedinlh2, (compacttree==3)?l:-1, nfiles ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); #if REPORTCOSTS // reporterr( "\n\n %d - %d (%d x %d) : \n", topol[l][0][0], topol[l][1][0], clus1, clus2 ); #endif pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, 
localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } if( alg == 'd' ) { imp_match_init_strictD( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, localmem[0], localmem[1], uselh, seedinlh1, seedinlh2, (compacttree==3)?l:-1, nfiles ); if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } else if( alg == 'Q' ) { fprintf( stderr, "Not supported\n" ); exit( 1 ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, " f\b\b" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'A' ): pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); break; default: ErrorExit( "ERROR IN 
SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ // writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. { reporterr( "Check source!!\n" ); exit( 1 ); } if( mergeoralign[l] == '2' ) { // fprintf( stderr, ">mseq1[0] = \n%s\n", mseq1[0] ); // fprintf( stderr, ">mseq2[0] = \n%s\n", mseq2[0] ); // if( keeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL ); gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1; adjustgapmap( gapmaplen, gapmap, mseq1[0] ); if( smoothing ) { restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps_bothorders( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' ); } else { restorecommongaps( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, *alloclen, alg, '-' ); } #if 0 for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } // free( topol[l][0] ); // free( topol[l][1] ); // free( topol[l] ); free( localmem[0] ); free( localmem[1] ); #if REPORTCOSTS if( l < 1000 || l % 100 == 0 ) { use_getrusage(); reporterr( "real = %f min\n", (float)(time(NULL) - starttime)/60.0 ); reporterr( "user = %f min\n", (float)(clock()-startclock)/CLOCKS_PER_SEC/60); } #endif } #if REPORTCOSTS use_getrusage(); reporterr( "real = %f min\n", (float)(time(NULL) - starttime)/60.0 ); reporterr( "user = %f min\n", 
(float)(clock()-startclock)/CLOCKS_PER_SEC/60); #endif #if 1 // 2021/Jun/25 if( cpmxhist ) { for( i=0; i0.0 && usertree no toki. loadtree( njob, topol, len, name, nlen, dep, treeout ); // loadtop( njob, topol, len, name, NULL, dep ); // 2015/Jan/13, not yet checked fprintf( stderr, "\ndone.\n\n" ); // for( i=0; i0.0 ) { int *mem0 = calloc( sizeof( int ), njob ); int *mem1 = calloc( sizeof( int ), njob ); expdist = AllocateDoubleMtx( njob, njob ); for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; #if SHISHAGONYU // for debug char buff[100]; sprintf( buff, "%10.5f", tmpptr->opt ); tmpptr->opt = 0.0; sscanf( buff, "%lf", &(tmpptr->opt) ); #endif tmpptr->opt = ( tmpptr->opt ) / 5.8 * 600; } } if( !specifictarget ) ilim--; } prep = fopen( "hat3.seed", "r" ); if( prep ) { fprintf( stderr, "Loading 'hat3.seed' ... " ); if( specifictarget ) readlocalhomtable2_target( prep, njob, localhomtable, kozoarivec, targetmap ); // uwagakisarerukara koredehadame. else readlocalhomtable2_half( prep, njob, localhomtable, kozoarivec ); // uwagakisarerukara koredehadame. fclose( prep ); fprintf( stderr, "\ndone.\n" ); } else fprintf( stderr, "No hat3.seed. No problem.\n" ); if( outputhat23 ) { prep = fopen( "hat3", "w" ); if( !prep ) ErrorExit( "Cannot open hat3 to write." ); fprintf( stderr, "Writing hat3 for iterative refinement\n" ); if( specifictarget ) ilim = ntarget; else ilim = njob-1; for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; if( targetmap[j] == -1 || targetmap[i] < targetmap[j] ) fprintf( prep, "%d %d %d %7.5f %d %d %d %d %c\n", targetmapr[i], j, tmpptr->overlapaa, tmpptr->opt/600*5.8, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->korh ); } } } fclose( prep ); prep = fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore ); fclose( prep ); } else if( distout ) // choufuku shiterukedo, muda deha nai. 
{ prep = fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore ); fclose( prep ); } } else { /* compacttree==3 no toki hat3.seed ha mada yomenai */ prep = fopen( "hat3.seed", "r" ); if( prep ) { char r; r=fgetc(prep); if( isalnum( r ) || r == ' ' ) { reporterr( "Structural alignment is not yet supported in the --memsavepair mode. Try normal mode,\n" ); exit( 1 ); } fclose( prep ); } } } // else if( compacttree != 3 ) else { fprintf( stderr, "Loading 'hat3' ... " ); prep = fopen( "hat3", "r" ); if( prep == NULL ) ErrorExit( "Make hat3." ); if( specifictarget ) readlocalhomtable2_target( prep, njob, localhomtable, kozoarivec, targetmap ); else readlocalhomtable2_half( prep, njob, localhomtable, kozoarivec ); fclose( prep ); fprintf( stderr, "\ndone.\n" ); } nkozo = 0; for( i=0; i 0 ) { msacompactdistmtxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; double **mindistthread; int **mindistfromthread; mindistthread = AllocateDoubleMtx( nthreadpair, njob ); mindistfromthread = AllocateIntMtx( nthreadpair, njob ); targ = calloc( nthreadpair, sizeof( msacompactdistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); jobpos = 0; for( i=0; i=7.2. 
Please email katoh@ifrec.osaka-u.ac.jp\n" ); // fflush( stderr ); // exit( 1 ); iscore = AllocateFloatHalfMtx( njob ); // tbutree == 0 no baai ha allocate sareteinainode for( i=1; i 0 ) { distancematrixthread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthreadpair, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17 //exit( 1 ); #if 0 fprintf( stderr, "### ssj = %f\n", ssj ); fprintf( stderr, "### selfscore[i] = %f\n", selfscore[i] ); fprintf( stderr, "### selfscore[j] = %f\n", selfscore[j] ); fprintf( stderr, "### rawscore = %f\n", naivepairscore11( seq[i], seq[j], penalty_dist ) ); #endif } } } // fprintf( stderr, "\ndone.\n\n" ); FreeIntMtx( skiptable ); // fflush( stderr ); reporterr( "\rdone. \n" ); } else { if( callpairlocalalign ) { if( multidist ) { reporterr( "Bug in v7.290. Please email katoh@ifrec.osaka-u.ac.jp\n" ); exit( 1 ); } #if 0 prep = fopen( "hat2", "w" ); if( !prep ) ErrorExit( "Cannot open hat2." ); WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore ); // jissiha double fclose( prep ); #endif } else { if( multidist ) { fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " ); prep = fopen( "hat2n", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_doublehalf_pointer( prep, njob, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); prep = fopen( "hat2i", "r" ); if( prep == NULL ) ErrorExit( "Make hat2i." ); readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } else { fprintf( stderr, "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." 
); readhat2_doublehalf_pointer( prep, njob, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } if( distout ) // callpairlocalalign == 1 no toki ha ue de shorizumi. { reporterr( "\nwriting hat2 (2)\n" ); hat2p = fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore ); fclose( hat2p ); } } // for( i=0; imtx) ha, 6merdistance -> disttbfast.c; dp distance -> muzukashii { reporterr( "Constructing a tree ... nthread=%d", nthread ); compacttree_memsaveselectable( njob, partmtx, mindistfrom, mindist, NULL, selfscore, seq, skiptable, topol, len, name, NULL, dep, treeout, compacttree, 1 ); // compacttreegivendist( njob, mindist, mindistfrom, topol, len, name, dep, treeout ); if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL; if( mindist ) free( mindist );; mindist = NULL; // if( selfscore ) free( selfscore ); selfscore = NULL; // matomete free if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL; // nikaime dake free( partmtx ); } else if( treeout ) { fprintf( stderr, "Constructing a UPGMA tree ... " ); fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( njob, iscore, topol, len, name, nlen, dep, 1, treeout ); // _memsave demo iihazu } else { fprintf( stderr, "Constructing a UPGMA tree ... " ); fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( njob, iscore, topol, len, dep, 1, 1 ); // _memsave demo iihazu } // else // ErrorExit( "Incorrect tree\n" ); if( nkozo ) // atode kakukamo { // for( i=0; i= njob ) { fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); fprintf( stderr, "#\n" ); fprintf( stderr, "# %d. 
%-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); fprintf( stderr, "#\n" ); fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "\n" ); exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d does not form a monophyletic cluster\n", i+1 ); fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered 
as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "############################################################################### \n" ); fprintf( stderr, "\n" ); exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %c\n\n", i, mergeoralign[i] ); } #endif for( i=0; i 0 && nadd == 0 ) { treebasethread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t treecond; int *fftlog; int nrun; int nthread_yoyu; nthread_yoyu = nthreadtb * 1; nrun = 0; jobpos = 0; targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); fftlog = AllocateIntVec( njob ); handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &treecond, NULL ); for( i=0; inext ) { free( (void *)tmppt1 ); tmppt1 = tmppt2; } free( (void *)tmppt1 ); } free( (void *)(localhomtable[i]+j) ); } free( (void *)localhomtable ); } #endif fprintf( trap_g, "done.\n" ); // fclose( trap_g ); free( mergeoralign ); freeconstants(); if( rnakozo && rnaprediction == 'm' ) { if( singlerna ) // nen no tame { for( i=0; i 0 ) { reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted ); if( mapout ) reporterr( "The deleted letters are shown in the (filename).map file.\n" ); else reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" ); } free( kozoarivec ); FreeCharMtx( seq ); FreeCharMtx( bseq ); free( mseq1 ); free( mseq2 ); FreeCharMtx( name ); free( nlen ); free( selfscore ); FreeIntCub( topol ); topol = NULL; // for( i=0; i -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } #else for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } #endif } #endif static double Ltracking( double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char 
**mseq1, char **mseq2, int **ijp, int *off1pt, int *off2pt, int endi, int endj, int *warpis, int *warpjs, int warpbase ) { int i, j, l, iin, jin, lgth1, lgth2, k, limk; int ifi=0, jfi=0; // by D.Mathog, a guess // char gap[] = "-"; char *gap; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i= warpbase ) { // fprintf( stderr, "WARP!\n" ); ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } #if 1 // sentou de warp? if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--mseq1[0] = seq1[0][l]; *--mseq2[0] = *gap; k++; } l= jin; while( --l >= 0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][l]; k++; } break; } else #endif { l = iin - ifi; while( --l > 0 ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l > 0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; if( ijp[ifi][jfi] == localstop ) break; k++; iin = ifi; jin = jfi; } if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi; if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi; // fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi ); // fprintf( stderr, "\n" ); // fprintf( stderr, "%s\n", mseq1[0] ); // fprintf( stderr, "%s\n", mseq2[0] ); return( 0.0 ); } double L__align11( double **n_dynamicmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double g; double *currentw, *previousw; #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; // static TLS int **intwork; // static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double **amino_dynamicmtx = NULL; // ?? double maxwm; int endali = 0, endalj = 0; // by D.Mathog, a guess // int endali, endalj; double localthr = -offset + scoreoffset * 600; // 2013/12/13 double localthr2 = -offset + scoreoffset * 600; // 2013/12/13 // double localthr = -offset; // double localthr2 = -offset; double fpenalty = (double)penalty; double fpenalty_ex = (double)penalty_ex; double fpenalty_shift = (double)penalty_shift; double fpenalty_tmp; // atode kesu int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; } return( 0.0 ); } if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); warpbase = lgth1 + lgth2; warpis = NULL; warpjs = NULL; warpn = 0; if( trywarp ) { wmrecords = AllocateFloatVec( 
lgth2+1 ); warpi = AllocateIntVec( lgth2+1 ); warpj = AllocateIntVec( lgth2+1 ); prevwmrecords = AllocateFloatVec( lgth2+1 ); prevwarpi = AllocateIntVec( lgth2+1 ); prevwarpj = AllocateIntVec( lgth2+1 ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; // FreeFloatMtx( doublework ); // FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); // doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); // intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; *ijppt = -( j - mpi ); } if( *prept > mi ) { mi = *prept; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt+fpenalty) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } if( *prept > *mjpt ) { *mjpt = *prept; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f, localthr = %f\n", i, j, *curpt, localthr ); *ijppt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if 0 fprintf( stderr, "wm (%d,%d) = %5.0f\n", i, j, wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif if( trywarp ) { fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ); // fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp ); // fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm ); // if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm ) if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai { // fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! 
warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } else { } curm = *curpt + wm; // fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j ); // fprintf( stderr, "copy from i, j-1? %f > %f?\n", wmrecords[j-1], curm ); // if( wmrecords[j-1] > wmrecords[j] ) if( *wmrecords1pt > *wmrecordspt ) { // fprintf( stderr, "yes\n" ); // wmrecords[j] = wmrecords[j-1]; *wmrecordspt = *wmrecords1pt; // warpi[j] = warpi[j-1]; // warpj[j] = warpj[j-1]; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); // fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); } // else // { // fprintf( stderr, "no\n" ); // } // fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] ); // fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] ); // fprintf( stderr, "replace?\n" ); // if( curm > wmrecords[j] ) if( curm > *wmrecordspt ) { // fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm ); // wmrecords[j] = curm; *wmrecordspt = curm; // warpi[j] = i; // warpj[j] = j; *warpipt = i; *warpjpt = j; } // else // { // fprintf( stderr, "No! 
warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); // } // fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] ); wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } // fprintf( stderr, "\nwm = %f\n", wm ); if( trywarp ) { // if( warpn ) fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if 0 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif if( ijp[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj, warpis, warpjs, warpbase ); if( warpis ) free( warpis ); if( warpjs ) free( warpjs ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "wm=%f\n", wm ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "*off1pt = %d, *off2pt = %d\n", *off1pt, *off2pt ); fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, " wm = %f\n", wm ); #endif return( maxwm ); } double L__align11_noalign( double **n_dynamicmtx, char **seq1, char **seq2 ) // warp mitaiou { // int k; int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; 
// int resultlen; double wm = 0.0; /* int ?????? */ double g; double *currentw, *previousw; #if 1 double *wtmp; // int *ijppt; double *mjpt, *prept, *curpt; // int *mpjpt; #endif static TLS double mi, *m; // static TLS int **ijp; // static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ // static TLS char **mseq1; // static TLS char **mseq2; // static TLS char **mseq; // static TLS int **intwork; // static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double **amino_dynamicmtx = NULL; // ?? double maxwm; // int endali = 0, endalj = 0; // by D.Mathog, a guess // int endali, endalj; double localthr = -offset; double localthr2 = -offset; // double localthr = 100; // double localthr2 = 100; double fpenalty = (double)penalty; double fpenalty_ex = (double)penalty_ex; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; // free( mseq1 ); // free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); // FreeIntVec( mp ); // FreeCharMtx( mseq ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; } return( 0.0 ); } // if( orlgth1 == 0 ) // { // mseq1 = AllocateCharMtx( njob, 0 ); // mseq2 = AllocateCharMtx( njob, 0 ); // } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); // FreeIntVec( mp ); // FreeCharMtx( mseq ); // FreeFloatMtx( doublework ); // FreeIntMtx( intwork ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( 
(int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); // mp = AllocateIntVec( ll2+2 ); // mseq = AllocateCharMtx( njob, ll1+ll2 ); // doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); // intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif // amino_dynamicmtx = AllocateDoubleMtx( 0x80, 0x80 ); amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); // 2017/Nov. constants.c no 'charsize' wo global hensuu nishita houga yoi? orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) // { // int ll1, ll2; // // if( commonAlloc1 && commonAlloc2 ) // { // FreeIntMtx( commonIP ); // } // // ll1 = MAX( orlgth1, commonAlloc1 ); // ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG // fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif // commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG // fprintf( stderr, "succeeded\n\n" ); #endif // commonAlloc1 = ll1; // commonAlloc2 = ll2; // } // ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; // *ijppt = -( j - mpi ); } if( *prept > mi ) { mi = *prept; // mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt+fpenalty) > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } if( *prept > *mjpt ) { *mjpt = *prept; // *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif if( maxwm < wm ) { maxwm = wm; // endali = i; // endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); // *ijppt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt++ += wm; // ijppt++; mjpt++; prept++; // mpjpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } #if 0 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif #if 0 // IRUKAMO!!!! 
if( ijp[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } #else if( maxwm < localthr ) { fprintf( stderr, "maxwm <- 0.0 \n" ); return( 0.0 ); } #endif // Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj ); // resultlen = strlen( mseq1[0] ); // if( alloclen < resultlen || resultlen > N ) // { // fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); // ErrorExit( "LENGTH OVER!\n" ); // } // strcpy( seq1[0], mseq1[0] ); // strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "wm=%f\n", wm ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, " wm = %f\n", wm ); #endif return( maxwm ); } mafft-7.505-without-extensions/core/makemergetable.rb0000644000175000017500000000115114224501721022271 0ustar nileshnilesh#!/bin/env ruby seedoffset = 0 #require 'getopts' #if getopts( "s:" ) == nil || ARGV.length == 0 || $OPT_h then # puts "Usage: #{$0} [-s number_of_seeds] input_files" # exit #end # #if $OPT_s # seedoffset = $OPT_s.to_i #end require 'optparse' opt = OptionParser.new OPTS = {} opt.on('-s VAL') {|v| OPTS[:s] = v} opt.parse!(ARGV) seedoffset = OPTS[:s].to_i files = ARGV num = seedoffset + 1 for file in files output = "" STDERR.puts file fp = File.open( file, "r" ) while line = fp.gets if line =~ /^>/ then output += " " + num.to_s num += 1 end end fp.close puts output + " # " + file end mafft-7.505-without-extensions/core/mingw64mingw32dll0000644000175000017500000000073114224501721022123 0ustar nileshnilesh#!/usr/bin/env bash export PATH=/home/mingw32/mingw32/bin:$PATH export C_INCLUDE_PATH=/home/mingw32/mingw32/include export LIBRARY_PATH=/home/mingw32/mingw32/lib make clean make ENABLE_MULTITHREAD="" dlls rm -rf dll32 mkdir dll32 mv *.dll dll32/ export PATH=/home/mingw64/mingw64/bin:$PATH export 
C_INCLUDE_PATH=/home/mingw64/mingw64/include export LIBRARY_PATH=/home/mingw64/mingw64/lib make clean make ENABLE_MULTITHREAD="" dlls rm -rf dll64 mkdir dll64 mv *.dll dll64/ mafft-7.505-without-extensions/core/dash_client.go0000644000175000017500000014247514224501721021622 0ustar nileshnileshpackage main import( "bufio" "bytes" "encoding/json" "flag" "fmt" "io" "io/ioutil" "net/http" "os" "strings" "strconv" "sort" "time" ) var VersionNumber = 1.1 var Debug = false var NumberOfQueryTries = 12 var HTTPClient = &http.Client{} var SequenceLimit = 3000 var AlignmentLimit = 10000 //URL's func DASHDomainAlignmentURL(dash_url string) string { return dash_url + "domain_alignments?format=JSON" } func DASHChainDomainsURL(dash_url string) string { return dash_url + "domains?format=JSON&filter=pdbid=%s" } func DASHChainURL(dash_url string) string { return dash_url + "chains?format=JSON&filter=pdbid=%s" } func DASHChainSearchURL(dash_url string) string { return dash_url + "chain_search_sequence?limit=5" } //Data types type Sequence struct { Label string Sequence string } type DASHInput struct { PDBID string FullID string Hat3Index int Start int End int Sequence string Domains []RESTDomain } type RESTChain struct { StatusCode int StatusMessage string PDBID string DepositionDate string Sequence string Length int } type RESTDomain struct { StatusCode int StatusMessage string DomainID string PDBID string Length int Sequence string Segments string ResidueNumbers string Start int End int SliceStart int SliceEnd int ResidueNumberInts []int } type RESTAlignment struct { StatusCode int StatusMessage string SCORE int ID1 string ID2 string PRIMS1 string SECOS1 string PRIMS2 string SECOS2 string EQUIVALENCE string LOWSIMILARITY bool } type RESTSearch struct { StatusCode int StatusMessage string ID string Start int End int } func(alignment *RESTAlignment) Reverse() { new_alignment := *alignment new_alignment.ID1 = alignment.ID2 new_alignment.ID2 = alignment.ID1 new_alignment.PRIMS1 = 
alignment.PRIMS2 new_alignment.PRIMS2 = alignment.PRIMS1 new_alignment.SECOS1 = alignment.SECOS2 new_alignment.SECOS2 = alignment.SECOS1 *alignment = new_alignment } //Utility Functions func fatal(object interface{}) { if Debug { panic(fmt.Sprint(object)) } else { fmt.Fprintln(os.Stderr, "----") non_fatal(object) os.Exit(1) } } func non_fatal(object interface{}) { fmt.Fprintln(os.Stderr, fmt.Sprint(object)) } func check(err error) { if err != nil { fatal(err.Error()) } } func http_query(method string, url string, body io.Reader) *http.Response { //A simple request will follow redirects by default! retry_count := NumberOfQueryTries var response *http.Response status_code := 404 for status_code != 200 { if retry_count != NumberOfQueryTries { fmt.Println("Retrying DASH request...") time.Sleep(10*time.Second) } if retry_count == 0 { break } request, err := http.NewRequest(method, url, body) check(err) response, err = HTTPClient.Do(request) if err != nil { status_code = 404 retry_count += -1 continue } status_code = response.StatusCode } if status_code != 200 { error_message := fmt.Sprintf( "Client was unable to connect to DASH server after %d retries.", NumberOfQueryTries) error_message += "\nPlease check https://sysimm.org for information about possible maintenance." 
error_message += "\nIf there is no scheduled maintenance occuring right now you may submit a bug report by contacting us at https://sysimm.org" fatal(error_message) } return response } func parse_residue_numbers(residue_numbers_string string) []int { starts_and_ends := strings.Split(residue_numbers_string, "; ") residue_numbers := make([]int, 0, 3000) for _, start_and_end := range(starts_and_ends) { start_and_end_split := strings.Split(start_and_end, "-") start, err := strconv.Atoi(start_and_end_split[0]) check(err) end, err := strconv.Atoi(start_and_end_split[1]) check(err) for residue_number := start; residue_number <= end; residue_number++ { residue_numbers = append(residue_numbers, residue_number) } } return residue_numbers } //BLOSUM62 Matrix var BLOSUM62Max = 11.0 type BLOSUMRow map[byte]float64 type BLOSUMMatrix map[byte]BLOSUMRow var BLOSUM62 = BLOSUMMatrix{ 'A':BLOSUMRow{ 'A':4, 'R':-1, 'N':-2, 'D':-2, 'C':0, 'Q':-1, 'E':-1, 'G':0, 'H':-2, 'I':-1, 'L':-1, 'K':-1, 'M':-1, 'F':-2, 'P':-1, 'S':1, 'T':0, 'W':-3, 'Y':-2, 'V':0, 'B':-2, 'Z':-1, }, 'R':BLOSUMRow{ 'A':-1, 'R':5, 'N':0, 'D':-2, 'C':-3, 'Q':1, 'E':0, 'G':-2, 'H':0, 'I':-3, 'L':-2, 'K':2, 'M':-1, 'F':-3, 'P':-2, 'S':-1, 'T':-1, 'W':-3, 'Y':-2, 'V':-3, 'B':-1, 'Z':0, }, 'N':BLOSUMRow{ 'A':-2, 'R':0, 'N':6, 'D':1, 'C':-3, 'Q':0, 'E':0, 'G':0, 'H':1, 'I':-3, 'L':-3, 'K':0, 'M':-2, 'F':-3, 'P':-2, 'S':1, 'T':0, 'W':-4, 'Y':-2, 'V':-3, 'B':3, 'Z':0, }, 'D':BLOSUMRow{ 'A':-2, 'R':-2, 'N':1, 'D':6, 'C':-3, 'Q':0, 'E':2, 'G':-1, 'H':-1, 'I':-3, 'L':-4, 'K':-1, 'M':-3, 'F':-3, 'P':-1, 'S':0, 'T':-1, 'W':-4, 'Y':-3, 'V':-3, 'B':4, 'Z':1, }, 'C':BLOSUMRow{ 'A':0, 'R':-3, 'N':-3, 'D':-3, 'C':9, 'Q':-3, 'E':-4, 'G':-3, 'H':-3, 'I':-1, 'L':-1, 'K':-3, 'M':-1, 'F':-2, 'P':-3, 'S':-1, 'T':-1, 'W':-2, 'Y':-2, 'V':-1, 'B':-3, 'Z':-3, }, 'Q':BLOSUMRow{ 'A':-1, 'R':1, 'N':0, 'D':0, 'C':-3, 'Q':5, 'E':2, 'G':-2, 'H':0, 'I':-3, 'L':-2, 'K':1, 'M':0, 'F':-3, 'P':-1, 'S':0, 'T':-1, 'W':-2, 'Y':-1, 'V':-2, 'B':0, 'Z':3, }, 
'E':BLOSUMRow{ 'A':-1, 'R':0, 'N':0, 'D':2, 'C':-4, 'Q':2, 'E':5, 'G':-2, 'H':0, 'I':-3, 'L':-3, 'K':1, 'M':-2, 'F':-3, 'P':-1, 'S':0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B':1, 'Z':4, }, 'G':BLOSUMRow{ 'A':0, 'R':-2, 'N':0, 'D':-1, 'C':-3, 'Q':-2, 'E':-2, 'G':6, 'H':-2, 'I':-4, 'L':-4, 'K':-2, 'M':-3, 'F':-3, 'P':-2, 'S':0, 'T':-2, 'W':-2, 'Y':-3, 'V':-3, 'B':-1, 'Z':-2, }, 'H':BLOSUMRow{ 'A':-2, 'R':0, 'N':1, 'D':-1, 'C':-3, 'Q':0, 'E':0, 'G':-2, 'H':8, 'I':-3, 'L':-3, 'K':-1, 'M':-2, 'F':-1, 'P':-2, 'S':-1, 'T':-2, 'W':-2, 'Y':2, 'V':-3, 'B':0, 'Z':0, }, 'I':BLOSUMRow{ 'A':-1, 'R':-3, 'N':-3, 'D':-3, 'C':-1, 'Q':-3, 'E':-3, 'G':-4, 'H':-3, 'I':4, 'L':2, 'K':-3, 'M':1, 'F':0, 'P':-3, 'S':-2, 'T':-1, 'W':-3, 'Y':-1, 'V':3, 'B':-3, 'Z':-3, }, 'L':BLOSUMRow{ 'A':-1, 'R':-2, 'N':-3, 'D':-4, 'C':-1, 'Q':-2, 'E':-3, 'G':-4, 'H':-3, 'I':2, 'L':4, 'K':-2, 'M':2, 'F':0, 'P':-3, 'S':-2, 'T':-1, 'W':-2, 'Y':-1, 'V':1, 'B':-4, 'Z':-3, }, 'K':BLOSUMRow{ 'A':-1, 'R':2, 'N':0, 'D':-1, 'C':-3, 'Q':1, 'E':1, 'G':-2, 'H':-1, 'I':-3, 'L':-2, 'K':5, 'M':-1, 'F':-3, 'P':-1, 'S':0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B':0, 'Z':1, }, 'M':BLOSUMRow{ 'A':-1, 'R':-1, 'N':-2, 'D':-3, 'C':-1, 'Q':0, 'E':-2, 'G':-3, 'H':-2, 'I':1, 'L':2, 'K':-1, 'M':5, 'F':0, 'P':-2, 'S':-1, 'T':-1, 'W':-1, 'Y':-1, 'V':1, 'B':-3, 'Z':-1, }, 'F':BLOSUMRow{ 'A':-2, 'R':-3, 'N':-3, 'D':-3, 'C':-2, 'Q':-3, 'E':-3, 'G':-3, 'H':-1, 'I':0, 'L':0, 'K':-3, 'M':0, 'F':6, 'P':-4, 'S':-2, 'T':-2, 'W':1, 'Y':3, 'V':-1, 'B':-3, 'Z':-3, }, 'P':BLOSUMRow{ 'A':-1, 'R':-2, 'N':-2, 'D':-1, 'C':-3, 'Q':-1, 'E':-1, 'G':-2, 'H':-2, 'I':-3, 'L':-3, 'K':-1, 'M':-2, 'F':-4, 'P':7, 'S':-1, 'T':-1, 'W':-4, 'Y':-3, 'V':-2, 'B':-2, 'Z':-1, }, 'S':BLOSUMRow{ 'A':1, 'R':-1, 'N':1, 'D':0, 'C':-1, 'Q':0, 'E':0, 'G':0, 'H':-1, 'I':-2, 'L':-2, 'K':0, 'M':-1, 'F':-2, 'P':-1, 'S':4, 'T':1, 'W':-3, 'Y':-2, 'V':-2, 'B':0, 'Z':0, }, 'T':BLOSUMRow{ 'A':0, 'R':-1, 'N':0, 'D':-1, 'C':-1, 'Q':-1, 'E':-1, 'G':-2, 'H':-2, 'I':-1, 'L':-1, 'K':-1, 'M':-1, 
'F':-2, 'P':-1, 'S':1, 'T':5, 'W':-2, 'Y':-2, 'V':0, 'B':-1, 'Z':-1, }, 'W':BLOSUMRow{ 'A':-3, 'R':-3, 'N':-4, 'D':-4, 'C':-2, 'Q':-2, 'E':-3, 'G':-2, 'H':-2, 'I':-3, 'L':-2, 'K':-3, 'M':-1, 'F':1, 'P':-4, 'S':-3, 'T':-2, 'W':11, 'Y':2, 'V':-3, 'B':-4, 'Z':-3, }, 'Y':BLOSUMRow{ 'A':-2, 'R':-2, 'N':-2, 'D':-3, 'C':-2, 'Q':-1, 'E':-2, 'G':-3, 'H':2, 'I':-1, 'L':-1, 'K':-2, 'M':-1, 'F':3, 'P':-3, 'S':-2, 'T':-2, 'W':2, 'Y':7, 'V':-1, 'B':-3, 'Z':-2, }, 'V':BLOSUMRow{ 'A':0, 'R':-3, 'N':-3, 'D':-3, 'C':-1, 'Q':-2, 'E':-2, 'G':-3, 'H':-3, 'I':3, 'L':1, 'K':-2, 'M':1, 'F':-1, 'P':-2, 'S':-2, 'T':0, 'W':-3, 'Y':-1, 'V':4, 'B':-3, 'Z':-2, }, 'B':BLOSUMRow{ 'A':-2, 'R':-1, 'N':3, 'D':4, 'C':-3, 'Q':0, 'E':1, 'G':-1, 'H':0, 'I':-3, 'L':-4, 'K':0, 'M':-3, 'F':-3, 'P':-2, 'S':0, 'T':-1, 'W':-4, 'Y':-3, 'V':-3, 'B':4, 'Z':1, }, 'Z':BLOSUMRow{ 'A':-1, 'R':0, 'N':0, 'D':1, 'C':-3, 'Q':3, 'E':4, 'G':-2, 'H':0, 'I':-3, 'L':-3, 'K':1, 'M':-1, 'F':-3, 'P':-1, 'S':0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B':1, 'Z':4, }, } //Realign type FloatRow []float64 type FloatMatrix []FloatRow type IntRow []int type IntMatrix []IntRow func InitializeFloatMatrix(size_a int, size_b int) *FloatMatrix { float_matrix := make(FloatMatrix, size_a) for i, _ := range(float_matrix) { float_matrix[i] = make(FloatRow, size_b) } return &float_matrix } func InitializeIntMatrix(size_a int, size_b int) *IntMatrix { int_matrix := make(IntMatrix, size_a) for i, _ := range(int_matrix) { int_matrix[i] = make(IntRow, size_b) } return &int_matrix } func AlignMatrix(equivalence_matrix *FloatMatrix) (FloatMatrix, int, float64) { number_of_residues_a := len(*equivalence_matrix) number_of_residues_b := len((*equivalence_matrix)[0]) //Constants bog := 0.0 beg := 0.0 iog := 5.0 ieg := 1.0 //Initialize matrices D := InitializeFloatMatrix(number_of_residues_a+1, number_of_residues_b+1) E := InitializeFloatMatrix(number_of_residues_a+1, number_of_residues_b+1) F := InitializeFloatMatrix(number_of_residues_a+1, 
number_of_residues_b+1) P1 := InitializeIntMatrix(number_of_residues_a+1, number_of_residues_b+1) P2 := InitializeIntMatrix(number_of_residues_a+1, number_of_residues_b+1) pE := InitializeIntMatrix(number_of_residues_a+1, number_of_residues_b+1) pF := InitializeIntMatrix(number_of_residues_a+1, number_of_residues_b+1) //Fill matrices for i := 0; i <= number_of_residues_a; i++ { if i != number_of_residues_a { (*D)[i][number_of_residues_b] = -1 * (bog + float64(number_of_residues_a - 1 - i) * beg) (*P1)[i][number_of_residues_b] = number_of_residues_a (*P2)[i][number_of_residues_b] = number_of_residues_a } (*E)[i][number_of_residues_b] = -2000.0 (*F)[i][number_of_residues_b] = -2000.0 (*pE)[i][number_of_residues_b] = number_of_residues_a (*pF)[i][number_of_residues_b] = number_of_residues_a } for i := 0; i <= number_of_residues_b; i++ { if i != number_of_residues_b { (*D)[number_of_residues_a][i] = -1 * (bog + float64(number_of_residues_b - 1 - i) * beg) (*P1)[number_of_residues_a][i] = number_of_residues_b (*P2)[number_of_residues_a][i] = number_of_residues_b } (*E)[number_of_residues_a][i] = -2000.0 (*F)[number_of_residues_a][i] = -2000.0 (*pE)[number_of_residues_a][i] = number_of_residues_b (*pF)[number_of_residues_a][i] = number_of_residues_b } for i := 0; i < number_of_residues_a; i++ { for j := 0; j < number_of_residues_b; j++ { similarity := (*equivalence_matrix)[i][j] (*D)[i][j] = similarity } } //Solv_Rec for i := number_of_residues_a - 1; i >= 0; i-- { gp1o := iog gp1e := ieg if i == 0 { gp1o = bog gp1e = beg } for j := number_of_residues_b - 1; j >= 0; j-- { gp2o := iog gp2e := ieg if j == 0 { gp2o = bog gp2e = beg } //Determine E d1 := (*E)[i+1][j] - gp2e d2 := (*D)[i+1][j] - gp2o if d1 > d2 { (*E)[i][j] = d1 (*pE)[i][j] = (*pE)[i+1][j] } else { (*E)[i][j] = d2 (*pE)[i][j] = i+1 } //Determine F d1 = (*F)[i][j+1] - gp1e d2 = (*D)[i][j+1] - gp1o if d1 > d2 { (*F)[i][j] = d1; (*pF)[i][j] = (*pF)[i][j+1] } else { (*F)[i][j] = d2; (*pF)[i][j] = j+1 } 
//Determine D Mx := 0.0 if (*E)[i][j] > (*F)[i][j] { Mx = (*E)[i][j] (*P1)[i][j] = (*pE)[i][j] (*P2)[i][j] = j } else { Mx = (*F)[i][j] (*P1)[i][j] = i (*P2)[i][j] = (*pF)[i][j] } d1 = (*D)[i][j] + (*D)[i+1][j+1] if d1 >= Mx { (*D)[i][j] = d1 (*P1)[i][j] = i+1 (*P2)[i][j] = j+1 } else { (*D)[i][j] = Mx } } } //MxSc := D //Bck_Trk Dal := make([]IntRow, 2) Dal[0] = make(IntRow, 30000) Dal[1] = make(IntRow, 30000) i := 0 j := 0 Alen := 0 for i < number_of_residues_a && j < number_of_residues_b { if i == (*P1)[i][j] && j != (*P2)[i][j] { for k := j; k < (*P2)[i][j]; k++ { Dal[0][Alen] = -10 Dal[1][Alen] = k Alen += 1 } } else if i != (*P1)[i][j] && j == (*P2)[i][j] { for k := i; k < (*P1)[i][j]; k++ { Dal[0][Alen] = k Dal[1][Alen] = -10 Alen += 1 } } else if (*P1)[i][j] == i+1 && (*P2)[i][j] == j+1 { Dal[0][Alen] = i Dal[1][Alen] = j Alen += 1 } l := i i = (*P1)[i][j] j = (*P2)[l][j] } if i == number_of_residues_a && j < number_of_residues_b { for k := j; k < number_of_residues_b; k++ { Dal[0][Alen] = -10 Dal[1][Alen] = k Alen += 1 } } else if i < number_of_residues_a && j == number_of_residues_b { for k := i; k < number_of_residues_a; k++ { Dal[0][Alen] = k Dal[1][Alen] = -10 Alen += 1 } } Ial := make([]IntRow, 2) for i := 0; i < 2; i++ { Ial[i] = make(IntRow, Alen) for j := 0; j < Alen; j++ { Ial[i][j] = Dal[i][j] } } //Format results //NOTE: This has been modified so that iner and map12 now are stored in floats! 
nalign := 0 sim_tot := 0.0 iner := make(FloatRow, number_of_residues_a+number_of_residues_b) map12 := make(FloatMatrix, 0, number_of_residues_a+number_of_residues_b) for j := 0; j < Alen; j++ { k1 := Ial[0][j] k2 := Ial[1][j] iner[j] = 0 if k1 >= 0 && k2 >= 0 { iner[j] = (*equivalence_matrix)[k1][k2] if iner[j] > 0.0 { nalign += 1 sim_tot += iner[j] } //fmt.Printf("REALGN %d %d %d\n", k1, k2, iner[j]) map12 = append(map12, FloatRow{float64(k1), float64(k2), iner[j]}) } else if k1 >= 0 { //fmt.Printf("REALGN %d - 0\n", k1) map12 = append(map12, FloatRow{float64(k1), -1, -1}) } else if k2 >= 0 { //fmt.Printf("REALGN - %d 0\n", k2) map12 = append(map12, FloatRow{-1, float64(k2), -1}) } } return map12, nalign, sim_tot } //FASTA type FASTASequence struct { Label string Sequence string } func NewScannerLarge(file_path string) (*os.File, *bufio.Scanner) { file, err := os.Open(file_path) check(err) scanner := bufio.NewScanner(file) buffer_size := 10*1024*1024 //10 MB buffer scanner_buffer := make([]byte, 0, buffer_size) //10 MB buffer scanner.Buffer(scanner_buffer, buffer_size) return file, scanner } func ParseFASTA(path string) []FASTASequence { sequences := make([]FASTASequence, 0, 10000) label := "" buffer := bytes.Buffer{} file, scanner := NewScannerLarge(path) defer file.Close() //Parse sequences delimited by new sequences for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" { continue } if line[0] == '>' { sequence := buffer.String() if sequence != "" { sequences = append(sequences, FASTASequence{label, sequence}) } buffer.Reset() label = line[1:] } else { buffer.WriteString(line) } } //Parse final sequence sequence := buffer.String() if sequence != "" { sequences = append(sequences, FASTASequence{label, sequence}) } return sequences } //Get domains for chain func get_chain_domains(dash_url string, dash_input DASHInput) []RESTDomain { unique_domains := make(map[string]RESTDomain) response := http_query("GET", 
fmt.Sprintf(DASHChainDomainsURL(dash_url), dash_input.PDBID), nil) defer func() { io.Copy(ioutil.Discard, response.Body) response.Body.Close() response.Close = true }() scanner := bufio.NewScanner(response.Body) for scanner.Scan() { json_bytes := scanner.Bytes() var domain RESTDomain err := json.Unmarshal(json_bytes, &domain) check(err) if domain.StatusCode != -1 { fatal(domain) } domain.ResidueNumberInts = parse_residue_numbers(domain.ResidueNumbers) for _, residue_number := range(domain.ResidueNumberInts) { if residue_number >= dash_input.Start && residue_number <= dash_input.End { _, exist := unique_domains[domain.DomainID] if !exist { unique_domains[domain.DomainID] = domain } } } } domains := make([]RESTDomain, 0, len(unique_domains)) for _, domain := range(unique_domains) { domains = append(domains, domain) } return domains } //Get chain/domain and self-alignments func get_chain(dash_url string, pdb_id string) RESTChain { var chain RESTChain response := http_query("GET", fmt.Sprintf(DASHChainURL(dash_url), pdb_id), nil) defer func() { io.Copy(ioutil.Discard, response.Body) response.Body.Close() response.Close = true }() json_bytes, err := ioutil.ReadAll(response.Body) err = json.Unmarshal(json_bytes, &chain) check(err) if chain.StatusCode != -1 { fatal(chain) } return chain } func get_chain_self_alignment(dash_url string, pdb_id string) RESTAlignment { chain := get_chain(dash_url, pdb_id) var alignment RESTAlignment alignment.ID1 = chain.PDBID alignment.ID2 = chain.PDBID alignment.PRIMS1 = replace_non_standard_residues(chain.Sequence) alignment.PRIMS2 = alignment.PRIMS1 alignment.SECOS1 = strings.Repeat(" ", len(chain.Sequence)) alignment.SECOS2 = alignment.SECOS1 alignment.EQUIVALENCE = strings.Repeat("9", len(chain.Sequence)) return alignment } func parse_domain_alignment(json_string string) RESTAlignment { var alignment RESTAlignment err := json.Unmarshal([]byte(json_string), &alignment) check(err) if alignment.StatusCode == -1 { alignment.PRIMS1 = 
replace_non_standard_residues(alignment.PRIMS1)
		alignment.PRIMS2 = replace_non_standard_residues(alignment.PRIMS2)
	} else if alignment.StatusCode == 17 {
		alignment.LOWSIMILARITY = true
	} else {
		non_fatal(alignment)
	}
	return alignment
}

// dummy_prims masks an aligned sequence down to a residue range.
//
// prims is an aligned sequence in which '-' marks a gap. start and end are
// 1-based, inclusive residue counts (gaps are not counted). The returned
// string has the length of prims with every position outside the range
// replaced by '-'. The two returned ints are the half-open column range
// [first, last) of prims that spans the selected residues.
//
// When start or end fall outside the residues actually present, the column
// range is clamped to the string (start below 1 maps to column 0, end past
// the last residue maps to len(prims)), and an unreachable range is returned
// as an empty one. The result is therefore always a valid slice bound with
// first <= last.
func dummy_prims(prims string, start int, end int) (string, int, int) {
	//Create new dummy of prims with all gaps
	new_prims_bytes := make([]byte, len(prims))
	for i := range new_prims_bytes {
		new_prims_bytes[i] = '-'
	}
	//Fill in only residues between start and end, save indices to slice later
	start_index := -1
	end_index := -1
	count := 0
	for i := 0; i < len(prims); i++ {
		if count > end {
			break
		}
		if prims[i] != '-' {
			count += 1
		}
		if prims[i] != '-' && count >= start && count <= end {
			new_prims_bytes[i] = prims[i]
		}
		if count == start && start_index == -1 {
			start_index = i
		}
		if count == end && end_index == -1 {
			end_index = i
		}
	}
	//Clamp positions that were never reached
	if start_index == -1 {
		if start < 1 {
			// Range opens before the first residue.
			start_index = 0
		} else {
			// Range opens after the last residue: nothing is selected.
			start_index = len(prims)
		}
	}
	if end_index == -1 {
		if end > count {
			// Range closes after the last residue.
			end_index = len(prims) - 1
		} else {
			// Range closes before any residue was counted.
			end_index = start_index - 1
		}
	}
	if end_index+1 < start_index {
		end_index = start_index - 1
	}
	return string(new_prims_bytes), start_index, end_index+1
}

// slice_alignment restricts an alignment to the given 1-based residue ranges
// of the query (PRIMS1) and the subject (PRIMS2). All five strings are cut to
// the smallest column window containing both ranges, the equivalence is
// zeroed wherever either side is a gap, and SCORE becomes the sum of the
// remaining equivalence digits.
func slice_alignment(alignment RESTAlignment, query_start int, query_end int, subject_start int, subject_end int) RESTAlignment {
	new_query_prims, start_index, end_index := dummy_prims(alignment.PRIMS1, query_start, query_end)
	new_subject_prims, subject_start_index, subject_end_index := dummy_prims(alignment.PRIMS2, subject_start, subject_end)
	alignment.PRIMS1 = new_query_prims
	alignment.PRIMS2 = new_subject_prims
	// Use the union of the two column windows.
	if subject_start_index < start_index {
		start_index = subject_start_index
	}
	if subject_end_index > end_index {
		end_index = subject_end_index
	}
	alignment.PRIMS1 = alignment.PRIMS1[start_index:end_index]
	alignment.PRIMS2 = alignment.PRIMS2[start_index:end_index]
	alignment.SECOS1 = alignment.SECOS1[start_index:end_index]
	alignment.SECOS2 = alignment.SECOS2[start_index:end_index]
	alignment.EQUIVALENCE = alignment.EQUIVALENCE[start_index:end_index]
	new_equivalence_bytes := make([]byte, len(alignment.EQUIVALENCE))
	for i := 0; i < len(alignment.PRIMS1); i++ {
		if alignment.PRIMS1[i] != '-' && alignment.PRIMS2[i] != '-' {
			new_equivalence_bytes[i] =
alignment.EQUIVALENCE[i]
		} else {
			new_equivalence_bytes[i] = '0'
		}
	}
	alignment.EQUIVALENCE = string(new_equivalence_bytes)
	// The score is the sum of the per-column equivalence digits.
	new_score := 0
	for _, equivalence_byte := range(new_equivalence_bytes) {
		equivalence, err := strconv.Atoi(string(equivalence_byte))
		check(err)
		new_score += equivalence
	}
	alignment.SCORE = new_score
	return alignment
}

//Output formatting functions

// format_alignment_legacy renders an alignment as the text block written to
// the alignments file: a header line (with a "lowsimilarity" suffix when the
// flag is set), the query and template sequence and secondary-structure
// lines, the equivalence line, and a trailing empty line.
func format_alignment_legacy(alignment RESTAlignment) string {
	lines := make([]string, 0, 10)
	if alignment.LOWSIMILARITY {
		lines = append(lines, fmt.Sprintf("Query %s Template %s lowsimilarity", alignment.ID1, alignment.ID2))
	} else {
		lines = append(lines, fmt.Sprintf("Query %s Template %s", alignment.ID1, alignment.ID2))
	}
	lines = append(lines, fmt.Sprintf("QUERY %s", alignment.PRIMS1))
	lines = append(lines, fmt.Sprintf("QUERY %s", alignment.SECOS1))
	lines = append(lines, fmt.Sprintf("TEMPL %s", alignment.PRIMS2))
	lines = append(lines, fmt.Sprintf("TEMPL %s", alignment.SECOS2))
	lines = append(lines, fmt.Sprintf("Equivalence %s", alignment.EQUIVALENCE))
	lines = append(lines, "")
	return strings.Join(lines, "\n")
}

// average_over_window returns the mean of the strictly positive values of
// *equivalences within lookaround positions on either side of index (the
// window is clipped to the slice). With lookaround == 0 the value at index
// is returned unchanged. If the window holds no positive value the result is
// 0 rather than the NaN that 0/0 would produce.
func average_over_window(equivalences *[]float64, index int, lookaround int) float64 {
	if lookaround == 0 {
		return (*equivalences)[index]
	}
	start := index - lookaround
	if start < 0 {
		start = 0
	}
	end := index + lookaround + 1
	if end > len(*equivalences) {
		end = len(*equivalences)
	}
	total := 0.0
	count := 0.0
	for i := start; i < end; i++ {
		equivalence := (*equivalences)[i]
		if equivalence > 0 {
			total += equivalence
			count += 1.0
		}
	}
	if count == 0 {
		return 0.0
	}
	return total/count
}

// output_alignment_hat3 writes the restraint lines for one pairwise alignment
// to hat3_file. Equivalence digits below equivalence_threshold are dropped,
// the rest are rescaled and multiplied by equivalence_scale; only runs of at
// least minimum_segment_length consecutive positive columns are written.
// query, template and equivalence_string are indexed up to len(query), so
// the latter two must be at least that long.
func output_alignment_hat3(hat3_file *os.File, query_index int, template_index int, query string, template string, equivalence_string string, equivalence_threshold float64, equivalence_scale float64, minimum_segment_length int, equivalence_lookaround int) {
	equivalences := make([]float64, len(query))
	equivalence_mask := make([]bool, len(query))
	//Parse equivalence, enforce threshold, rescale equivalence, multiply by scale
	for i := 0; i < len(query);
i++ { equivalence_int, err := strconv.Atoi(string(equivalence_string[i])) check(err) equivalence := float64(equivalence_int) if equivalence >= equivalence_threshold { equivalence := (equivalence - equivalence_threshold+1) / (9 - equivalence_threshold+1) * 9.0 equivalence = equivalence * equivalence_scale equivalences[i] = equivalence } } //Make a mask based on minimum segment length last_zero_index := -1 last_index := len(equivalences) - 1 for x, equivalence := range(equivalences) { //Case where the end of a segment is a 0 equivalence if equivalence <= 0 { var start_index int var length int if last_zero_index == -1 { //If there is no last zero index length = x start_index = 0 } else { //If there is length = x - last_zero_index - 1 start_index = last_zero_index + 1 } //Fill mask if long enough if length >= minimum_segment_length { for start_index < x { equivalence_mask[start_index] = true start_index += 1 } } //Set this as the previous index with a zero last_zero_index = x //Special case for when the alignment ends without a zero equivalence } else if x == last_index { var start_index int var length int if last_zero_index == -1 { //If there is no last zero index then the length is the whole thing length = len(equivalences) start_index = 0 } else { //If there is length = x - last_zero_index start_index = last_zero_index + 1 } //Fill mask if long enough if length >= minimum_segment_length { for start_index <= x { equivalence_mask[start_index] = true start_index += 1 } } } } //Compute final equivalence and construct line lines := make([]string, 0, len(query)) query_i := -1 template_i := -1 for i := 0; i < len(query); i++ { if query[i] != '-' { query_i += 1 } if template[i] != '-' { template_i += 1 } if equivalence_mask[i] { windowed_equivalence := average_over_window(&equivalences, i, equivalence_lookaround) lines = append(lines, fmt.Sprintf("%d %d 1 %0.5f %d %d %d %d k", query_index, template_index, windowed_equivalence, query_i, query_i, template_i, template_i)) } } 
if len(lines) > 0 { fmt.Fprintln(hat3_file, strings.Join(lines, "\n")) } } func replace_non_standard_residues(sequence string) string { sequence = strings.ToUpper(sequence) sequence = strings.Replace(sequence, "U", "X", -1) sequence = strings.Replace(sequence, "J", "X", -1) sequence = strings.Replace(sequence, "O", "X", -1) return sequence } func filter_sequences_and_hat3(sequence_file_path string, hat3_path string, minimum_alignment_percent float64) { //Read Sequence Data and Save Indexes sequence_data := make([]Sequence, 0, 10000) sequence_file, err := os.Open(sequence_file_path) check(err) scanner := bufio.NewScanner(sequence_file) for scanner.Scan() { id := strings.TrimSpace(scanner.Text())[1:] scanner.Scan() sequence := strings.TrimSpace(scanner.Text()) sequence_data = append(sequence_data, Sequence{id, sequence}) } sequence_file.Close() //Read hat3 file to see which ID's were used hat3_file, err := os.Open(hat3_path) check(err) scanner = bufio.NewScanner(hat3_file) old_hat3_id_map := make(map[int][]int) previous_query_index := -1 previous_subject_index := -1 for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" { continue } fields := strings.Fields(line) query_index, err := strconv.Atoi(fields[0]) check(err) subject_index, err := strconv.Atoi(fields[1]) check(err) if query_index != previous_query_index || subject_index != previous_subject_index { old_hat3_id_map[query_index] = append(old_hat3_id_map[query_index], subject_index) old_hat3_id_map[subject_index] = append(old_hat3_id_map[subject_index], query_index) previous_query_index = query_index previous_subject_index = subject_index } } hat3_file.Close() //Append alignment counts to ID's for query_index, subject_indexes := range(old_hat3_id_map) { sequence_data[query_index].Label += fmt.Sprintf("||%d", len(subject_indexes)) } //Filter least-used sequences from hat3 alignment_count_cutoff := int(float64(len(old_hat3_id_map))*minimum_alignment_percent/100.0) filtered_old_hat3_index_map 
:= make(map[int]bool) for query_index, subject_indexes := range(old_hat3_id_map) { if len(subject_indexes) >= alignment_count_cutoff { filtered_old_hat3_index_map[query_index] = true for _, subject_index := range(subject_indexes) { if len(old_hat3_id_map[subject_index]) >= alignment_count_cutoff { filtered_old_hat3_index_map[subject_index] = true } } } } //Make map from new to old indexes existing_hat3_indexes := make([]int, 0, len(old_hat3_id_map)) for i, _ := range(filtered_old_hat3_index_map) { existing_hat3_indexes = append(existing_hat3_indexes, i) } sort.Ints(existing_hat3_indexes) hat3_id_map_old_new := make(map[int]int) for new_index, old_index := range(existing_hat3_indexes) { hat3_id_map_old_new[old_index] = new_index } //Write new hat3 file new_hat3_path := fmt.Sprintf("%s_cleaned", hat3_path) new_hat3_file, err := os.Create(new_hat3_path) check(err) hat3_file, err = os.Open(hat3_path) check(err) scanner = bufio.NewScanner(hat3_file) for scanner.Scan() { fields := strings.Fields(strings.TrimSpace(scanner.Text())) query_index, err := strconv.Atoi(fields[0]) check(err) subject_index, err := strconv.Atoi(fields[1]) check(err) new_query_index, new_query_index_exist := hat3_id_map_old_new[query_index] new_subject_index, new_subject_index_exist := hat3_id_map_old_new[subject_index] if new_query_index_exist && new_subject_index_exist { fields[0] = fmt.Sprintf("%d", new_query_index) fields[1] = fmt.Sprintf("%d", new_subject_index) fmt.Fprintln(new_hat3_file, strings.Join(fields, " ")) } } hat3_file.Close() new_hat3_file.Close() //Write Cleaned Sequences new_sequence_file, err := os.Create(sequence_file_path) check(err) defer new_sequence_file.Close() for _, old_index := range(existing_hat3_indexes) { sequence := sequence_data[old_index] sequence_lines := fmt.Sprintf(">%s\n%s", sequence.Label, sequence.Sequence) fmt.Fprintln(new_sequence_file, sequence_lines) } //Move new hat3 to old hat3 err = os.Rename(new_hat3_path, hat3_path) check(err) } func main() { 
fmt.Println("------------------")
	fmt.Println("MAFFT-DASH Client v", VersionNumber)
	fmt.Println("------------------")
	//Parse flags
	var help bool
	var input_path string
	var hat3_output_path string
	var alignments_output_path string
	var sequences_output_path string
	var slice bool
	var filter float64
	var equivalence_threshold float64
	var equivalence_scale float64
	var blosum_alpha float64
	var structure_only bool
	var minimum_segment_length int
	var equivalence_lookaround int
	var template_list_path string
	var dash_url string
	flag.BoolVar(&help, "help", false, "Display this help message.")
	flag.StringVar(&input_path, "i", "", fmt.Sprintf("Path to FASTA sequence file. (REQUIRED!) (Limit of %d sequences)", SequenceLimit))
	flag.StringVar(&dash_url, "url", "https://sysimm.org/dash/REST1.0/", "URL for DASH REST service.")
	flag.StringVar(&template_list_path, "templates", "", "Path to explicit list of DASH templates (Debug-only).")
	flag.BoolVar(&slice, "slice", true, "Slice alignments/sequences according to start/end positions.")
	flag.Float64Var(&filter, "filter", 22.5, "Filter sequences/hat3 where sequence is aligned to less than this percentage of the inputs.")
	flag.Float64Var(&equivalence_threshold, "threshold", 1.0, "Only use equivalence values >= this value for hat3.")
	flag.Float64Var(&equivalence_scale, "scale", 1.0, "Multiply equivalence values by this value before outputting to hat3.")
	flag.IntVar(&minimum_segment_length, "length", 5, "Only use equivalences when the number of consecutive residues >= the threshold are >= this length.")
	flag.IntVar(&equivalence_lookaround, "lookaround", 0, "Output the average of this many surrounding values instead of raw equivalence. (default 0)")
	flag.StringVar(&hat3_output_path, "hat3", "./hat3", "Output path for hat3 file.")
	flag.StringVar(&alignments_output_path, "alignments", "./dash_alignments", "Output path for raw alignments for debug purposes.")
	flag.StringVar(&sequences_output_path, "sequences", "./dash_sequences.fa", "Output path for template sequences for debug purposes.")
	flag.Float64Var(&blosum_alpha, "alpha", 0.75, "Background sequence BLOSUM multiplier.")
	flag.BoolVar(&structure_only, "structure-only", false, "Output alignments with only residues which exist in the structure. Sets BLOSUM Alpha to 0.0")
	flag.Parse()
	// Either a FASTA input or an explicit template list is required.
	if input_path == "" && template_list_path == "" {
		fmt.Println( "Please submit a FASTA sequence file with -i or a template list with -templates.")
		fmt.Println("------------------")
		flag.PrintDefaults()
		os.Exit(0)
	}
	if help {
		flag.PrintDefaults()
		os.Exit(0)
	}
	if structure_only {
		blosum_alpha = 0.0
	}
	fmt.Println("Querying from", dash_url)
	//Parse sequences and query DASH for representatives
	sequences := []FASTASequence{}
	if input_path != "" {
		sequences = ParseFASTA(input_path)
		if len(sequences) > SequenceLimit {
			fatal(fmt.Sprintf("Number of sequences greater than sequence limit of %d.", SequenceLimit))
		}
	}
	// template_list_buffer ends up holding one JSON search result per line,
	// whether it came from the local list or from the server.
	template_list_buffer := bytes.Buffer{}
	if template_list_path != "" {
		//Parse template list into JSON so that it's the same format as what the
		//server would return
		fmt.Println("Using local template list from", template_list_path)
		template_list, err := os.Open(template_list_path)
		check(err)
		scanner := bufio.NewScanner(template_list)
		i := 0
		for scanner.Scan() {
			i += 1
			line := strings.TrimSpace(scanner.Text())
			if line == "" {
				continue
			}
			// Each line is "id||start||end".
			split_line := strings.Split(line, "||")
			if len(split_line) != 3 {
				fatal(fmt.Sprintf("Line %d contains wrong number of fields.", i))
			}
			id := split_line[0]
			start, err := strconv.Atoi(split_line[1])
			check(err)
			end, err := strconv.Atoi(split_line[2])
			check(err)
			object := RESTSearch{-1, "", id, start, end}
			object_bytes, err := json.Marshal(object)
			check(err)
			_, err = template_list_buffer.Write(object_bytes)
			check(err)
			_, err = template_list_buffer.WriteString("\n")
			check(err)
		}
		template_list.Close()
	} else if input_path != "" {
		//Write request body for template selection
		fmt.Println("Building query for template selection.")
		request_body := bytes.Buffer{}
		for i, sequence := range(sequences) {
			//Use index if labels don't exist
			label := fmt.Sprintf("%d", i)
			if sequence.Label != "" {
				label = sequence.Label
			}
			_, err := request_body.WriteString(fmt.Sprintf(">%s\n%s\n", label, sequence.Sequence))
			check(err)
		}
		//Send query
		fmt.Println("Sending query for template selection...")
		response := http_query("POST", DASHChainSearchURL(dash_url), &request_body)
		_, err := io.Copy(&template_list_buffer, response.Body)
		check(err)
		io.Copy(ioutil.Discard, response.Body)
		response.Body.Close()
		fmt.Println("Waiting for response from server...")
	}
	//Parse results for template selection
	dash_inputs := make([]DASHInput, 0, 10*len(sequences))
	// id_map de-duplicates templates by their "id||start||end" key.
	id_map := make(map[string]bool)
	scanner := bufio.NewScanner(&template_list_buffer)
	for scanner.Scan() {
		json_bytes := scanner.Bytes()
		var result RESTSearch
		err := json.Unmarshal(json_bytes, &result)
		check(err)
		if result.StatusCode != -1 {
			fatal(result)
		}
		var dash_input DASHInput
		dash_input.FullID = fmt.Sprintf("%s||%d||%d", result.ID, result.Start, result.End)
		dash_input.PDBID = result.ID
		dash_input.Start = result.Start
		dash_input.End = result.End
		_, exists := id_map[dash_input.FullID]
		if !exists {
			dash_inputs = append(dash_inputs, dash_input)
			id_map[dash_input.FullID] = true
		}
	}
	// Sort by FullID; the position after sorting becomes the hat3 index below.
	sort.Slice(dash_inputs, func(i, j int) bool {
		return dash_inputs[i].FullID < dash_inputs[j].FullID
	})
	//Split chains into domains
	dash_input_map := make(map[string][]DASHInput)
	// dash_domain_id_map lists each domain ID once, in first-seen order.
	dash_domain_id_map := make([]string, 0, 10000)
	for i, dash_input := range(dash_inputs) {
		if i % 25 == 0 {
			percent := i*100/len(dash_inputs)
			fmt.Printf("Querying DASH for domains - [%d%%]\n", percent)
		}
		dash_input.Hat3Index = i
		dash_input.Domains = get_chain_domains(dash_url, dash_input)
		for _, domain := range(dash_input.Domains) {
			_, exist := dash_input_map[domain.DomainID]
			if !exist {
				dash_domain_id_map = append(dash_domain_id_map, domain.DomainID)
			}
			dash_input_map[domain.DomainID] = append(dash_input_map[domain.DomainID], dash_input)
		}
		// dash_input is a copy; store the updated value back.
		dash_inputs[i] = dash_input
	}
	fmt.Printf("Querying DASH for domains - [100%%]\n")
	//Open output files
	sequences_output_file, err := os.Create(sequences_output_path)
	check(err)
	alignments_output_file, err := os.Create(alignments_output_path)
	check(err)
	hat3_output_file, err := os.Create(hat3_output_path)
	check(err)
	//Get sequences and self-alignments
	for i, dash_input := range(dash_inputs) {
		if i % 25 == 0 {
			percent := i*100/len(dash_inputs)
			fmt.Printf("Querying DASH for sequences - [%d%%]\n", percent)
		}
		raw_alignment := get_chain_self_alignment(dash_url, dash_input.PDBID)
		alignment := raw_alignment
		if slice {
			alignment = slice_alignment(alignment, dash_input.Start, dash_input.End, dash_input.Start, dash_input.End)
		}
		dash_inputs[i].Sequence = alignment.PRIMS1
		fmt.Fprintln(sequences_output_file, fmt.Sprintf(">DASH_%s\n%s", dash_input.FullID, alignment.PRIMS1))
		alignment.ID1 = dash_input.FullID
		alignment.ID2 = dash_input.FullID
		fmt.Fprintln(alignments_output_file, format_alignment_legacy(alignment))
	}
	fmt.Printf("Querying DASH for sequences - [100%%]\n")
	//Batch alignment downloads in groups based on alignment limit
	// alignment_map is keyed by "<domain id 1>_<domain id 2>".
	alignment_map := make(map[string]RESTAlignment)
	request_body := bytes.Buffer{}
	alignment_count := 0
	chunk_index := 0
	number_of_domains := len(dash_domain_id_map)
	number_of_domain_alignments := (number_of_domains*(number_of_domains-1)/2)
	for x := 0; x < len(dash_domain_id_map); x++ {
		for y := x + 1; y < len(dash_domain_id_map); y++ {
			_, err := request_body.WriteString(fmt.Sprintf("%s_%s\n", dash_domain_id_map[x], dash_domain_id_map[y]))
			check(err)
			alignment_count += 1
			// Flush a full batch to the server.
			if alignment_count >= AlignmentLimit {
				current_progress := chunk_index*AlignmentLimit
				current_percent := current_progress*100/number_of_domain_alignments
				fmt.Printf("Downloading alignments - [%d%%]\n", current_percent)
				//Submit to server
				response := http_query("POST", DASHDomainAlignmentURL(dash_url), &request_body)
				scanner = bufio.NewScanner(response.Body)
				//Parse results
				for scanner.Scan() {
					alignment := parse_domain_alignment(scanner.Text())
					if alignment.StatusCode == -1 {
						alignment_id := alignment.ID1 + "_" + alignment.ID2
						alignment_map[alignment_id] = alignment
					}
				}
				//Reset
				chunk_index += 1
				io.Copy(ioutil.Discard, response.Body)
				response.Body.Close()
				alignment_count = 0
				request_body.Reset()
			}
		}
	}
	//Get remaining alignments
	if alignment_count > 0 {
		current_progress := chunk_index*AlignmentLimit
		current_percent := current_progress*100/number_of_domain_alignments
		fmt.Printf("Downloading alignments - [%d%%]\n", current_percent)
		//Submit to server
		response := http_query("POST", DASHDomainAlignmentURL(dash_url), &request_body)
		scanner = bufio.NewScanner(response.Body)
		//Parse results
		for scanner.Scan() {
			alignment := parse_domain_alignment(scanner.Text())
			if alignment.StatusCode == -1 {
				alignment_id := alignment.ID1 + "_" + alignment.ID2
				alignment_map[alignment_id] = alignment
			}
		}
		io.Copy(ioutil.Discard, response.Body)
		response.Body.Close()
	}
	fmt.Printf("Downloading alignments - [100%%]\n")
	//Combine domain alignments into full chain alignments
	number_of_chains := len(dash_inputs)
	number_of_chain_alignments := (number_of_chains*(number_of_chains-1)/2)
	alignment_count = 0
	for x := 0; x < len(dash_inputs); x++ {
		for y := x + 1; y < len(dash_inputs); y++ {
			if alignment_count % AlignmentLimit == 0 {
				current_percent := alignment_count*100/number_of_chain_alignments
				fmt.Printf("Combining domain alignments - [%d%%]\n", current_percent)
			}
			alignment_count += 1
			dash_input_a := dash_inputs[x]
			dash_input_b := dash_inputs[y]
			size_a := len(dash_input_a.Sequence)
			size_b := len(dash_input_b.Sequence)
			//Initialize equivalence matrix with BLOSUM
			equivalence_matrix := InitializeFloatMatrix(size_a, size_b)
			low_similarity := true
			// NOTE: x and y below shadow the outer chain indices.
			for x, query_residue := range dash_input_a.Sequence {
				for y, template_residue := range dash_input_b.Sequence {
					blosum_score := float64(BLOSUM62[byte(query_residue)][byte(template_residue)])
					if blosum_score > 0 {
						//Re-scale BLOSUM scores to RASH's 0-9.99 scale
						blosum_score = blosum_score / float64(BLOSUM62Max) * 9.999
						//Multiply by a factor to control the influence
						(*equivalence_matrix)[x][y] = blosum_score * blosum_alpha
					}
				}
			}
			for _, domain_a := range(dash_input_a.Domains) {
				for _, domain_b := range(dash_input_b.Domains) {
					//Fill matrix with equivalence data for realignment
					alignment_id := domain_a.DomainID + "_" + domain_b.DomainID
					alignment, exist := alignment_map[alignment_id]
					if !exist {
						// Try the pair in the other order and flip the result.
						alignment_id = domain_b.DomainID + "_" + domain_a.DomainID
						alignment, exist = alignment_map[alignment_id]
						if exist {
							alignment.Reverse()
						}
					}
					if exist {
						low_similarity = false
						index_a := -1
						index_b := -1
						for i := 0; i < len(alignment.PRIMS1); i++ {
							if alignment.PRIMS1[i] != '-' {
								index_a += 1
							}
							if alignment.PRIMS2[i] != '-' {
								index_b += 1
							}
							if alignment.PRIMS1[i] != '-' && alignment.PRIMS2[i] != '-' {
								// Convert residue numbers to matrix rows/columns
								// relative to the start of each input.
								matrix_index_a := domain_a.ResidueNumberInts[index_a] - dash_input_a.Start
								matrix_index_b := domain_b.ResidueNumberInts[index_b] - dash_input_b.Start
								equivalence, err := strconv.ParseFloat(string(alignment.EQUIVALENCE[i]), 64)
								check(err)
								if matrix_index_a >= 0 && matrix_index_a < size_a && matrix_index_b >= 0 && matrix_index_b < size_b {
									(*equivalence_matrix)[matrix_index_a][matrix_index_b] = equivalence
								}
							}
						}
					}
				}
			}
			//Construct alignment from realignment
			aligned_matrix, _, _ := AlignMatrix(equivalence_matrix)
			PRIMS1_bytes := []byte(strings.Repeat("-", len(aligned_matrix)))
			PRIMS2_bytes := []byte(strings.Repeat("-", len(aligned_matrix)))
			EQUIVALENCE_bytes := []byte(strings.Repeat("0", len(aligned_matrix)))
			for i, row := range(aligned_matrix) {
				index_a := int(row[0])
				index_b := int(row[1])
				// Only the first character of the truncated score is used.
				equivalence := strconv.Itoa(int(row[2]))
				if index_a != -1 && index_b != -1 {
					EQUIVALENCE_bytes[i] = equivalence[0]
				}
				if index_a != -1 {
					PRIMS1_bytes[i] = dash_input_a.Sequence[index_a]
				}
				if index_b != -1 {
					PRIMS2_bytes[i] = dash_input_b.Sequence[index_b]
				}
			}
			var alignment RESTAlignment
			alignment.ID1 = dash_input_a.FullID
			alignment.ID2 = dash_input_b.FullID
			alignment.PRIMS1 = string(PRIMS1_bytes)
			alignment.PRIMS2 = string(PRIMS2_bytes)
			alignment.SECOS1 = strings.Repeat(" ", len(aligned_matrix))
			alignment.SECOS2 = alignment.SECOS1
			alignment.EQUIVALENCE = string(EQUIVALENCE_bytes)
			alignment.LOWSIMILARITY = low_similarity
			//Output alignment and hat3
			fmt.Fprintln(alignments_output_file, format_alignment_legacy(alignment))
			if !alignment.LOWSIMILARITY {
				output_alignment_hat3(hat3_output_file, dash_input_a.Hat3Index, dash_input_b.Hat3Index, alignment.PRIMS1, alignment.PRIMS2, alignment.EQUIVALENCE, equivalence_threshold, equivalence_scale, minimum_segment_length, equivalence_lookaround)
			}
		}
	}
	fmt.Printf("Combining domain alignments - [100%%]\n")
	//Filter unused sequences from hat3 and sequences file which are below the threshold
	fmt.Println("Filtering structural restraint(hat3) file...")
	// Both files are closed first because the filter reopens them by path.
	sequences_output_file.Close()
	hat3_output_file.Close()
	filter_sequences_and_hat3(sequences_output_path, hat3_output_path, filter)
	//Combine original sequences with DASH sequences
	fmt.Println("Combining original sequences with DASH sequences...")
	dash_sequences := ParseFASTA(sequences_output_path)
	final_sequence_file, err := os.Create(sequences_output_path)
	check(err)
	for _, sequence := range(dash_sequences) {
		fmt.Fprintln(final_sequence_file, fmt.Sprintf(">%s\n%s", sequence.Label, sequence.Sequence))
	}
	for _, sequence := range(sequences) {
		fmt.Fprintln(final_sequence_file, fmt.Sprintf(">%s\n%s", sequence.Label, sequence.Sequence))
	}
	final_sequence_file.Close()
	//Final user output
	fmt.Println("------------------")
	fmt.Println("Ready to run MAFFT:")
	fmt.Println(" mafft --seedtable",
hat3_output_path, "--localpair", "--maxiterate 100", sequences_output_path) } mafft-7.505-without-extensions/core/nodepair.c0000644000175000017500000002351014224501721020747 0ustar nileshnilesh#include "mltaln.h" #include #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SHISHAGONYU 0 // for debug // from tbfast static int treein; static int treeout; // from pairlocalalign static int stdout_dist; static void arguments( int argc, char *argv[] ) { int c; nthread = 1; nadd = 0; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; stdout_dist = 0; // dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; penalty_shift_factor = 1000.0; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; specificityconsideration = 0.0; usenaivescoreinsteadofalignmentscore = 0; specifictarget = 0; nwildcard = 0; compacttree = 2; // tsuneni! 
treein = 0; treeout = 0; fastathreshold = 2.7; constraint = 2; // localhomfile = 0; // tbfast.c no wo tsukaunode comment out // reporterr( "argc=%d\n", argc ); // reporterr( "*argv=%s\n", *argv ); // reporterr( "(*argv)[0]=%c\n", (*argv)[0] ); while( --argc > 0 && (*++argv)[0] == '-' ) { // reporterr( "(*argv)[0] in while loop = %s\n", (*argv) ); while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; #if 0 case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #else #endif case 'C': nthread = myatoi( *++argv ); if( nthread == 0 ) nthread = 1; // fprintf( stderr, "nthread = %d\n", nthread ); --argc; #ifndef enablemultithread 
nthread = 1; #endif goto nextoption; case 'I': nadd = myatoi( *++argv ); // fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'u': specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; case 'c': stdout_dist = 1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; #if 0 case 'r': fmodel = -1; break; #endif case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; case 'a': alg = 'a'; break; case 'S': alg = 'S'; break; #endif case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'L': alg = 'L'; break; case 'Z': usenaivescoreinsteadofalignmentscore = 1; break; case 'B': // hitsuyou! memopt -M -B no tame break; #if 0 case 'Y': alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L; break; case 's': alg = 's'; break; case 'G': alg = 'G'; break; case 'B': // hitsuyou! memopt -M -B no tame break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'r': alg = 'r'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> R, last break; case 'V': alg = 'V'; break; #endif case 'T': // tbfast.c no noalign ni taiou break; case 'F': use_fft = 1; break; case 'U': treein = 1; break; case 't': treeout = 1; break; case 'y': divpairscore = 1; break; case '=': specifictarget = 1; break; case ':': nwildcard = 1; break; case 'q': lhlimit = myatoi( *++argv ); --argc; goto nextoption; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "pairlocalalign options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static int *nlen = NULL; static int *selfscore = NULL; static char **name = NULL, **seq = NULL; static double *eff = NULL; int i; static int ***topol = NULL; static Treedep *dep = NULL; static double **len = NULL; FILE *infp = NULL; char c; arguments( argc, argv ); if( alg != 'A' && alg != 'L' && alg != 'N' ) { reporterr( "alg %c is not yet supported\n", alg ); exit( 1 ); } if( alg != 'N' && usenaivescoreinsteadofalignmentscore == 1 ) { reporterr( "The combination of usenaivescoreinsteadofalignmentscore and alg %c is not yet supported\n", alg ); exit( 1 ); } if( fastathreshold < 0.0001 ) { constraint = 0; lhlimit = 0; } if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } #if !defined(mingw) && !defined(_MSC_VER) setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou. 
#endif seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); selfscore = AllocateIntVec( njob ); topol = AllocateIntCub( njob, 2, 0 ); len = AllocateFloatMtx( njob, 2 ); eff = AllocateDoubleVec( njob ); dep = (Treedep *)calloc( njob, sizeof( Treedep ) ); #if 0 readData( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); fclose( infp ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); // WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); if( treein ) loadtree( njob, topol, len, name, nlen, dep, treeout ); pairalign_node( njob, nlenmax, name, seq, topol, len, dep, treein, treeout ); FreeCharMtx( seq ); seq = NULL; FreeCharMtx( name ); name = NULL; free( nlen ); nlen = NULL; free( selfscore ); selfscore = NULL; for( i=0; i for output # Uses seekquencer_v3 backend # # 4.0 05.12.14 Added new options: -run -trd -noin # Sets -seqa fast in seekquencer.pl # Uses seekquencer_v4 backend # # 4.1 05.19.14 Added a check on running REST requests before proceeding # to avoid server load problems # # 4.2 05.27.14 Seq limit processing done in seekquencer.pl script # to avoid server load problems # # 4.3 07.22.14 Added new option: -seqd # Blast limit changed from factor of 10 to -blim option # Timing on sleep changed; added srand() for making seed # Moved the job limit processing to server side # # 4.4 08.05.14 Modified to work in multiple OS # # #################################################################################### use strict; use Getopt::Long; use File::Path qw(make_path remove_tree); use Cwd; use LWP::Simple; use LWP::UserAgent; # to prevent error: Header line too long (limit is 8192) use LWP::Protocol::http; push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); my $BASEURL = 
"http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft"; my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG ); my $OUTTYPE = "mafftash"; my $SEQDATABASE = "uniref100"; my $SEQLIMIT = 100; my $SEQBLASTLIMIT = 100; my $RUNMODE = "normal"; # thread|normal my $THREADCOUNT = 3; GetOptions ( 'inp=s' => \$INPUTFILE, 'idf=s' => \$IDLISTFILE, 'seqf=s' => \$SEQFASTAFILE, 'out=s' => \$OUTPUTFILE, 'str' => \$STRFLAG, 'seq' => \$SEQFLAG, 'seqd=s' => \$SEQDATABASE, 'lim=i' => \$SEQLIMIT, 'blim=i' => \$SEQBLASTLIMIT, 'pre' => \$EVALFLAG, 'noin' => \$NOINFLAG, 'mod=s' => \$OUTTYPE, 'run=s' => \$RUNMODE, 'trd=i' => \$THREADCOUNT, ); my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0; print STDERR "[Seekquencer-premafft 4.4 on $^O]\n"; # set temp directory my $CWD = getcwd; my $TMP = "$CWD/seekpremafft$$"; make_path($TMP) unless -d $TMP; ###### # validation help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE ); help("'-inp' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) ); help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) ); help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) ); help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! 
-e $SEQFASTAFILE || !-s $SEQFASTAFILE) );

help("Required parameter: output file '-out'")
    unless ( defined $OUTPUTFILE );

help("Set either '-str' or '-seq' or dont set any at all")
    if ( defined $STRFLAG && defined $SEQFLAG );

help("Invalid value for '-seqd '")
    if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot");

help("Invalid value for '-mod '")
    if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" );

help("Invalid value for '-run '")
    if ( $RUNMODE ne "thread" && $RUNMODE ne "normal" );

help("Invalid value for '-trd '; count should be between 1 and 5 (inclusive)")
    if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) );


######
# check existing requests
print STDERR "Checking server status...\n";

# generate seed
srand($$);

# sleep a bit to give time for lsf response
sleep(int(rand(6))+1);

my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# get: check if you can send a new request this time
my $jobsResponse = $browser->get("$BASEURL/isAllowed");

if ( $jobsResponse->is_success )
{
    my $status = parseJobQueryResponse($jobsResponse->content);
    # a busy server is reported with exit status 0 (explicit second argument)
    bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) unless $status > 0;
}
else
{
    bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
}


######
# make a temporary input if lists were provided
unless ( defined $INPUTFILE )
{
    $INPUTFILE = "$TMP/input.homemade";
    open INPF, ">$INPUTFILE" or bail("Error writing to input file.");

    if ( defined $IDLISTFILE )
    {
        open IDLIST, "<$IDLISTFILE" or bail("Error reading input file.");
        while( <IDLIST> )
        {
            chomp;
            # the first run of five word characters on the line is taken as pdbid+chain
            if ( /(\w{5})/ )
            {
                print INPF ">PDBID\n$1\n";
            }
        }
        close IDLIST;
    }

    if ( defined $SEQFASTAFILE )
    {
        open FASTA, "<$SEQFASTAFILE" or bail("Error reading input file.");
        while( <FASTA> )
        {
            chomp;
            print INPF "$_\n";
        }
        close FASTA;
    }

    close INPF;
}


######
# prepare parameters
print STDERR "Preparing parameters for service request...\n";

my @parameters = ();
push(@parameters, "fileinput" => ["$INPUTFILE"]);
push(@parameters, "out_type" => $OUTTYPE);
push(@parameters, "rest_flag" => "1");
push(@parameters, "cls_flag" => "1");
push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;

push(@parameters, "run_mode" => $RUNMODE);
push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";

if ( defined $STRFLAG )
{
    push(@parameters, "str_flag" => "1");
    push(@parameters, "ash_flag" => "1");
}
elsif ( defined $SEQFLAG )
{
    push(@parameters, "seq_flag" => "1");
    push(@parameters, "seq_algorithm" => "fast");
    push(@parameters, "seq_database" => $SEQDATABASE);
    push(@parameters, "seq_blastlimit" => $SEQBLASTLIMIT);
    push(@parameters, "seq_outputlimit" => $SEQLIMIT);
}
else
{
    push(@parameters, "str_flag" => "1");
    push(@parameters, "ash_flag" => "1");
    push(@parameters, "seq_flag" => "1");
    push(@parameters, "seq_algorithm" => "fast");
    push(@parameters, "seq_database" => $SEQDATABASE);
    push(@parameters, "seq_blastlimit" => $SEQBLASTLIMIT);
    push(@parameters, "seq_outputlimit" => $SEQLIMIT);
}


######
# start rest service
print STDERR "Sending service request...\n";

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);

# get response from post request
my ($status, $seekid) = parseResponse($postResponse->content);

my $MAXTRIES = 3;
my $STIMER = 5;
my $timer = 0;

print STDERR "Request sent! Waiting for response...[$seekid]\n";

my $checklist = {};

# wait for results until it becomes available
while(1)
{
    # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,,
    $timer = $timer >= 60 ? 60 : $timer+$STIMER;
    sleep($timer+int(rand(4)));

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$seekid");

    if ( $getResponse->is_success )
    {
        # get response from get request
        ($status, $seekid) = parseResponse($getResponse->content);
        next unless ( $status eq "done" );

        # if job is finished and ready
        print STDERR "Results found!\n";
        my $csfile = "$TMP/checksum";
        my $try1 = 1;

        # fetch the checksum list, at most $MAXTRIES times
        while(1)
        {
            print STDERR "Fetching Results... [Trial $try1]\n";

            if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
            {
                # get response from get request
                $checklist = extractchecksum($csfile);
                bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );

                foreach my $id ( sort keys %$checklist )
                {
                    sleep 1;
                    my $checkfile = "$TMP/$id";
                    my $checkid = $checklist->{$id};
                    my $try2 = 1;

                    # download one result file, at most $MAXTRIES times
                    while(1)
                    {
                        unlink $checkfile if -e $checkfile;
                        if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                        {
                            # no checksum verification on Windows
                            last if $ISWINDOWS;

                            my $hashid = getchecksum($checkfile);
                            #print STDERR "[hashid]$hashid [checkid]$checkid\n";

                            # an empty $hashid means no md5 tool was found; the file is then accepted unverified
                            if ($hashid ne "" && $hashid ne $checkid )
                            {
                                #unlink $checkfile if -e $checkfile;
                                bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                                $try2++;
                                sleep $STIMER;
                            }
                            else
                            {
                                last;
                            }
                        }
                        else
                        {
                            bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                            $try2++;
                            sleep $STIMER;
                        }
                    }
                }
                last;
            }
            else
            {
                bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
                $try1++;
                sleep $STIMER;
            }
        }
        last;
    }
    else
    {
        bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
    }
}

# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";

foreach my $id ( sort keys %$checklist )
{
    if ( $id =~ /^$seekid\.out(\.str|\.seq)?/ )
    {
        bail("Error: Output file corrupted!") unless -e "$TMP/$id";
        # $1 is the optional ".str"/".seq" suffix captured above
        appendToFile("$TMP/$id","$OUTPUTFILE".$1);
    }
}

cleanup();


####################
####################

# Split a "seekid:status" response into ($status, $seekid).
# Both are empty strings when the response does not have that form.
sub parseResponse
{
    my $response = shift;
    my $status = "";
    my $seekid = "";

    if ( $response =~ /^([^\s:]+):([^\s:]+)$/ )
    {
        $seekid = $1;
        $status = $2;
    }

    return ($status, $seekid);
}

# Return the number carried by an all-digit response; 100 for anything else.
sub parseJobQueryResponse
{
    my $response = shift;
    my $jobs = 100;

    if ( $response =~ /^(\d+)$/ )
    {
        $jobs = $1;
    }

    return $jobs;
}

# Read "<checksum> <name>" lines from $infile and return a reference to a
# hash that maps name => checksum (empty when the file cannot be opened).
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    #open CSUM, "tar -zxf $infile -O|" or return \%dataset;
    open CSUM, "<$infile" or return \%dataset;

    while(<CSUM>)
    {
        chomp;
        if ( /^(\S+)\s+(\S+)$/ )
        {
            $dataset{$2} = $1;
        }
    }

    close CSUM;

    return \%dataset;
}

# Return the text of the "error" field of a response, or the whole response
# when no such field is found.
sub parseError
{
    my $response = shift;

    #"error":"Invalid number of inputs found."
    my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : $response;
    return $errorstr;
}

# Return the MD5 digest of $infile as printed by md5sum or md5 -q, or ""
# when neither tool is installed or it cannot be started.
sub getchecksum
{
    my $infile = shift;

    # md5 binary check
    my @md5cmd = ();

    if ( -x "/usr/bin/md5sum" )
    {
        @md5cmd = ("/usr/bin/md5sum");
    }
    elsif ( -x "/sbin/md5" )
    {
        @md5cmd = ("/sbin/md5", "-q");
    }

    return "" unless @md5cmd;

    my $checksum = "";

    # List-form pipe open: the file name (built from a server-supplied id)
    # is handed to the tool as one argument and never parsed by a shell.
    open(MD5EXE, "-|", @md5cmd, $infile) or return "";

    while(<MD5EXE>)
    {
        if (/^(\S+)\s+(\S+)$/)
        {
            $checksum = $1;
            last;
        }
        elsif (/^(\S+)$/)
        {
            $checksum = $1;
            last;
        }
    }

    close MD5EXE;

    return $checksum;
}

# Run $command through backticks; returns 0 only when it could not be started.
sub backticks
{
    my $command = shift;

    `$command`;
    return ($? == -1) ? 0 : 1;
}

# Print $str (if given) to STDERR, remove the temporary directory and exit.
# $status is the exit code: 0 for success, 1 for error; it defaults to 1.
sub bail
{
    my $str = shift;
    my $status = shift; #0 for success and 1 for error

    # test $status itself; a bare "defined" would test $_ instead
    $status = 1 unless defined $status;

    print STDERR "$str\n" if defined $str;

    cleanup();

    exit($status);
}

# Delete every file inside $TMP and then $TMP itself.
sub cleanup
{
    return if ($TMP eq "" || !-d $TMP);

    opendir(MAINDIR, $TMP);
    my @files = readdir(MAINDIR);
    closedir(MAINDIR);

    foreach my $file (@files)
    {
        unlink "$TMP/$file" if -e "$TMP/$file";
    }

    remove_tree($TMP);
}

# Append the contents of $inpfile to $outfile.
sub appendToFile
{
    my $inpfile = shift;
    my $outfile = shift;

    open INPF, "<$inpfile" or bail("Server Error: Error in reading file.");
    open OUTF, ">>$outfile" or bail("Server Error: Error in writing to file.");

    while(<INPF>)
    {
        print OUTF $_;
    }

    close OUTF;
    close INPF;
}

# Print the usage text, then leave through bail() with message $str.
sub help
{
    my $str = shift;

    print <<'HELPME';

USAGE
  ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
  ./seekquencer_premafft.pl -idf <IDLISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]


PARAMETERS
  -inp <INFILE>
       INFILE is a FASTA-formatted file
       PDB entries are written as:
          >PDBID
          [5-character pdbid+chain]

       While sequence entries are written as:
          >[id]
          [sequence]

  -idf <IDLISTFILE>
       IDLISTFILE is a file containing a list of pdbids
       pdbids should be a 5-character pdbid + chain

  -seqf <SEQFASTA>
       SEQFASTA is a fasta file
       entries are written as:
          >[id]
          [sequence]

  -out <OUTFILE>
       Results are writen to a file named OUTFILE

  -str
       Only structures will be collected by Seekquencer
       If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer

  -seq
       Only sequences will be collected by Seekquencer
       If neither -str nor -seq is set, both structures and sequences will be
collected by Seekquencer OPTIONAL PARAMETERS: -seqd Search Database for sequence homologs. Default value: uniref100 -lim this sets the maximum number of sequence homologs collected. Default value: 100 -blim this sets the -b and -v value when running blastall. Default value: 100 -pre When -str is set, this will compare all structures against all using pdp-ash This would ensure that all structures collected are matching All structures that do not match will be removed -noin When set, inputs will not be included in the output -mod Defines the output format mafftash (default) will print a mafftash-formatted fasta file mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq) fasta will print a regular fasta file -run thread will run simultaneous jobs during blast queries (faster but takes more nodes) normal will run sequential blast queries (slower but takes less nodes) Default value: normal -trd if -run is defined, this sets the number of parallel jobs to run. 
Default value: 3 HELPME bail($str); } mafft-7.505-without-extensions/core/disttbfast.c0000644000175000017500000043104614224501721021324 0ustar nileshnilesh#include "mltaln.h" #define REPORTCOSTS 0 #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SKIP 1 #define ITERATIVECYCLE 2 #define END_OF_VEC -1 static int treein; static int topin; static int treeout; static int noalign; static int distout; static int tuplesize; static int subalignment; static int subalignmentoffset; static int nguidetree; static int sparsepickup; static int keeplength; static int ndeleted; static int mapout; static int smoothing; static double maxdistmtxsize; static int nthreadtb; static int useexternalanchors; static int oneiteration; static double maxanchorseparation; #if 0 #define PLENFACA 0.0123 #define PLENFACB 10252 #define PLENFACC 10822 #define PLENFACD 0.5 #define DLENFACA 0.01 #define DLENFACB 2445 #define DLENFACC 2412 #define DLENFACD 0.1 #else #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define D6LENFACA 0.01 #define D6LENFACB 2500 #define D6LENFACC 2500 #define D6LENFACD 0.1 #define D10LENFACA 0.01 #define D10LENFACB 1000000 #define D10LENFACC 1000000 #define D10LENFACD 0.0 #endif typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _msacompactdistmtxthread_arg { int njob; int thread_no; int *selfscore; double **partmtx; char **seq; int **skiptable; double *mindist; int *mindistfrom; int *jobpospt; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } msacompactdistmtxthread_arg_t; typedef struct _compactdistmtxthread_arg { int njob; int thread_no; int *nogaplen; int **pointt; int *selfscore; double **partmtx; int *jobpospt; double *mindist; int *mindistfrom; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } compactdistmtxthread_arg_t; typedef struct _msadistmtxthread_arg { int njob; int thread_no; int *selfscore; double **iscore; double **partmtx; char **seq; int **skiptable; Jobtable *jobpospt; 
#ifdef enablemultithread pthread_mutex_t *mutex; #endif } msadistmtxthread_arg_t; #ifdef enablemultithread // ue futatsu ha singlethread demo tsukau typedef struct _treebasethread_arg { int thread_no; int njob; int *nrunpt; int *nlen; int *jobpospt; int ***topol; Treedep *dep; double ***cpmxhist; int **memhist; char **aseq; double *effarr; int *alloclenpt; int *fftlog; char *mergeoralign; double **newdistmtx; int *selfscore; ExtAnch *extanch; int **anchindex; pthread_mutex_t *mutex; pthread_cond_t *treecond; } treebasethread_arg_t; typedef struct _distancematrixthread_arg { int thread_no; int njob; int *jobpospt; int **pointt; double **mtx; pthread_mutex_t *mutex; } distancematrixthread_arg_t; #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; nthreadpair = 1; nthreadtb = 1; outnumber = 0; topin = 0; treein = 0; treeout = 0; distout = 0; noalign = 0; nevermemsave = 0; inputfile = NULL; nadd = 0; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; useexternalanchors = 0; oneiteration = 0; force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; sueff_global = 0.1; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty_dist = NOTSPECIFIED; ppenalty = -1530; ppenalty_ex = NOTSPECIFIED; penalty_shift_factor = 1000.0; poffset = -123; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; TMorJTT = JTT; scoreout = 0; spscoreout = 0; tuplesize = 6; subalignment = 0; subalignmentoffset = 0; legacygapcost = 0; specificityconsideration = 0.0; nguidetree = 1; sparsepickup = 0; keeplength = 0; mapout = 0; smoothing = 0; nwildcard = 0; maxanchorseparation = 1000.0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( 
(c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; reporterr( "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); reporterr( "nadd = %d\n", nadd ); --argc; goto nextoption; case 'V': ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // reporterr( "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); reporterr( "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // reporterr( "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); reporterr( "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // reporterr( "blosum %d / kimura 200 \n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; reporterr( "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; reporterr( "tm %d\n", pamN ); --argc; goto nextoption; case 'C': nthreadpair = nthread = myatoi( *++argv ); reporterr( "nthread = %d\n", nthread ); reporterr( "nthreadpair = %d\n", nthread ); if( strchr( *argv, '-' ) ) nthreadtb = myatoi( strchr( *argv, '-' )+1 ); else nthreadtb = nthread; reporterr( "nthreadtb = %d\n", nthreadtb ); --argc; goto nextoption; case 's': specificityconsideration = (double)myatof( *++argv ); // reporterr( "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case 't': treeout = 1; break; case '^': treeout = 2; break; case 'T': noalign = 1; break; case 'r': 
oneiteration = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'L': legacygapcost = 1; break; case 'e': fftscore = 0; break; case 'x': maxanchorseparation = myatof( *++argv ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'R': fftRepeatStop = 1; break; #endif case 'n' : outnumber = 1; break; #if 0 case 's': treemethod = 's'; break; case 'q': treemethod = 'q'; // minimum break; #endif case 'q': sparsepickup = myatoi( *++argv ); // reporterr( "sparsepickup = %d\n", sparsepickup ); --argc; goto nextoption; case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); // fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': nguidetree = myatoi( *++argv ); // reporterr( "nguidetree = %d\n", nguidetree ); --argc; goto nextoption; #if 0 case 'a': alg = 'a'; break; case 'H': alg = 'H'; break; case 'R': alg = 'R'; break; #endif case 'A': alg = 'A'; break; case '&': alg = 'a'; break; case '@': alg = 'd'; break; case 'N': nevermemsave = 1; break; case 'M': alg = 'M'; break; #if 0 case 'S' : scoreout = 1; // for checking parallel calculation break; #else case 'S' : spscoreout = 1; // 2014/Dec/30, sp score break; #endif case 'B': // hitsuyou! 
memopt -M -B no tame break; case 'F': use_fft = 1; break; case 'l': useexternalanchors = 1; case 'G': use_fft = 1; force_fft = 1; break; #if 0 case 'V': topin = 1; break; #endif case 'U': treein = 1; break; case 'u': weight = 0; tbrweight = 0; break; case 'v': tbrweight = 3; break; #if 1 case 'd': disp = 1; break; #endif #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif case 'J': tbutree = 0; break; #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; #endif case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'W': tuplesize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'Z': checkC = 1; break; #endif case 'Y': keeplength = 1; break; case 'z': mapout = 2; break; case 'Z': mapout = 1; break; case 'p': smoothing = 1; break; case ':': nwildcard = 1; break; default: reporterr( "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { reporterr( "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { reporterr( "conflicting options : o, m or u\n" ); exit( 1 ); } } static int varpairscore( int nseq, int npick, int nlenmax, char **seq, int seed ) { int i, j, npair; int *slist; char **pickseq; double score; double scoreav; double scoreav2; double scorestd; double scorevar; slist = calloc( nseq, sizeof( int ) ); pickseq = AllocateCharMtx( npick, nlenmax ); reporterr( "nseq = %d, nlenmax=%d, seed=%d\n", nseq, nlenmax, seed ); srand( seed ); for( i=0; i longestlen[i][0] ) { longestlen[i][0] = seqlen[m]; longestseq[i][0] = m; } // reporterr( "%d ", topol[i][0][j] ); } // reporterr( "longest = %d (%d)\n", longestlen[i][0], longestseq[i][0] ); longestlen[i][1] = -1; longestseq[i][1] = -1; for( j=0; (m=topol[i][1][j])!=-1; j++ ) // sukoshi muda { if( seqlen[m] > longestlen[i][1] ) { longestlen[i][1] = seqlen[m]; longestseq[i][1] = m; } // reporterr( "%d ", topol[i][1][j] ); } // reporterr( 
"longest = %d (%d)\n", longestlen[i][1], longestseq[i][1] ); } m = 1; for( i=n-2; i>-1; i-- ) { // reporterr( "longest[%d][0] = %d (%d)\n", i, longestlen[i][0], longestseq[i][0] ); // reporterr( "longest[%d][1] = %d (%d)\n", i, longestlen[i][1], longestseq[i][1] ); select[longestseq[i][0]] = 1; select[longestseq[i][1]] = 1; m += 1; if( m >= sparsepickup ) break; } for( i=0, k=0, j=0; injob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; int *nogaplen = targ->nogaplen; int **pointt = targ->pointt; int *jobpospt = targ->jobpospt; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int i, j; double tmpdist, preference, tmpdistx; //, tmpdisty; int *table1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == -1 ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i-1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == -1 ) { commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i-1; } table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { if( nthreadpair ) reporterr( "\r% 5d / %d (thread %4d)", njob-i, njob, thread_no ); else reporterr( "\r% 5d / %d", njob-i, njob ); } makecompositiontable_p( table1, pointt[i] ); // for( j=i+1; j-1; j-- ) { tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] ); preference = preferenceval( i, j, njob ); tmpdistx = tmpdist + preference; if( tmpdistx < mindist[i] ) { mindist[i] = tmpdistx; mindistfrom[i] = j; } // preference = preferenceval( j, i, njob ); // tmpdisty = tmpdist + preference; // if( tmpdisty < mindist[j] ) // { // mindist[j] = tmpdisty; // mindistfrom[j] = i; // } if( partmtx[i] ) partmtx[i][j] = tmpdist; if( partmtx[j] ) partmtx[j][i] = tmpdist; } free( table1 ); } } static void 
*ylcompactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { compactdistmtxthread_arg_t *targ = (compactdistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; int *nogaplen = targ->nogaplen; int **pointt = targ->pointt; int *jobpospt = targ->jobpospt; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int i, j; double tmpdist, preference, tmpdistx, tmpdisty; int *table1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == njob-1 ) { commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; } table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { if( nthreadpair ) reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no ); else reporterr( "\r% 5d / %d", i+1, njob ); } makecompositiontable_p( table1, pointt[i] ); for( j=i+1; j-1; j-- ) { tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] ); preference = preferenceval( i, j, njob ); tmpdistx = tmpdist + preference; if( tmpdistx < mindist[i] ) { mindist[i] = tmpdistx; mindistfrom[i] = j; } preference = preferenceval( j, i, njob ); tmpdisty = tmpdist + preference; if( tmpdisty < mindist[j] ) { mindist[j] = tmpdisty; mindistfrom[j] = i; } if( partmtx[i] ) partmtx[i][j] = tmpdist; if( partmtx[j] ) partmtx[j][i] = tmpdist; } free( table1 ); } } static void *msacompactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { msacompactdistmtxthread_arg_t *targ = (msacompactdistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = 
targ->partmtx; char **seq = targ->seq; int **skiptable = targ->skiptable; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int *jobpospt = targ->jobpospt; double tmpdist, preference, tmpdistx; //, tmpdisty; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == -1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } *jobpospt = i-1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == -1 ) { return( NULL ); } *jobpospt = i-1; } if( i % 100 == 0 ) { if( nthreadpair ) fprintf( stderr, "\r% 5d / %d (thread %4d)", njob-i, njob, thread_no ); else fprintf( stderr, "\r% 5d / %d", i, njob ); } for( j=i-1; j>-1; j-- ) // for( j=i+1; jnjob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; char **seq = targ->seq; int **skiptable = targ->skiptable; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int *jobpospt = targ->jobpospt; double tmpdist, preference, tmpdistx, tmpdisty; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == njob-1 ) { return( NULL ); } *jobpospt = i+1; } if( i % 100 == 0 ) { if( nthreadpair ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); else fprintf( stderr, "\r% 5d / %d", i, njob ); } // for( j=i-1; j>-1; j-- ) for( j=i+1; jnjob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo, iscoretmp; int i, j; int nlim = njob-1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; // (jobpospt-i)++ 
dato, shuuryou hantei no mae ni ++ surunode, tomaranakunaru. if( i == nlim ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i += 1; pthread_mutex_unlock( targ->mutex ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); } else #endif { i = (jobpospt->i)++; if( i == nlim ) return( NULL ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob ); } ssi = selfscore[i]; for( j=i+1; j 10 ) iscoretmp = 10.0; // 2015/Mar/17 } if( iscoretmp < 0.0 ) { reporterr( "WARNING: negative distance, iscoretmp = %f\n", iscoretmp ); iscoretmp = 0.0; } iscore[i][j-i] = iscoretmp; // printf( "i,j=%d,%d, iscoretmp=%f\n", i, j, iscoretmp ); } } } #else static void *msadistmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo, iscoretmp; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) pthread_mutex_lock( targ->mutex ); #endif j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; j = i + 1; if( i == njob-1 ) { #ifdef enablemultithread if( nthreadpair ) pthread_mutex_unlock( targ->mutex ); #endif return( NULL ); } } jobpospt->j = j; jobpospt->i = i; #ifdef enablemultithread if( nthreadpair ) pthread_mutex_unlock( targ->mutex ); #endif if( nthreadpair ) { if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); } else { if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob ); } ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); //fprintf( stderr, "bunbo = %f\n", bunbo ); //fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) ); if( bunbo == 0.0 ) iscoretmp = 2.0; // 
2013/Oct/17 else { iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast if( iscoretmp > 10 ) iscoretmp = 10.0; // 2015/Mar/17 } iscore[i][j-i] = iscoretmp; } } #endif #ifdef enablemultithread static void *distancematrixthread( void *arg ) { distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; int thread_no = targ->thread_no; int njob = targ->njob; int *jobpospt = targ->jobpospt; int **pointt = targ->pointt; double **mtx = targ->mtx; int *table1; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no ); } makecompositiontable_p( table1, pointt[i] ); for( j=i; j-1; j++ ) { if( pairanch[j].i == k ) { // reporterr( "pairanch[%d].endi: %d->%d\n", j, pairanch[j].endi, map[pairanch[j].endi] ); pairanch[j].starti = map[pairanch[j].starti]; pairanch[j].endi = map[pairanch[j].endi]; } } } free( map ); len = strlen( seq2[0] )+1; map = calloc( sizeof( int ), len ); for( k=0; k-1; j++ ) { if( pairanch[j].j == k ) { // reporterr( "pairanch[%d].endj: %d->%d\n", j, pairanch[j].endj, map[pairanch[j].endj] ); pairanch[j].startj = map[pairanch[j].startj]; pairanch[j].endj = map[pairanch[j].endj]; } } } free( map ); } static int anchidcomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->i != ((ExtAnch *)p)->i ) return ((ExtAnch *)p)->i - ((ExtAnch *)q)->i; return ((ExtAnch *)p)->j - ((ExtAnch *)q)->j; } static int anchcomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->starti != ((ExtAnch *)p)->starti ) return ((ExtAnch *)p)->starti - ((ExtAnch *)q)->starti; return (int)((void *)p - (void *)q); } static int 
anchscorecomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->score != ((ExtAnch *)p)->score ) return ((ExtAnch *)q)->score - ((ExtAnch *)p)->score; return (int)((void *)q - (void *)p); } static void indexanchors( ExtAnch *a, int **idx ) { int n; for( n=0; a[n].i>-1; n++ ) ; qsort( a, n, sizeof( ExtAnch ), anchidcomp ); for( n=0; a[n].i>-1; n++ ) { // reporterr( "%d, %dx%d, %d-%d x %d-%d\n", n, a[n].i, a[n].j, a[n].starti, a[n].endi, a[n].startj, a[n].endj ); if( idx[a[n].i][a[n].j] == -1 ) idx[a[n].i][a[n].j] = n; } #if 0 int m; for( n=0; n %d\n", n, m, idx[n][m] ); exit( 1 ); #endif } #if 0 static void checkanchors_internal( ExtAnch *a ) { int p, q, r, s; int i, j; int consistent; int m; #if 0 reporterr( "before sortscore\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif for( r=0; a[r].i>-1; ) { i = a[r].i; j = a[r].j; s = r; for( ; i==a[r].i && j==a[r].j; r++ ) ; // reporterr( "s=%d, r=%d\n", s, r ); qsort( a+s, r-s, sizeof( ExtAnch ), anchscorecomp ); #if 0 reporterr( "after sortscore, r=%d\n", r ); for( p=s; p m ) m = a[q].score; // reporterr( "INconsistent\n" ); // reporterr( "p=%d, q=%d\n", p, q ); // reporterr( "p: a[%d].regi,regj=%d-%d,%d-%d, score=%d\n", p, a[p].starti, a[p].endi, a[p].startj, a[p].endj, a[p].score ); // reporterr( "q: a[%d].regi,regj=%d-%d,%d-%d, score=%d\n", q, a[q].starti, a[q].endi, a[q].startj, a[q].endj, a[q].score ); // a[q].starti = a[q].startj = a[q].startj = a[q].endj = -1; // a[q].score = a[p].score - a[q].score; // ?? // a[q].score = ( a[p].score + a[q].score ) / 2; // ?? 
a[q].score = 0; } } if( !consistent ) // a[p].score = ( a[p].score + m ) / 2; // >= 0 a[p].score -= m; // >= 0 // a[p].score = 0; } } #if 0 reporterr( "after filtering\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } exit( 1 ); #endif } #endif static void checkanchors_strongestfirst( ExtAnch *a, int s, double gapratio1, double gapratio2 ) { int p, q; double zureij; double nogaplenestimation1; double nogaplenestimation2; #if 0 reporterr( "before sortscore\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif qsort( a, s, sizeof( ExtAnch ), anchscorecomp ); nogaplenestimation1 = (double)a[0].starti / (1.0+gapratio1); nogaplenestimation2 = (double)a[0].startj / (1.0+gapratio2); zureij = nogaplenestimation1 - nogaplenestimation2; for( p=0; a[p].i>-1; p++ ) { if( a[p].starti == -1 ) continue; #if 0 nogaplenestimation1 = (double)a[p].starti / (1.0+gapratio1); nogaplenestimation2 = (double)a[p].startj / (1.0+gapratio2); if( fabs( zureij - ( nogaplenestimation1 - nogaplenestimation2 ) ) > maxanchorseparation ) { // reporterr( "warning: long internal gaps in %d-%d, |%5.2f-%5.2f - %5.2f| = %5.2f > %5.2f\n", a[p].i, a[p].j, nogaplenestimation1, nogaplenestimation2, zureij, fabs( zureij - ( nogaplenestimation1, nogaplenestimation2 ) ), maxanchorseparation ); a[p].starti = a[p].startj = a[p].startj = a[p].endj = -1; continue; } #else int nearest, mindist; double zurei, zurej; if( p ) { mindist = 999999999; for( q=0; q maxanchorseparation ) // if( fabs( zurei - zurej ) > maxanchorseparation || zurei > maxanchorseparation || zurej > maxanchorseparation ) // test { // reporterr( "warning: long internal gaps in %d-%d, |%5.2f-%5.2f - %5.2f| = %5.2f > %5.2f\n", a[p].i, a[p].j, nogaplenestimation1, nogaplenestimation2, zureij, fabs( zureij - ( nogaplenestimation1, nogaplenestimation2 ) ), maxanchorseparation ); a[p].starti = 
a[p].startj = a[p].startj = a[p].endj = -1; continue; } #endif // reporterr( "P score=%d, %d-%d, %d-%d\n", a[p].score, a[p].starti, a[p].endi, a[p].startj, a[p].endj ); for( q=p+1; a[q].i>-1; q++ ) { if( a[q].starti == -1 ) continue; // reporterr( "Q score=%d, %d-%d, %d-%d\n", a[q].score, a[q].starti, a[q].endi, a[q].startj, a[q].endj ); if( a[p].endi < a[q].starti && a[p].endj < a[q].startj ) { // reporterr( "consistent\n" ); ; } else if( a[p].endi == a[q].starti && a[p].endj < a[q].startj && a[q].starti-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif } static double gapnongapratio( int n, char **s ) { int i, j, len; char *seq, *pt1, *pt2; double fv, ng; len = strlen( s[0] ); seq = calloc( len+1, sizeof( char ) ); fv = 0.0; ng = 0.0; for( i=0; i jump to %d\n", i, j, m1[i], m2[j], anchindex[m1[i]][m2[j]] ); k = anchindex[m1[i]][m2[j]]; while( ( k!=-1 ) && ( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) ) { s++; k++; } } else { // reporterr( "%dx%d, %dx%d -> jump to %d\n", j, i, m1[i], m2[j], anchindex[m2[j]][m1[i]] ); k = anchindex[m2[j]][m1[i]]; while( ( k!=-1 ) && ( extanch[k].i == m2[j] && extanch[k].j == m1[i] ) ) { s++; k++; } } #else k = 0; while( extanch[k].i > -1 ) // kanari muda { //reporterr( "m1[i],m2[j]=%d,%d ? 
extanch[k].i,j=%d,%d k=%d\n", m1[i], m2[j], extanch[k].i, extanch[k].j, k ); if( ( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) || ( extanch[k].i == m2[j] && extanch[k].j == m1[i] ) ) { //reporterr( "hit, extanch[k].startj=%d\n", extanch[k].startj ); s++; } k++; } #endif } *pairanch = calloc( sizeof( ExtAnch ), s+1 ); s = 0; for( i=0; i -1 ) // kanari muda { if( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) { (*pairanch)[s].i = i; (*pairanch)[s].j = j; (*pairanch)[s].starti = extanch[k].starti; // map mae (*pairanch)[s].endi = extanch[k].endi; // map mae (*pairanch)[s].startj = extanch[k].startj; // map mae (*pairanch)[s].endj = extanch[k].endj; // map mae (*pairanch)[s].score = extanch[k].score; s++; } if( extanch[k].j == m1[i] && extanch[k].i == m2[j] ) { (*pairanch)[s].i = i; (*pairanch)[s].j = j; (*pairanch)[s].starti = extanch[k].startj; // map mae (*pairanch)[s].endi = extanch[k].endj; // map mae (*pairanch)[s].startj = extanch[k].starti; // map mae (*pairanch)[s].endj = extanch[k].endi; // map mae (*pairanch)[s].score = extanch[k].score; s++; } k++; } #endif } (*pairanch)[s].i = (*pairanch)[s].j = -1; recountpositions( *pairanch, n1, n2, seq1, seq2 ); // truncateseq_group( *pairanch, seq1, seq2, n1, n2 ); // copybackanchors( *pairanch, ddn1, n2, seq1, seq2 ); // tabun dame #if 0 reporterr( "Before check\n" ); for( k=0; (*pairanch)[k].i>-1; k++ ) { if( (*pairanch)[k].starti!=-1) reporterr( "seq1-%d,seq2-%d %d-%d,%d-%d\n", (*pairanch)[k].i, (*pairanch)[k].j, (*pairanch)[k].starti, (*pairanch)[k].endi, (*pairanch)[k].startj, (*pairanch)[k].endj ); } #endif #if 0 reporterr( "\ngroup1=\n" ); for( i=0; m1[i]>-1; i++ ) reporterr( "%d ", m1[i] ); reporterr( "\n" ); reporterr( "\ngroup2=\n" ); for( i=0; m2[i]>-1; i++ ) reporterr( "%d ", m2[i] ); reporterr( "\n" ); #endif checkanchors_strongestfirst( *pairanch, s, gapnongapratio( n1, seq1 ), gapnongapratio( n2, seq2 ) ); // qsort( *pairanch, s, sizeof( ExtAnch ), anchcomp ); // checkanchors_new( *pairanch ); 
#if 0 reporterr( "After check\n" ); for( k=0; (*pairanch)[k].i>-1; k++ ) { if( (*pairanch)[k].starti!=-1) reporterr( "seq1-%d,seq2-%d %d-%d,%d-%d\n", (*pairanch)[k].i, (*pairanch)[k].j, (*pairanch)[k].starti, (*pairanch)[k].endi, (*pairanch)[k].startj, (*pairanch)[k].endj ); } #endif } static void *treebasethread( void *arg ) { treebasethread_arg_t *targ = (treebasethread_arg_t *)arg; int thread_no = targ->thread_no; int *nrunpt = targ->nrunpt; int njob = targ->njob; int *nlen = targ->nlen; int *jobpospt = targ->jobpospt; int ***topol = targ->topol; Treedep *dep = targ->dep; double ***cpmxhist = targ->cpmxhist; int **memhist = targ->memhist; char **aseq = targ->aseq; double *effarr = targ->effarr; int *alloclen = targ->alloclenpt; int *fftlog = targ->fftlog; char *mergeoralign = targ->mergeoralign; double **newdistmtx = targ->newdistmtx; int *selfscore = targ->selfscore; ExtAnch *extanch = targ->extanch; int **anchindex = targ->anchindex; char **mseq1, **mseq2; char **localcopy; int i, m, j, l; int immin, immax; int len1, len2; int clus1, clus2; double pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; double **dynamicmtx; int ssi, ssm, bunbo; int tm, ti; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; ExtAnch *pairanch = NULL; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif tscore = 0; mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); localcopy = calloc( njob, sizeof( char * ) ); for( i=0; imutex ); l = *jobpospt; if( l == njob-1 ) { pthread_mutex_unlock( targ->mutex ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( mseq1 ); free( mseq2 ); free( localcopy ); free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( localmem ); return( NULL ); } *jobpospt = l+1; // reporterr( "l=%d, child0=%d, child1=%d\n", l, dep[l].child0, dep[l].child1 ); if( dep[l].child0 != -1 ) { while( dep[dep[l].child0].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } if( dep[l].child1 != -1 ) { while( dep[dep[l].child1].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } // while( *nrunpt >= nthread ) // bug while( *nrunpt >= nthreadtb ) // tabun iranai pthread_cond_wait( targ->treecond, targ->mutex ); // tabun iranai (*nrunpt)++; m1 = topol[l][0][0]; m2 = topol[l][1][0]; #if 0 localmem[0][0] = -1; posinmem=topolorderz( localmem[0], topol, dep, l, 0 ) - localmem[0]; localmem[1][0] = -1; posinmem=topolorderz( localmem[1], topol, dep, l, 1 ) - localmem[1]; #else if( dep[l].child0 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = m1; localmem[0][1] = -1; clus1 = 1; } else { localmem[0] = memhist[dep[l].child0]; clus1 = intlen( localmem[0] ); } if( dep[l].child1 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = m2; localmem[1][1] = -1; clus2 = 1; } else { localmem[1] = memhist[dep[l].child1]; clus2 = intlen( localmem[1] ); } if( l != njob-2 ) { memhist[l] = calloc( sizeof( int ), clus1+clus2+1 ); intcpy( memhist[l], localmem[0] ); intcpy( memhist[l]+clus1, localmem[1] ); memhist[l][clus1+clus2] = -1; } #endif // moved, 2018/Mar/10. 
Must be after changing memhist[l] if( mergeoralign[l] == 'n' ) { // reporterr( "SKIP!\n" ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; pthread_mutex_unlock( targ->mutex ); free( localmem[0] ); free( localmem[1] ); continue; } // reporterr( "l=%d, dep[l].child0=%d, dep[l].child1=%d\n", l, dep[l].child0, dep[l].child1 ); if( dep[l].child0 == -1 ) cpmxchild0 = NULL; else cpmxchild0 = cpmxhist+dep[l].child0; if( dep[l].child1 == -1 ) cpmxchild1 = NULL; else cpmxchild1 = cpmxhist+dep[l].child1; // reporterr( "cpmxchild0=%p, cpmxchild1=%p\n", cpmxchild0, cpmxchild1 ); // reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip ); if( specificityconsideration ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip ); else dynamicmtx = n_dis_consweight_multi; // reporterr( "dynamicmtx[0][1] = %f\n", dynamicmtx[0][1] ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen <= len1 + len2 ) { reporterr( "\nReallocating.." ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); reporterr( "done. 
*alloclen = %d\n", *alloclen ); } for( i=0; (j=localmem[0][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); // localcopy[j] = aseq[j]; } for( i=0; (j=localmem[1][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); // localcopy[j] = aseq[j]; } if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; } if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode { // reporterr( "Freeing commonIP (thread %d)\n", thread_no ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } pthread_mutex_unlock( targ->mutex ); #if 1 // CHUUI@@@@ clus1 = fastconjuction_noname( localmem[0], localcopy, mseq1, effarr1, effarr, indication1, 0.0, &orieff1 ); clus2 = fastconjuction_noname( localmem[1], localcopy, mseq2, effarr2, effarr, indication2, 0.0, &orieff2 ); #else clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1, indication1 ); clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2, indication2 ); #endif #if 0 for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." 
); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // else ffttry = 0; ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // reporterr( "fftlog=%d,%d, ffttry=%d\n", fftlog[m1], fftlog[m2], ffttry ); if( useexternalanchors ) { // reporterr( "%%%% %d vs %d\n", m1, m2 ); pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( 
mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); if( disp ) display( localcopy, njob ); if( newdistmtx ) // tsukawanai { #if 0 reporterr( "group1 = " ); for( i=0; imutex ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); for( i=0; (j=localmem[0][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); for( i=0; (j=localmem[1][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); // reporterr( "at step %d\n", l ); // 
use_getrusage(); pthread_mutex_unlock( targ->mutex ); for( i=0; (j=localmem[0][i])!=-1; i++ ) { if(localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } for( i=0; (j=localmem[1][i])!=-1; i++ ) { if( localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } // if( topol[l][0] ) free( topol[l][0] ); // topol[l][0] = NULL; // if( topol[l][1] ) free( topol[l][1] ); // topol[l][1] = NULL; // if( topol[l] ) free( topol[l] ); // topol[l] = NULL; // reporterr( "\n" ); free( localmem[0] ); free( localmem[1] ); } #if SCOREOUT reporterr( "totalscore = %10.2f\n\n", tscore ); #endif } #endif static int dooneiteration( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, int **memhist, double ***cpmxhist, double *effarr, double **newdistmtx, int *selfscore, ExtAnch *extanch, int **anchindex, int *alloclen, int (*callback)(int, int, char*) ) { int l, ll, len1, len2, i, j; int clus1, clus2; double pscore; char *indication1 = NULL, *indication2 = NULL; double *effarr1 = NULL; double *effarr2 = NULL; int *fftlog = NULL; // fixed at 2006/07/26 // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; int *alreadyaligned = NULL; double **dynamicmtx = NULL; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; double oscore, nscore; ExtAnch *pairanch; char **oseq1, **oseq2; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif if( effarr1 == NULL ) { effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); fftlog = AllocateIntVec( njob ); alreadyaligned = AllocateIntVec( njob ); if( specificityconsideration ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); localmem = calloc( sizeof( int * ), 2 ); } for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." 
); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); if( useexternalanchors ) { pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); // reporterr( "######### mseq1[0] = %s\n", mseq1[0] ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, 
effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "\n\n %d - %d (%d x %d) : \n", topol[l][0][0], topol[l][1][0], clus1, clus2 ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, strlen( mseq1[0] ), &nscore ); #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif if( nscore < oscore ) { for( i=0; ig1\n%s\n", mseq1[i] ); for( i=0; ig2\n%s\n", mseq2[i] ); exit( 1 ); } #endif // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; // reporterr( ">514\n%s\n", aseq[514] ); free( localmem[0] ); 
free( localmem[1] ); } Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); free( fftlog ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( alreadyaligned ); free( localmem ); effarr1 = NULL; return( 0 ); chudan_tbfast: Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? 
if( effarr1 ) free( effarr1 ); effarr1 = NULL; if( effarr2 ) free( effarr2 ); effarr2 = NULL; if( indication1 ) free( indication1 ); indication1 = NULL; if( indication2 ) free( indication2 ); indication2 = NULL; if( fftlog ) free( fftlog ); fftlog = NULL; if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL; if( specificityconsideration ) { if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL; } if( localmem ) free( localmem ); localmem = NULL; #if SKIP if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL; if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL; #endif return( 1 ); } static int treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, int **memhist, double ***cpmxhist, double *effarr, double **newdistmtx, int *selfscore, ExtAnch *extanch, int **anchindex, int *alloclen, int (*callback)(int, int, char*) ) { int l, len1, len2, i, m, immin, immax; int len1nocommongap, len2nocommongap; int clus1, clus2; double pscore, tscore; char *indication1 = NULL, *indication2 = NULL; double *effarr1 = NULL; double *effarr2 = NULL; int *fftlog = NULL; // fixed at 2006/07/26 // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; int *gaplen = NULL; int *gapmap = NULL; int *alreadyaligned = NULL; double **dynamicmtx = NULL; double ssi, ssm, bunbo; int tm, ti; int gapmaplen; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; ExtAnch *pairanch; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif if( effarr1 == NULL ) { effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); fftlog = AllocateIntVec( njob ); gaplen = AllocateIntVec( *alloclen+10 ); gapmap = AllocateIntVec( *alloclen+10 ); alreadyaligned = AllocateIntVec( njob ); if( specificityconsideration ) dynamicmtx = AllocateDoubleMtx( nalphabets, 
nalphabets ); localmem = calloc( sizeof( int * ), 2 ); } for( i=0; i 0 && dep[l].child0 == l-1 && dep[l].child1 == -1 && dep[dep[l].child0].child1 == -1 ) { localmem[0][clus1] = topol[l-1][1][0]; localmem[0][clus1+1] = -1; localmem[1][0] = topol[l][1][0]; localmem[1][1] = -1; } else { localmem[0][0] = -1; posinmem = topolorderz( localmem[0], topol, dep, l, 0 ) - localmem[0]; localmem[1][0] = -1; posinmem = topolorderz( localmem[1], topol, dep, l, 1 ) - localmem[1]; } #else if( dep[l].child0 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = m1; localmem[0][1] = -1; clus1 = 1; } else { localmem[0] = memhist[dep[l].child0]; clus1 = intlen( localmem[0] ); } if( dep[l].child1 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = m2; localmem[1][1] = -1; clus2 = 1; } else { localmem[1] = memhist[dep[l].child1]; clus2 = intlen( localmem[1] ); } if( l != njob-2 ) { memhist[l] = calloc( sizeof( int ), clus1+clus2+1 ); intcpy( memhist[l], localmem[0] ); intcpy( memhist[l]+clus1, localmem[1] ); memhist[l][clus1+clus2] = -1; } #endif if( mergeoralign[l] == 'n' ) { // reporterr( "SKIP!\n" ); // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; free( localmem[0] ); free( localmem[1] ); continue; } // reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip ); if( specificityconsideration ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip ); else dynamicmtx = n_dis_consweight_multi; // makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen < len1 + len2 ) { reporterr( "\nReallocating.." 
); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) ); if( gaplen == NULL ) { reporterr( "Cannot realloc gaplen\n" ); exit( 1 ); } gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) ); if( gapmap == NULL ) { reporterr( "Cannot realloc gapmap\n" ); exit( 1 ); } reporterr( "done. *alloclen = %d\n", *alloclen ); } #if 1 // CHUUI@@@@ clus1 = fastconjuction_noname( localmem[0], aseq, mseq1, effarr1, effarr, indication1, 0.0, &orieff1 ); clus2 = fastconjuction_noname( localmem[1], aseq, mseq2, effarr2, effarr, indication2, 0.0, &orieff2 ); #else clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 ); clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 ); // clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1, indication1 ); // clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2, indication2 ); #endif if( mergeoralign[l] == '1' || mergeoralign[l] == '2' ) { newgapstr = "="; } else newgapstr = "-"; len1nocommongap = len1; len2nocommongap = len2; if( mergeoralign[l] == '1' ) // nai { findcommongaps( clus2, mseq2, gapmap ); commongappick( clus2, mseq2 ); len2nocommongap = strlen( mseq2[0] ); } else if( mergeoralign[l] == '2' ) { findcommongaps( clus1, mseq1, gapmap ); commongappick( clus1, mseq1 ); len1nocommongap = strlen( mseq1[0] ); } #if 0 for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." 
); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // else ffttry = 0; ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // reporterr( "fftlog=%d,%d, ffttry=%d\n", fftlog[m1], fftlog[m2], ffttry ); if( useexternalanchors ) { pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); // 
reporterr( "######### mseq1[0] = %s\n", mseq1[0] ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "\n\n %d - %d (%d x %d) : \n", topol[l][0][0], topol[l][1][0], clus1, clus2 ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); // reporterr( "\n" ); if( mergeoralign[l] == '1' ) // 
jissainiha nai. atarashii hairetsu ha saigo dakara. { reporterr( "Check source!!!\n" ); exit( 1 ); } if( mergeoralign[l] == '2' ) { // if( localkeeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL ); // for( i=0; iSTEP0 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP0 mseq2[%d] = \n%s\n", i, mseq2[i] ); gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1; adjustgapmap( gapmaplen, gapmap, mseq1[0] ); #if 0 reporterr( "\n" ); for( i=0; iSTEP1 mseq1[%d] = \n%s\n", i, mseq1[i] ); for( i=0; iSTEP1 mseq2[%d] = \n%s\n", i, mseq2[i] ); #endif // if( clus1 + clus2 < njob ) restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); if( smoothing ) { restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps_bothorders( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' ); } else { restorecommongaps( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, *alloclen, alg, '-' ); } #if 0 reporterr( "\n" ); for( i=0; iSTEP3 mseq1[%d] = \n%s\n", i, mseq1[i] ); for( i=0; iSTEP3 mseq2[%d] = \n%s\n", i, mseq2[i] ); #endif #if 0 for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( newdistmtx ) // tsukawanai { #if 0 reporterr( "group1 = " ); for( i=0; ig1\n%s\n", mseq1[i] ); for( i=0; ig2\n%s\n", mseq2[i] ); exit( 1 ); } #endif // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; // reporterr( ">514\n%s\n", aseq[514] ); free( localmem[0] ); free( localmem[1] ); } #if SCOREOUT reporterr( "totalscore = %10.2f\n\n", tscore ); #endif Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); free( fftlog ); free( gaplen ); free( gapmap ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( alreadyaligned ); free( localmem ); effarr1 = NULL; return( 0 ); chudan_tbfast: Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? 
if( effarr1 ) free( effarr1 ); effarr1 = NULL; if( effarr2 ) free( effarr2 ); effarr2 = NULL; if( indication1 ) free( indication1 ); indication1 = NULL; if( indication2 ) free( indication2 ); indication2 = NULL; if( fftlog ) free( fftlog ); fftlog = NULL; if( gaplen ) free( gaplen ); gaplen = NULL; if( gapmap ) free( gapmap ); gapmap = NULL; if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL; if( specificityconsideration ) { if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL; } if( localmem ) free( localmem ); localmem = NULL; #if SKIP if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL; if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL; #endif return( 1 ); }
/*
 * WriteOptions: write a human-readable summary of the current run settings.
 *
 * fp: output stream. Every line except the first "Gap Penalty" line is
 *     written to it with fprintf, and it is flushed with fflush( fp ) at the
 *     end. The stream is not closed here.
 *
 * Reported, in this order:
 *   - "DNA" when dorp == 'd'; otherwise the scoring matrix chosen by
 *     scoremtx (0: "JTT <pamN>PAM", 1: "BLOSUM <nblosum>", 2: "M-Y").
 *     Other scoremtx values print nothing.
 *   - The gap penalties ppenalty, ppenalty_ex and poffset, each divided by
 *     1000 and printed with "%+5.2f".
 *   - "FFT on" when use_fft is set.
 *   - The tree-based weighting mode from tbrweight (0: unweighted,
 *     3: clustalw-like weighting; other values print nothing), plus the
 *     iteration details when tbitr or tbweight is set.
 *   - The algorithm from alg ('a' or 'A', anything else is reported as
 *     unknown) and the tree method from treemethod ('X', 'E' or 'q',
 *     anything else is reported as unknown).
 *   - When use_fft is set: "FFT on", the basis (depends on dorp and
 *     fftscore), fftThreshold and fftWinSize; otherwise "FFT off".
 *
 * NOTE(review): the first "Gap Penalty" line is emitted through reporterr()
 * rather than fprintf( fp, ... ), and the same line is written to fp further
 * down. "FFT on" is likewise written twice when use_fft is set. Confirm that
 * both duplications are intended before changing the output.
 */
static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } reporterr( "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'X' ) fprintf( fp, "Tree = UPGMA (mix).\n" ); else if( treemethod == 'E' ) fprintf( fp, "Tree = UPGMA (average).\n" ); else if(
treemethod == 'q' ) fprintf( fp, "Tree = Minimum linkage.\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } static double **preparepartmtx( int nseq ) { int i; double **val; double size; val = (double **)calloc( nseq, sizeof( double *) );; size = 0; if( compacttree == 1 ) { for( i=0; i maxdistmtxsize ) { reporterr( "\n\nThe size of full distance matrix is estimated to exceed %.2fGB.\n", maxdistmtxsize / 1000 / 1000 /1000 ); reporterr( "Will try the calculation using a %d x %d matrix.\n", nseq, i ); reporterr( "This calculation will be slow due to the limited RAM space.\n", i, nseq ); reporterr( "To avoid the slowdown, please try '--initialramusage xGB' (x>>%.2f),\n", maxdistmtxsize / 1000 / 1000 /1000 ); reporterr( "if larger RAM space is available.\n" ); reporterr( "Note that xGB is NOT the upper limit of RAM usage.\n" ); reporterr( "Two to three times larger space may be used for building a guide tree.\n" ); reporterr( "Memory usage of the MSA stage depends on similarity of input sequences.\n\n" ); // reporterr( "If the RAM is small, try '--initialramusage xGB' with a smaller x value.\n" ); reporterr( "The '--memsavetree' option uses smaller RAM space.\n" ); reporterr( "If tree-like relationship can be ignored, try '--pileup' or '--randomchain'.\n\n" ); reporterr( "The result of --initialramusage xGB is almost identical to the default, except for rounding differences.\n" ); reporterr( "In the cases of --memsavetree, --pileup and --randomchain, the result differs from the default.\n\n" ); break; } val[i] = (double *)calloc( nseq, sizeof( double ) ); } if( i == nseq )
reporterr( "The full matrix will be used.\n" ); for( ;i nlenmax ) nlenmax = ien; } infp = NULL; // stderr = fopen( "/dev/null", "a" ); // Windows???? tmpargv = AllocateCharMtx( argc, 0 ); for( i=0; i 1000000 ) { reporterr( "The number of sequences must be < %d\n", 1000000 ); reporterr( "Please try the --parttree option for such large data.\n" ); exit( 1 ); } if( njob < 2 ) { seq = AllocateCharMtx( 2, nlenmax*1+1 ); name = AllocateCharMtx( 2, B+1 ); nlen = AllocateIntVec( 2 ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); gappick0( seq[1], seq[0] ); writeData_pointer( stdout, njob, name, nlen, seq+1 ); reporterr( "Warning: Only %d sequence found.\n", njob ); FreeCharMtx( seq ); FreeCharMtx( name ); free( nlen ); exit( 0 ); } if( specificityconsideration != 0.0 && nlenmax) { if( nlenmax > 100000 ) { reporterr( "\n" ); reporterr( "Too long to apply --allowshift or --unalignlevel>0\n" ); reporterr( "Please use the normal mode.\n" ); reporterr( "Please also note that MAFFT does not assume genomic rearrangements.\n" ); reporterr( "\n" ); exit( 1 ); } } #if !defined(mingw) && !defined(_MSC_VER) setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou. 
#endif if( subalignment ) { readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem ); reporterr( "nsubalignments = %d\n", nsubalignments ); reporterr( "maxmem = %d\n", maxmem ); subtable = AllocateIntMtx( nsubalignments, maxmem+1 ); insubtable = AllocateIntVec( njob ); preservegaps = AllocateIntVec( njob ); for( i=0; i equivalent to v7.448 free( tmpseq ); } constants( njob, seq ); #if 0 reporterr( "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { reporterr( "Illegal character %c\n", c ); exit( 1 ); } reporterr( "\n" ); // reporterr( "tuplesize = %d, dorp = %c\n", tuplesize, dorp ); if( dorp == 'p' && tuplesize != 6 ) { reporterr( "tuplesize must be 6 for aa sequence\n" ); exit( 1 ); } if( dorp == 'd' && tuplesize != 6 && tuplesize != 10 ) { reporterr( "tuplesize must be 6 or 10 for dna sequence\n" ); exit( 1 ); } if( treein ) { int npickx; treein = check_guidetreefile( &randomseed, &npickx, &maxdistmtxsize ); if( treein == 't' ) { varpairscore( njob, npickx, nlenmax, seq, randomseed ); exit( 1 ); } else if( treein == 'c' ) { compacttree = 1; treein = 0; // use_fft = 0; // kankeinai? // maxdistmtxsize = 5 * 1000 * 1000; // 5GB. ato de kahen ni suru. // maxdistmtxsize = 1.0 * 1000 * 1000 * 1000; // 5GB. ato de kahen ni suru. } else if( treein == 'Y' ) { compacttree = 4; // youngest linkage, 3 ha tbfast de tsukaunode ichiou sakeru treein = 0; // use_fft = 0; // kankeinai? } else if( treein == 'S' || treein == 'C' ) { compacttree = 2; // 3 ha tbfast de tsukaunode ichiou sakeru treein = 0; // use_fft = 0; // kankeinai? 
} else if( treein == 'a' ) { // reporterr( "Compute pairwise scores\n" ); if( njob > 200000 ) { reporterr( "Chain?\n" ); treein = 's'; nguidetree = 1; } else if( njob < 100 || 't' == varpairscore( njob, npickx, nlenmax, seq, randomseed ) ) { if( treein == 'c' ) exit( 1 ); reporterr( "Tree!\n" ); treein = 0; nguidetree = 2; } else { reporterr( "Chain!\n" ); treein = 's'; nguidetree = 1; } } else if ( treein != 0 ) // auto no toki arieru nguidetree = 1; } # if 0 // tameshini if( sueff_global < 0.0001 || compacttree == 2 ) { nthread = 0; nthreadtb = 0; } #endif // if( njob > 10000 ) nthreadtb = 0; if( njob > 20000 ) nthreadtb = 0; // 2018/Jan. Hairetsu ga ooi toki // 1. topolorder_lessargs no stack ga tarinakunaru // 2. localcopy no tame kouritsu warui if( compacttree == 1 ) { if( maxdistmtxsize > (double)njob * (njob-1) * sizeof( double ) / 2 ) { reporterr( "Use conventional tree.\n" ); compacttree = 0; } } if( !treein ) { reporterr( "\n\nMaking a distance matrix ..\n" ); if( callback && callback( 0, 0, "Distance matrix" ) ) goto chudan; tmpseq = AllocateCharVec( nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); if( !compacttree ) mtx = AllocateFloatHalfMtx( njob ); if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize ); else tsize = (int)pow( 6, 6 ); if( dorp == 'd' && tuplesize == 6 ) { lenfaca = D6LENFACA; lenfacb = D6LENFACB; lenfacc = D6LENFACC; lenfacd = D6LENFACD; } else if( dorp == 'd' && tuplesize == 10 ) { lenfaca = D10LENFACA; lenfacb = D10LENFACB; lenfacc = D10LENFACC; lenfacd = D10LENFACD; } else { lenfaca = PLENFACA; lenfacb = PLENFACB; lenfacc = PLENFACC; lenfacd = PLENFACD; } maxl = 0; for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); 
else { reporterr( "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) reporterr( "\nThere are %d ambiguous characters.\n", nunknown ); if( compacttree ) { reporterr( "Compact tree, step 1\n" ); mindistfrom = (int *)calloc( njob, sizeof( int ) ); mindist = (double *)calloc( njob, sizeof( double ) ); selfscore = (int *)calloc( njob, sizeof( int ) ); partmtx = preparepartmtx( njob ); for( i=0; i 0 ) { compactdistmtxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; double **mindistthread; int **mindistfromthread; if( compacttree == 4 ) jobpos = 0; else jobpos = njob-1; targ = calloc( nthreadpair, sizeof( compactdistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); mindistthread = AllocateDoubleMtx( nthreadpair, njob ); mindistfromthread = AllocateIntMtx( nthreadpair, njob ); pthread_mutex_init( &mutex, NULL ); for( j=0; j 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthreadpair, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i nogaplen[j] ) { longer=(double)nogaplen[i]; shorter=(double)nogaplen[j]; } else { longer=(double)nogaplen[j]; shorter=(double)nogaplen[i]; } // if( tuplesize == 6 ) lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // else // lenfac = 1.0; // reporterr( "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter ); bunbo = MIN( mtx[i][0], mtx[j][0] ); if( bunbo == 0.0 ) mtx[i][j-i] = 2.0; // 2013/Oct/17 -> 2bai else mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac * 2.0; // 2013/Oct/17 -> 2bai // reporterr( "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo ); } } if( disopt ) { for( i=0; i iguidetree loop nai ni idou if( distout ) { hat2p = 
fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx ); fclose( hat2p ); } #endif } #if 0 else { reporterr( "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_double( prep, njob, name, mtx ); // name chuui fclose( prep ); reporterr( "done.\n" ); } #endif // reporterr( "after computing distance matrix," ); // use_getrusage(); if( nadd && keeplength ) { originalgaps = (char *)calloc( nlenmax+1, sizeof( char) ); recordoriginalgaps( originalgaps, njob-nadd, seq ); if( mapout ) { addbk = (char **)calloc( nadd+1, sizeof( char * ) ); for( i=0; i= njob ) // check sumi { reporterr( "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { reporterr( "\n" ); reporterr( "###############################################################################\n" ); reporterr( "# ERROR!\n" ); reporterr( "# Subalignment %d must be aligned.\n", i+1 ); reporterr( "# Please check the alignment lengths of following sequences.\n" ); reporterr( "#\n" ); reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); reporterr( "#\n" ); reporterr( "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { reporterr( "#\n" ); reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); reporterr( "# In this case, the rule of numbering is:\n" ); reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); reporterr( "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } reporterr( "###############################################################################\n" ); reporterr( "\n" ); goto chudan; // TEST!! //exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou reporterr( "\n" ); reporterr( "###############################################################################\n" ); reporterr( "# ERROR!\n" ); reporterr( "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); reporterr( "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); reporterr( "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); reporterr( "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); reporterr( "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { reporterr( "#\n" ); reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); reporterr( "# In this case, the rule of numbering is:\n" ); reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); reporterr( "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } reporterr( "############################################################################### \n" ); reporterr( "\n" ); goto chudan; // TEST!! 
//exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %c\n\n", i, mergeoralign[i] ); } #endif for( i=0; i iguidetree loop no soto he FreeIntMtx( subtable ); free( insubtable ); for( i=0; i 0 && nadd == 0 ) // nthreadpair ha minai { treebasethread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t treecond; int *fftlog; int nrun; int nthread_yoyu; nthread_yoyu = nthreadtb * 1; nrun = 0; jobpos = 0; targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); fftlog = AllocateIntVec( njob ); handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &treecond, NULL ); for( i=0; i0 && nadd==0 ) if( calcpairdists ) if( calcpairdists && !compacttree ) #else // if( 0 && nadd==0 ) if( calcpairdists ) // zettai nai if( calcpairdists && !compacttree ) #endif { reporterr( "Making a distance matrix from msa.. \n" ); skiptable = AllocateIntMtx( njob, 0 ); makeskiptable( njob, skiptable, bseq ); // allocate suru. 
#ifdef enablemultithread if( nthreadpair > 0 ) { msadistmtxthread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthreadpair, sizeof( msadistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 0 ) { msacompactdistmtxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; double **mindistthread; int **mindistfromthread; mindistthread = AllocateDoubleMtx( nthreadpair, njob ); mindistfromthread = AllocateIntMtx( nthreadpair, njob ); targ = calloc( nthreadpair, sizeof( msacompactdistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); if( compacttree == 4 ) jobpos = 0; else jobpos = njob-1; for( i=0; i lgui ) { reporterr( "alignmentlength = %d, gui allocated %d", ien, lgui ); val = GUI_LENGTHOVER; } else { for( i=0; i 0 ) { reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted ); if( mapout ) reporterr( "The deleted letters are shown in the (filename).map file.\n" ); else reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" ); } if( subalignment ) { FreeIntMtx( subtable ); free( insubtable ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *infp; FILE *hat2p; FILE *hat3p; char **seq = NULL; // by D.Mathog char **seq1; static char **name; static char **name1; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; char b[B]; double max; char com[1000]; int opt[M]; int res; char 
*home; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); LocalHom **localhomtable, *tmpptr; #if 1 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = NULL; #endif #if DEBUG if( home ) fprintf( stderr, "home = %s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d", home, pid ); sprintf( datafile, "%s/tmp/data-%d", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); sprintf( hat2file, "hat2-%d", pid ); arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( infp, &njob, &nlenmax ); #else dorp = NOTSPECIFIED; getnumlen( infp ); #endif if( dorp == 'd' ) { scoremtx = -1; pamN = NOTSPECIFIED; } else { nblosum = 62; scoremtx = 1; } constants( njob, seq ); rewind( infp ); name = AllocateCharMtx( njob, B+1 ); name1 = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i %s", queryfile, datafile, fastafile ); else sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); #if 0 for( j=0; jnext ) { if( tmpptr->opt == -1.0 ) continue; // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } } #endif if( res < njob-i+i%10 ) { fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 ); } 
#if 0 { int ii, jj; if( i < njob-1 ) for( jj=i; jj j ) continue; if( mtx[j][i] > mtx[i][j] ) continue; for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } fclose( hat3p ); #endif for( i=0; i %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); if( res < njob - i ) { fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); exit( 1 ); } if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; iR = result->I = 0.0; result++; } } #if 0 // by D.Mathog static void vec_init2( Fukusosuu **result, char *seq, double eff, int st, int ed ) { int i; for( i=st; i= 0 ) result->R += incr * score[n]; #if 0 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R ); #endif } } static void seq_vec_3( Fukusosuu **result, double incr, char *seq ) { int i; int n; for( i=0; *seq; i++ ) { n = amino_n[(int)*seq++]; if( n < n20or4or2 && n >= 0 ) result[n][i].R += incr; } } static void seq_vec_5( Fukusosuu *result, double *score1, double *score2, double incr, char *seq ) { int n; for( ; *seq; result++ ) { n = amino_n[(int)*seq++]; if( n > 20 ) continue; result->R += incr * score1[n]; result->I += incr * score2[n]; #if 0 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R ); #endif } } static void seq_vec_4( Fukusosuu *result, double incr, char *seq ) { char s; for( ; *seq; result++ ) { s = *seq++; if( s 
== 'a' ) result->R += incr; else if( s == 't' ) result->R -= incr; else if( s == 'g' ) result->I += incr; else if( s == 'c' ) result->I -= incr; } } #if 0 // by D.Mathog static void seq_vec( Fukusosuu *result, char query, double incr, char *seq ) { #if 0 int bk = nlen; #endif while( *seq ) { if( *seq++ == query ) result->R += incr; result++; #if 0 fprintf( stderr, "i = %d result->R = %f\n", bk-nlen, (result-1)->R ); #endif } } static int checkRepeat( int num, int *cutpos ) { int tmp, buf; buf = *cutpos; while( num-- ) { if( ( tmp = *cutpos++ ) < buf ) return( 1 ); buf = tmp; } return( 0 ); } static int segcmp( void *ptr1, void *ptr2 ) { int diff; Segment **seg1 = (Segment **)ptr1; Segment **seg2 = (Segment **)ptr2; #if 0 return( (*seg1)->center - (*seg2)->center ); #else diff = (*seg1)->center - (*seg2)->center; if( diff ) return( diff ); diff = (*seg1)->start - (*seg2)->start; if( diff ) return( diff ); diff = (*seg1)->end - (*seg2)->end; if( diff ) return( diff ); fprintf( stderr, "USE STABLE SORT !!\n" ); exit( 1 ); return( 0 ); #endif } #endif static void mymergesort( int first, int last, Segment **seg ) { int middle; static TLS int i, j, k, p; static TLS int allo = 0; static TLS Segment **work = NULL; if( seg == NULL ) { if( work ) free( work ); work = NULL; allo = 0; return; } if( last > allo ) { allo = last; if( work ) free( work ); work = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) ); } if( first < last ) { middle = ( first + last ) / 2; mymergesort( first, middle, seg ); mymergesort( middle+1, last, seg ); p = 0; for( i=first; i<=middle; i++ ) work[p++] = seg[i]; i = middle + 1; j = 0; k = first; while( i <= last && j < p ) { if( work[j]->center <= seg[i]->center ) seg[k++] = work[j++]; else seg[k++] = seg[i++]; } while( j < p ) seg[k++] = work[j++]; } } double Fgetlag( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen ) { int i, j, k, l, m; int nlen, nlen2, nlen4; static TLS int 
crossscoresize = 0; static TLS char **tmpseq1 = NULL; static TLS char **tmpseq2 = NULL; static TLS char **tmpptr1 = NULL; static TLS char **tmpptr2 = NULL; static TLS char **tmpres1 = NULL; static TLS char **tmpres2 = NULL; static TLS char **result1 = NULL; static TLS char **result2 = NULL; #if RND static TLS char **rndseq1 = NULL; static TLS char **rndseq2 = NULL; #endif static TLS Fukusosuu **seqVector1 = NULL; static TLS Fukusosuu **seqVector2 = NULL; static TLS Fukusosuu **naiseki = NULL; static TLS Fukusosuu *naisekiNoWa = NULL; static TLS double *soukan = NULL; static TLS double **crossscore = NULL; int nlentmp; static TLS int *kouho = NULL; static TLS Segment *segment = NULL; static TLS Segment *segment1 = NULL; static TLS Segment *segment2 = NULL; static TLS Segment **sortedseg1 = NULL; static TLS Segment **sortedseg2 = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; static TLS int localalloclen = 0; int lag; int tmpint; int count, count0; int len1, len2; int totallen; double dumdb = 0.0; int headgp, tailgp; len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( !localalloclen ) { kouho = AllocateIntVec( NKOUHO ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); // crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( 
Segment ) ); sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 4; else if( fftscore == 1 ) n20or4or2 = 2; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( fftkeika ) fprintf( stderr, " FFT ... 
" ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; m /dev/tty" ); #if 0 fftfp = fopen( "list.plot", "w" ); fprintf( fftfp, "plot 'frt'\n pause +1" ); fclose( fftfp ); system( "/usr/bin/gnuplot list.plot" ); #endif #endif getKouho( kouho, NKOUHO, soukan, nlen ); #if 0 for( i=0; iCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); if( tmpint == 0 ) break; // 060430 iinoka ? while( tmpint-- > 0 ) { if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "Goukaku=%dko\n", tmpint ); fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 
fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 fprintf( stderr, "done. (%d anchors)\r", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fprintf( stdout, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( crossscoresize < count+2 ) { crossscoresize = count+2; fprintf( stderr, "####################################################################################################################################allocating crossscore, size = %d\n", crossscoresize ); if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if DEBUG fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { fprintf( stderr, "REPEAT!? \n" ); if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); fprintf( stderr, "done. (%d anchors)\n", count ); #endif } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l alloclen ) ErrorExit( "LENGTH OVER in Falign\n " ); for( j=0; j= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif result1 = AllocateCharMtx( clus1, alloclen ); result2 = AllocateCharMtx( clus2, alloclen ); tmpres1 = AllocateCharMtx( clus1, alloclen ); tmpres2 = AllocateCharMtx( clus2, alloclen ); sgap1 = AllocateCharVec( clus1 ); egap1 = AllocateCharVec( clus1 ); sgap2 = AllocateCharVec( clus2 ); egap2 = AllocateCharVec( clus2 ); tmpptr1 = calloc( clus1, sizeof( char * ) ); tmpptr2 = calloc( clus2, sizeof( char * ) ); tmpseq1 = AllocateCharMtx( clus1, nlen ); tmpseq2 = AllocateCharMtx( clus2, nlen ); #if RND rndseq1 = AllocateCharMtx( clus1, nlen ); rndseq2 = AllocateCharMtx( clus2, nlen ); for( 
i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { if( fftkeika ) fprintf( stderr, " FFT ... " ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; mCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif // fprintf( stderr, "lag = %d\n", lag ); tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); // if( lag == -50 ) exit( 1 ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); if( tmpint == 0 ) break; // 060430 iinoka ? while( tmpint-- > 0 ) { #if 0 if( segment[count].end - segment[count].start < fftWinSize ) { count++; continue; } #endif if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; } } #if 0 if( 
!kobetsubunkatsu && fftkeika ) fprintf( stderr, "%d anchors found\r", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" ); #if 0 fprintf( stderr, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 if( fftkeika ) fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if 0 fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l%d of GROUP1\n", j ); fprintf( stdout, "%s\n", tmpres1[j] ); } for( j=0; j%d of GROUP2\n", j ); fprintf( stdout, "%s\n", tmpres2[j] ); } fflush( stdout ); #endif switch( alg ) { case( 'a' ): totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen ); break; case( 'M' ): if( scoringmatrices ) // called by tditeration.c totalscore += MSalignmm_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); else totalscore += MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp, NULL, NULL, NULL, 0.0, 0.0 ); // totalscore += MSalignmm( n_dis_consweight_multi, tmpres1, tmpres2, eff1, eff2, clus1, clus2, 
alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); break; case( 'd' ): if( clus1 == 1 && clus2 == 1 ) { totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp ); } else { if( scoringmatrices ) // called by tditeration.c { totalscore += D__align_variousdist( whichmtx, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, 0, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); } else totalscore += D__align( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, 0, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { if( codonpos || codonscore ) { // reporterr( "calling G__align11psg\n" ); totalscore += G__align11psg( codonscoremtx, n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp, gstart+cut1[i], gend+cut1[i] ); } else totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp ); } else { if( codonpos ) { reporterr( "\n\ncodonpos will be soon supported for a reference MSA. For now, use a single sequence as reference.\n\n\n" ); exit( 1 ); } if( scoringmatrices ) // called by tditeration.c { totalscore += A__align_variousdist( whichmtx, scoringmatrices, NULL, penalty, penalty_ex, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, 0, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); } else totalscore += A__align( n_dynamicmtx, penalty, penalty_ex, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, 0, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); } break; default: fprintf( stderr, "alg = %c\n", alg ); ErrorExit( "ERROR IN SOURCE FILE Falign.c" ); break; } #ifdef enablemultithread if( chudanres && *chudanres ) { // fprintf( stderr, "\n\n## CHUUDAN!!! 
at Falign_localhom\n" ); // Added 2021/Jul/25. FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); free( sgap1 ); free( egap1 ); free( sgap2 ); free( egap2 ); free( tmpptr1 ); free( tmpptr2 ); #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif return( -1.0 ); } #endif nlen = strlen( tmpres1[0] ); if( totallen + nlen > alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j= n / 2 ) break; } if( o >= n/2 ) c[(j-start)*d] = 'o'; else c[(j-start)*d] = '-'; } c[(j-start)*d] = 0; reporterr( "c=%s\n", c ); l = 0; for( j=start; j!=end; j+=d ) if( c[j] == 'o' ) l++; reporterr( "l=%d\n", l ); free( c ); return( l ); } static int nogapmargin( int n, char **s, int start, int end, int m ) { int i, j, l, d; int minl; if( start < end ) d = 1; else d = -1; // reporterr( "\nin nogapmargin, d=%d\n", d ); minl = (end-start)*d; for( i=0; im ) break; } // reporterr( "i=%d, l=%d, j=%d\n", i, l, j ); if( (j-start)*d < minl ) minl = (j-start)*d; } minl += 1; // reporterr( "minl=%d, so returning %d\n", minl, start+minl*d ); return( start + minl*d ); } #endif double Falign_givenanchors( ExtAnch *pairanch, int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, int *fftlog ) { int i, j; int nlen, nlen2, nlen4; static TLS int prevalloclen = 0; //static TLS int crossscoresize = 0; //static TLS char **tmpseq1 = NULL; //static TLS char **tmpseq2 = NULL; //static TLS char **tmpptr1 = NULL; //static TLS char **tmpptr2 = NULL; static TLS char **tmpres1 = NULL; static TLS char **tmpres2 = NULL; static TLS char **result1 = NULL; static TLS char **result2 = NULL; #if RND //static TLS char **rndseq1 = NULL; //static TLS char **rndseq2 = NULL; #endif //static TLS Fukusosuu 
**seqVector1 = NULL; //static TLS Fukusosuu **seqVector2 = NULL; //static TLS Fukusosuu **naiseki = NULL; //static TLS Fukusosuu *naisekiNoWa = NULL; //static TLS double *soukan = NULL; //static TLS double **crossscore = NULL; int nlentmp; //static TLS int *kouho = NULL; //static TLS Segment *segment = NULL; //static TLS Segment *segment1 = NULL; //static TLS Segment *segment2 = NULL; //static TLS Segment **sortedseg1 = NULL; //static TLS Segment **sortedseg2 = NULL; static TLS int *alignorcopy = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; static TLS char *sgap1, *egap1, *sgap2, *egap2; static TLS int localalloclen = 0; // int lag; // int tmpint; int count, count0; int len1, len2; int totallen; double totalscore; // int nkouho = 0; int headgp, tailgp; // double dumfl = 0.0; int orilen1, orilen2; int cutadd; int starttermcut1, starttermcut2, endtermcut1, endtermcut2; double marginfac1, marginfac2; if( seq1 == NULL ) { if( result1 ) { // fprintf( stderr, "### Freeing localarrays in Falign\n" ); localalloclen = 0; prevalloclen = 0; //crossscoresize = 0; mymergesort( 0, 0, NULL ); //alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL ); //fft( 0, NULL, 1 ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); A__align_variousdist( NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); D__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); //blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL ); //if( crossscore ) FreeDoubleMtx( crossscore ); //crossscore = NULL; // reallocate sareru kanousei ga arunode. 
FreeCharMtx( result1 ); result1 = NULL; FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); //FreeCharMtx( tmpseq1 ); //FreeCharMtx( tmpseq2 ); free( sgap1 ); free( egap1 ); free( sgap2 ); free( egap2 ); //free( kouho ); free( alignorcopy ); free( cut1 ); free( cut2 ); //free( tmpptr1 ); //free( tmpptr2 ); //free( segment ); //free( segment1 ); //free( segment2 ); //free( sortedseg1 ); //free( sortedseg2 ); //if( !kobetsubunkatsu ) //{ // FreeFukusosuuMtx ( seqVector1 ); // FreeFukusosuuMtx ( seqVector2 ); // FreeFukusosuuVec( naisekiNoWa ); // FreeFukusosuuMtx( naiseki ); // FreeDoubleVec( soukan ); //} } else { // fprintf( stderr, "Did not allocate localarrays in Falign\n" ); } return( 0.0 ); } len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if 0 fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( prevalloclen != alloclen ) // Falign_noudp mo kaeru { if( prevalloclen ) { FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); } // fprintf( stderr, "\n\n\nreallocating ...\n" ); result1 = AllocateCharMtx( njob, alloclen ); // ato de loca nseq ni kakihaosu result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); prevalloclen = alloclen; } if( !localalloclen ) { sgap1 = AllocateCharVec( njob ); egap1 = AllocateCharVec( njob ); sgap2 = AllocateCharVec( njob ); egap2 = AllocateCharVec( njob ); //kouho = AllocateIntVec( NKOUHO_LONG ); alignorcopy = AllocateIntVec( MAXSEG ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); //tmpptr1 = AllocateCharMtx( njob, 0 ); //tmpptr2 = AllocateCharMtx( njob, 0 ); //segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); //segment1 
= (Segment *)calloc( MAXSEG, sizeof( Segment ) ); //segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); //sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); //sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); //if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) // ErrorExit( "Allocation error\n" ); //if ( scoremtx == -1 ) n20or4or2 = 1; //else if( fftscore ) n20or4or2 = 1; //else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 //if( !kobetsubunkatsu ) //{ // FreeFukusosuuMtx ( seqVector1 ); // FreeFukusosuuMtx ( seqVector2 ); // FreeFukusosuuVec( naisekiNoWa ); // FreeFukusosuuMtx( naiseki ); // FreeDoubleVec( soukan ); //} //FreeCharMtx( tmpseq1 ); //FreeCharMtx( tmpseq2 ); #endif #if RND //FreeCharMtx( rndseq1 ); //FreeCharMtx( rndseq2 ); #endif } //tmpseq1 = AllocateCharMtx( njob, nlen ); //tmpseq2 = AllocateCharMtx( njob, nlen ); //if( !kobetsubunkatsu ) //{ // naisekiNoWa = AllocateFukusosuuVec( nlen ); // naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); // seqVector1 = AllocateFukusosuuMtx( n20or4or2, nlen+1 ); // seqVector2 = AllocateFukusosuuMtx( n20or4or2, nlen+1 ); // soukan = AllocateDoubleVec( nlen+1 ); //} #if RND //rndseq1 = AllocateCharMtx( njob, nlen ); //rndseq2 = AllocateCharMtx( njob, nlen ); //for( i=0; i MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); // fprintf( stderr, "##### k=%d / %d\n", k, maxk ); // if( tmpint == 0 ) break; // 060430 iinoka ? 
// 090530 yameta while( tmpint-- > 0 ) { #if 0 if( segment[count].end - segment[count].start < fftWinSize ) { count++; continue; } #endif if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "##### k=%d / %d\n", k, maxk ); fprintf( stderr, "anchor %d, score = %f\n", count, segment1[count].score ); fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 if( !kobetsubunkatsu ) if( fftkeika ) fprintf( stderr, "done. (%d anchors) ", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" 
); #if 0 fprintf( stderr, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( count < 5000 ) { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 if( fftkeika ) fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if 0 fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } } else { fprintf( stderr, "\nMany anchors were found. The upper-level DP is skipped.\n\n" ); cut1[0] = 0; cut2[0] = 0; count0 = 0; for( i=0; icenter, sortedseg1[i]->pair->center ); if( sortedseg1[i]->center > cut1[count0] && sortedseg1[i]->pair->center > cut2[count0] ) { count0++; cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { if( i && sortedseg1[i]->score > sortedseg1[i-1]->score ) { if( sortedseg1[i]->center > cut1[count0-1] && sortedseg1[i]->pair->center > cut2[count0-1] ) { cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { // count0--; } } } } // if( count-count0 ) // fprintf( stderr, "%d anchors unused\n", count-count0 ); cut1[count0+1] = len1; cut2[count0+1] = len2; count = count0 + 2; count0 = count; } } //uwagaki! 
#endif marginfac1 = 1.0 + estimategapfreq( clus1, seq1 ); marginfac2 = 1.0 + estimategapfreq( clus2, seq2 ); starttermcut1 = starttermcut2 = 0; endtermcut1 = endtermcut2 = 0; // reporterr( "marginfac1=%f\n", marginfac1 ); // reporterr( "marginfac2=%f\n", marginfac2 ); // reporterr( "length1,length2=%d,%d\n", len1, len2 ); // reporterr( "pairanch when uwagaki: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); // reporterr( "pairanch when uwagaki: i=%d, j=%d\n", pairanch[0].i, pairanch[0].j ); count = count0 = 0; cut1[0] = 0; cut2[0] = 0; alignorcopy[0] = 'a'; // while( pairanch[count].i == 0 && pairanch[count].j == 0 ) // ato de kentou while( pairanch[count0].i > -1 ) { if( pairanch[count0].starti == -1 ) { count0++; continue; } if( count+2 > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); #if 1 // mattan no tansaku hann'i wo seigen if( count == 0 ) { // if( pairanch[count0].starti - pairanch[count0].startj > TERMINALSEGMENTLENGTH ) // you kentou // nogaplen1 = estimatenogaplen( clus1, seq1, pairanch[count0].starti, 0 ); // nogaplen2 = estimatenogaplen( clus2, seq2, pairanch[count0].startj, 0 ); if( pairanch[count0].starti > terminalmargin(pairanch[count0].startj,marginfac1) ) { // alignorcopy[1] = 'A'; // reporterr( "check 1, because starti=%d > startj=%d -> %d (clus1=%d)\n", pairanch[count0].starti, pairanch[count0].startj, terminalmargin(pairanch[count0].startj,marginfac1), clus1 ); cutadd = pairanch[count0].starti - terminalmargin(pairanch[count0].startj,marginfac1); // reporterr( "cutadd(1)=%d\n", cutadd ); // if( 1 || cutadd > TERMINALMARGIN(0) ) // iranai { cut1[1] = cutadd; cut2[1] = 0; count += 1; alignorcopy[1] = 'A'; starttermcut1 = 1; } } else if( pairanch[count0].startj > terminalmargin(pairanch[count0].starti, marginfac2) ) { // alignorcopy[1] = 'A'; // reporterr( "check 2, because startj=%d > starti=%d -> %d (clus2=%d)\n", pairanch[count0].startj, pairanch[count0].starti, terminalmargin(pairanch[count0].starti,marginfac2), clus2 ); cutadd = 
pairanch[count0].startj - terminalmargin( pairanch[count0].starti, marginfac2 ); // reporterr( "cutadd(2)=%d\n", cutadd ); { cut1[1] = 0; cut2[1] = cutadd; count += 1; alignorcopy[1] = 'A'; starttermcut2 = 1; } } } #endif #if 1 // reporterr( "pairanch when uwagaki: %d:%d\n", pairanch[count0].starti, pairanch[count0].startj ); cut1[count+1] = pairanch[count0].starti; cut2[count+1] = pairanch[count0].startj; alignorcopy[count+1] = 'c'; count += 1; #if 1 if( pairanch[count0].endi - cut1[count] == pairanch[count0].endj - cut2[count] ) while( pairanch[count0].endi+1 - cut1[count] > 100 && pairanch[count0].endj+1 - cut2[count] > 100 ) { reporterr( "added an anchor, because the length is %d,%d > 100\n", pairanch[count0].endi+1 - cut1[count], pairanch[count0].endj+1 - cut2[count] ); cut1[count+1] = cut1[count] + 100; cut2[count+1] = cut2[count] + 100; alignorcopy[count+1] = 'c'; count += 1; } #endif cut1[count+1] = pairanch[count0].endi+1; cut2[count+1] = pairanch[count0].endj+1; alignorcopy[count+1] = 'a'; // reporterr( "\n###cut1 at %d / %d\n", cut1[count+1], len1 ); // reporterr( "###cut2 at %d / %d\n", cut2[count+1], len2 ); // reporterr( "sa1=%d, sa2=%d\n", cut1[count+1]-cut1[count], cut2[count+1]-cut2[count] ); count += 1; count0++; } #if 1 // mattan no tansaku hanni wo seigen alignorcopy[count] = 'a'; // if( count > 1 && (len1-cut1[count]) > (len2-cut2[count]) + 2*TERMINALSEGMENTLENGTH ) // 2 ha tekitou if( count > 1 && (len1-cut1[count]) > terminalmargin(len2-cut2[count],marginfac1) ) { // reporterr( "last\n" ); // alignorcopy[count] = 'A'; // mae no wo uwagaki //reporterr( "insert one anchor to restrict terminal gap length, 1, cut1[count]=%d, cut2[count]=%d\n", cut1[count], cut2[count] ); //alignorcopy[count] = 'A'; // mae no wo uwagaki // cut1[count+1] = cut1[count] + TERMINALSEGMENTLENGTH; // cut1[count+1] = cut1[count] + (len2-cut2[count]) + TERMINALSEGMENTLENGTH; cutadd = len1 - 1 - ( (len1-cut1[count]) - terminalmargin(len2-cut2[count], marginfac1) ); // 
wakarinikuikedo // if( 1 || len1-1 - cutadd > TERMINALMARGIN(0) ) { alignorcopy[count] = 'A'; // mae no wo uwagaki cut1[count+1] = cutadd; cut2[count+1] = len2; alignorcopy[count+1] = 'a'; cut1[count+2] = len1; cut2[count+2] = len2; alignorcopy[count+2] = 'c'; // tsukawanai count += 1; endtermcut1 = 1; } } // else if( count > 1 && (len2-cut2[count]) > (len1-cut1[count]) + 2*TERMINALSEGMENTLENGTH ) // 2 ha tekitou else if( count > 1 && (len2-cut2[count]) > terminalmargin(len1-cut1[count],marginfac2) ) { // reporterr( "last\n" ); // alignorcopy[count] = 'A'; // mae no wo uwagaki //reporterr( "insert one anchor to restrict terminal gap length, 2, cut1[count]=%d, cut2[count]=%d\n", cut1[count], cut2[count] ); //alignorcopy[count] = 'A'; // mae no wo uwagaki cutadd = len2 - 1 - ( (len2-cut2[count]) - terminalmargin(len1-cut1[count], marginfac2) ); // if( 1 || len2-1 - cutadd > TERMINALMARGIN(0) ) // iranai { alignorcopy[count] = 'A'; // mae no wo uwagaki cut1[count+1] = len1; cut2[count+1] = cutadd; alignorcopy[count+1] = 'a'; cut1[count+2] = len1; cut2[count+2] = len2; alignorcopy[count+2] = 'c'; // tsukawanai count += 1; endtermcut2 = 1; } } #endif if( cut1[count] != len1 || cut2[count] != len2 ) { cut1[count+1] = len1; cut2[count+1] = len2; alignorcopy[count+1] = 'c'; // tsukawanai kedo count += 1; } count += 1; #if 0 for( i=0; i%d of GROUP1\n", j ); fprintf( stdout, "%s\n", tmpres1[j] ); } for( j=0; j%d of GROUP2\n", j ); fprintf( stdout, "%s\n", tmpres2[j] ); } fflush( stdout ); #endif // reporterr( "i=%d, orilen1=%d, len1=%d, strlen(tmpseq1[0])=%d\n", i, orilen1, len1, strlen(tmpres1[0]) ); // if( i%2 == 1 && orilen1==len1 && orilen1==orilen2 && orilen1==strlen( tmpres1[0] ) ) // zenchou itchi no toki nomi // if( 0 && i%2 == 1 && orilen1==orilen2 && orilen1==strlen( tmpres1[0] ) && !strcmp( tmpres1[0], tmpres2[0] ) ) // ato de fukkatsu saseru if( alignorcopy[i] == 'c' && orilen1==orilen2 && orilen1==strlen( tmpres1[0] ) && !strcmp( tmpres1[0], tmpres2[0] ) ) // 
ato de fukkatsu saseru { // checklength = 1; #if 0 reporterr( "\ncopying\n" ); for( j=0; j alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; jgroup1-%d\n%100.100s\n", j, result1[j] ); } fprintf( stderr, "- - - - - - - - - - -\n" ); for( j=0; jgroup2-%d\n%100.100s\n", j, result2[j] ); } // if( clus1 == 1 && clus2 == 5 ) exit( 1 ); #endif return( totalscore ); } /* sakujo wo kentou (2010/10/05) */ double Falign_udpari_long( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, int *fftlog ) { int i, j, k, l, m, maxk; int nlen, nlen2, nlen4; static TLS int crossscoresize = 0; char **tmpseq1 = NULL; char **tmpseq2 = NULL; char **tmpptr1 = NULL; char **tmpptr2 = NULL; char **tmpres1 = NULL; char **tmpres2 = NULL; char **result1 = NULL; char **result2 = NULL; #if RND char **rndseq1 = NULL; char **rndseq2 = NULL; #endif static TLS Fukusosuu **seqVector1 = NULL; static TLS Fukusosuu **seqVector2 = NULL; static TLS Fukusosuu **naiseki = NULL; static TLS Fukusosuu *naisekiNoWa = NULL; static TLS double *soukan = NULL; static TLS double **crossscore = NULL; int nlentmp; static TLS int *kouho = NULL; static TLS Segment *segment = NULL; static TLS Segment *segment1 = NULL; static TLS Segment *segment2 = NULL; static TLS Segment **sortedseg1 = NULL; static TLS Segment **sortedseg2 = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; char *sgap1, *egap1, *sgap2, *egap2; static TLS int localalloclen = 0; int lag; int tmpint; int count, count0; int len1, len2; int totallen; double totalscore; int nkouho = 0; int headgp, tailgp; // double dumfl = 0.0; if( seq1 == NULL ) { if( kouho ) { // fprintf( stderr, "### Freeing localarrays in Falign\n" ); localalloclen = 0; crossscoresize = 0; mymergesort( 0, 0, NULL ); alignableReagion( 0, 0, 
NULL, NULL, NULL, NULL, NULL ); fft( 0, NULL, 1 ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); A__align_variousdist( NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); D__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL ); if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = NULL; // reallocate sareru kanousei ga arunode. free( kouho ); kouho = NULL; free( cut1 ); free( cut2 ); free( segment ); free( segment1 ); free( segment2 ); free( sortedseg1 ); free( sortedseg2 ); if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } } else { // fprintf( stderr, "Did not allocate localarrays in Falign\n" ); } return( 0.0 ); } len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if 0 fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif result1 = AllocateCharMtx( clus1, alloclen ); result2 = AllocateCharMtx( clus2, alloclen ); tmpres1 = AllocateCharMtx( clus1, alloclen ); tmpres2 = AllocateCharMtx( clus2, alloclen ); sgap1 = AllocateCharVec( clus1 ); egap1 = AllocateCharVec( clus1 ); sgap2 = AllocateCharVec( clus2 ); egap2 = AllocateCharVec( clus2 ); tmpseq1 = AllocateCharMtx( clus1, nlen ); tmpseq2 = AllocateCharMtx( clus2, nlen ); tmpptr1 = calloc( clus1, sizeof(char*) ); tmpptr2 = calloc( clus2, sizeof(char*) ); #if RND rndseq1 = AllocateCharMtx( clus1, nlen ); 
rndseq2 = AllocateCharMtx( clus2, nlen ); for( i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { if( fftkeika ) fprintf( stderr, " FFT ... " ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; mCandidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr1[0] ); fprintf( fftfp, ">Candidate No.%d lag = %d\n", k+1, lag ); fprintf( fftfp, "%s\n", tmpptr2[0] ); fprintf( fftfp, ">\n", k+1, lag ); fclose( fftfp ); #endif // fprintf( stderr, "lag = %d\n", lag ); tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count ); // fprintf( stderr, "lag = %d, %d found\n", lag, tmpint ); // if( lag == -50 ) exit( 1 ); if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); // fprintf( stderr, "##### k=%d / %d\n", k, maxk ); // if( tmpint == 0 ) break; // 060430 iinoka ? 
// 090530 yameta while( tmpint-- > 0 ) { #if 0 if( segment[count].end - segment[count].start < fftWinSize ) { count++; continue; } #endif if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fprintf( stderr, "##### k=%d / %d\n", k, maxk ); fprintf( stderr, "anchor %d, score = %f\n", count, segment1[count].score ); fprintf( stderr, "in 1 %d\n", segment1[count].center ); fprintf( stderr, "in 2 %d\n", segment2[count].center ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 if( !kobetsubunkatsu ) if( fftkeika ) fprintf( stderr, "done. (%d anchors) ", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" 
); #if 0 fprintf( stderr, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( count < 5000 ) { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 if( fftkeika ) fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if 0 fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } } else { fprintf( stderr, "\nMany anchors were found. The upper-level DP is skipped.\n\n" ); cut1[0] = 0; cut2[0] = 0; count0 = 0; for( i=0; icenter, sortedseg1[i]->pair->center ); if( sortedseg1[i]->center > cut1[count0] && sortedseg1[i]->pair->center > cut2[count0] ) { count0++; cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { if( i && sortedseg1[i]->score > sortedseg1[i-1]->score ) { if( sortedseg1[i]->center > cut1[count0-1] && sortedseg1[i]->pair->center > cut2[count0-1] ) { cut1[count0] = sortedseg1[i]->center; cut2[count0] = sortedseg1[i]->pair->center; } else { // count0--; } } } } // if( count-count0 ) // fprintf( stderr, "%d anchors unused\n", count-count0 ); cut1[count0+1] = len1; cut2[count0+1] = len2; count = count0 + 2; count0 = count; } } // exit( 0 ); #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l%d of GROUP1\n", j ); fprintf( stdout, "%s\n", tmpres1[j] ); } for( j=0; j%d of GROUP2\n", j ); fprintf( stdout, "%s\n", tmpres2[j] ); } fflush( stdout ); #endif 
switch( alg ) { case( 'M' ): if( scoringmatrices ) // called by tditeration.c totalscore += MSalignmm_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp ); else totalscore += MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp, NULL, NULL, NULL, 0.0, 0.0 ); // totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp ); // CHUUI!!! break; default: fprintf( stderr, "alg = %c\n", alg ); ErrorExit( "ERROR IN SOURCE FILE Falign.c" ); break; } nlen = strlen( tmpres1[0] ); if( totallen + nlen > alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j fmodel 0->default -1->raw int nblosum; // 45, 50, 62, 80 int kobetsubunkatsu; int bunkatsu; int dorp = NOTSPECIFIED; // arguments de shitei suruto, tbfast -> pairlocalalign no yobidashi de futsugou int niter; int contin; int calledByXced; int devide; int scmtd; int weight; int utree; int tbutree; int refine; int check; double cut; int cooling; int trywarp = 0; int penalty, ppenalty, penaltyLN; int penalty_dist, ppenalty_dist; int RNApenalty, RNAppenalty; int RNApenalty_ex, RNAppenalty_ex; int penalty_ex, ppenalty_ex, penalty_exLN; int penalty_EX, ppenalty_EX; int penalty_OP, ppenalty_OP; int penalty_shift, ppenalty_shift; double penalty_shift_factor = 100.0; int RNAthr, RNApthr; int offset, poffset, offsetLN, offsetFFT; int scoremtx; int TMorJTT; char use_fft; char force_fft; int nevermemsave; int fftscore; int fftWinSize; int fftThreshold; int fftRepeatStop; int fftNoAnchStop; int divWinSize; int divThreshold; int disp; int outgap = 1; char alg; int cnst; int mix; int tbitr; int tbweight; int tbrweight; int disopt; int pamN; int checkC; double geta2; int treemethod; int kimuraR; char *swopt; int 
fftkeika; int score_check; int makedistmtx; char *inputfile; char *addfile; int addprofile = 1; int rnakozo; char rnaprediction; int scoreout = 0; int spscoreout = 0; int outnumber = 0; int legacygapcost = 0; double minimumweight = 0.0005; int nwildcard = 0; char *signalSM; FILE *prep_g; FILE *trap_g; char **seq_g; char **res_g; double consweight_multi = 1.0; double consweight_rna = 0.0; char RNAscoremtx = 'n'; TLS char *newgapstr = "-"; int nalphabets = 26; int nscoredalphabets = 20; double specificityconsideration = 0.0; int ndistclass = 10; int maxdistclass = -1; int gmsg = 0; double sueff_global = SUEFF; double lenfaca, lenfacb, lenfacc, lenfacd; int maxl, tsize; char codonpos = 0; char codonscore = 0; void initglobalvariables() { commonAlloc1 = 0; commonAlloc2 = 0; commonIP = NULL; commonJP = NULL; nthread = 1; randomseed = 0; parallelizationstrategy = BAATARI1; trywarp = 0; penalty_shift_factor = 100.0; outgap = 1; addprofile = 1; scoreout = 0; outnumber = 0; legacygapcost = 0; consweight_multi = 1.0; consweight_rna = 0.0; RNAscoremtx = 'n'; newgapstr = "-"; nalphabets = 26; nscoredalphabets = 20; specificityconsideration = 0.0; ndistclass = 10; maxdistclass = -1; gmsg = 0; } // for usetmpfile int compacttree = 0; int lhlimit = INT_MAX; int specifictarget = 0; int nadd = 0; // <- static in tbfast.c, pairlocalalign.c int usenaivescoreinsteadofalignmentscore = 0; int nthreadreadlh = 1; mafft-7.505-without-extensions/core/Falign_localhom.c0000644000175000017500000005300214224501721022223 0ustar nileshnilesh#include "mltaln.h" //static FILE *fftfp; static TLS int n20or4or2; #define KEIKA 0 #define RND 0 #define DEBUG 0 extern int fft( int, Fukusosuu *, int ); #if 0 static void generateRndSeq( char *seq, int len ) { while( len-- ) #if 1 *seq++ = (int)( rnd() * n20or4or2 ); #else *seq++ = (int)1; #endif } #endif static void vec_init( Fukusosuu *result, int nlen ) { while( nlen-- ) { result->R = result->I = 0.0; result++; } } #if 0 static void vec_init2( Fukusosuu 
**result, char *seq, double eff, int st, int ed ) { int i; for( i=st; i= 0 ) result->R += incr * score[n]; #if 0 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R ); #endif } } static void seq_vec_3( Fukusosuu **result, double incr, char *seq ) { int i; int n; for( i=0; *seq; i++ ) { n = amino_n[(unsigned char)*seq++]; if( n < n20or4or2 && n >= 0 ) result[n][i].R += incr; } } #if 0 static void seq_vec( Fukusosuu *result, char query, double incr, char *seq ) { #if 0 int bk = nlen; #endif while( *seq ) { if( *seq++ == query ) result->R += incr; result++; #if 0 fprintf( stderr, "i = %d result->R = %f\n", bk-nlen, (result-1)->R ); #endif } } static int checkRepeat( int num, int *cutpos ) { int tmp, buf; buf = *cutpos; while( num-- ) { if( ( tmp = *cutpos++ ) < buf ) return( 1 ); buf = tmp; } return( 0 ); } static int segcmp( void *ptr1, void *ptr2 ) { int diff; Segment **seg1 = (Segment **)ptr1; Segment **seg2 = (Segment **)ptr2; #if 0 return( (*seg1)->center - (*seg2)->center ); #else diff = (*seg1)->center - (*seg2)->center; if( diff ) return( diff ); diff = (*seg1)->start - (*seg2)->start; if( diff ) return( diff ); diff = (*seg1)->end - (*seg2)->end; if( diff ) return( diff ); fprintf( stderr, "USE STABLE SORT !!\n" ); exit( 1 ); return( 0 ); #endif } #endif static void mymergesort( int first, int last, Segment **seg ) { int middle; static TLS int i, j, k, p; static TLS int allo = 0; static TLS Segment **work = NULL; if( seg == NULL ) { free( work ); work = NULL; return; } if( last > allo ) { allo = last; if( work ) free( work ); work = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) ); } if( first < last ) { middle = ( first + last ) / 2; mymergesort( first, middle, seg ); mymergesort( middle+1, last, seg ); p = 0; for( i=first; i<=middle; i++ ) work[p++] = seg[i]; i = middle + 1; j = 0; k = first; while( i <= last && j < p ) { if( work[j]->center <= seg[i]->center ) seg[k++] = work[j++]; else seg[k++] = seg[i++]; } 
while( j < p ) seg[k++] = work[j++]; } } double Falign_localhom( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, int constraint, double *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres ) { // tditeration.c deha alloclen ha huhen nanode // prevalloclen ha iranai. int i, j, k, l, m, maxk; int nlen, nlen2, nlen4; static TLS int crossscoresize = 0; static TLS char **tmpseq1 = NULL; static TLS char **tmpseq2 = NULL; static TLS char **tmpptr1 = NULL; static TLS char **tmpptr2 = NULL; static TLS char **tmpres1 = NULL; static TLS char **tmpres2 = NULL; static TLS char **result1 = NULL; static TLS char **result2 = NULL; #if RND static TLS char **rndseq1 = NULL; static TLS char **rndseq2 = NULL; #endif static TLS Fukusosuu **seqVector1 = NULL; static TLS Fukusosuu **seqVector2 = NULL; static TLS Fukusosuu **naiseki = NULL; static TLS Fukusosuu *naisekiNoWa = NULL; static TLS double *soukan = NULL; static TLS double **crossscore = NULL; int nlentmp; static TLS int *kouho = NULL; static TLS Segment *segment = NULL; static TLS Segment *segment1 = NULL; static TLS Segment *segment2 = NULL; static TLS Segment **sortedseg1 = NULL; static TLS Segment **sortedseg2 = NULL; static TLS int *cut1 = NULL; static TLS int *cut2 = NULL; static TLS char *sgap1, *egap1, *sgap2, *egap2; static TLS int localalloclen = 0; int lag; int tmpint; int count, count0; int len1, len2; int totallen; double totalscore; double impmatch; extern Fukusosuu *AllocateFukusosuuVec(); extern Fukusosuu **AllocateFukusosuuMtx(); if( seq1 == NULL ) { if( result1 ) { // fprintf( stderr, "Freeing localarrays in Falign\n" ); localalloclen = 0; crossscoresize = 0; mymergesort( 0, 0, NULL ); alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL ); fft( 0, NULL, 1 ); // A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 ); // iru? G__align11( NULL, NULL, NULL, 0, 0, 0 ); partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL ); partA__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL ); blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL ); if( crossscore ) FreeDoubleMtx( crossscore ); FreeCharMtx( result1 ); FreeCharMtx( result2 ); FreeCharMtx( tmpres1 ); FreeCharMtx( tmpres2 ); FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); free( sgap1 ); free( egap1 ); free( sgap2 ); free( egap2 ); free( kouho ); free( cut1 ); free( cut2 ); free( tmpptr1 ); free( tmpptr2 ); free( segment ); free( segment1 ); free( segment2 ); free( sortedseg1 ); free( sortedseg2 ); if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } } else { // fprintf( stderr, "Did not allocate localarrays in Falign\n" ); } return( 0.0 ); } len1 = strlen( seq1[0] ); len2 = strlen( seq2[0] ); nlentmp = MAX( len1, len2 ); nlen = 1; while( nlentmp >= nlen ) nlen <<= 1; #if 0 fprintf( stderr, "### nlen = %d\n", nlen ); #endif nlen2 = nlen/2; nlen4 = nlen2 / 2; #if DEBUG fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 ); fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen ); #endif if( !localalloclen ) { sgap1 = AllocateCharVec( njob ); egap1 = AllocateCharVec( njob ); sgap2 = AllocateCharVec( njob ); egap2 = AllocateCharVec( njob ); kouho = AllocateIntVec( NKOUHO ); cut1 = AllocateIntVec( MAXSEG ); cut2 = AllocateIntVec( MAXSEG ); tmpptr1 = AllocateCharMtx( njob, 0 ); tmpptr2 = AllocateCharMtx( njob, 0 ); result1 = AllocateCharMtx( njob, alloclen ); result2 = AllocateCharMtx( njob, alloclen ); tmpres1 = AllocateCharMtx( njob, alloclen ); tmpres2 = AllocateCharMtx( njob, alloclen ); // 
crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG ); segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) ); if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) ) ErrorExit( "Allocation error\n" ); if ( scoremtx == -1 ) n20or4or2 = 4; else if( fftscore == 1 ) n20or4or2 = 2; else n20or4or2 = 20; } if( localalloclen < nlen ) { if( localalloclen ) { #if 1 if( !kobetsubunkatsu ) { FreeFukusosuuMtx ( seqVector1 ); FreeFukusosuuMtx ( seqVector2 ); FreeFukusosuuVec( naisekiNoWa ); FreeFukusosuuMtx( naiseki ); FreeDoubleVec( soukan ); } FreeCharMtx( tmpseq1 ); FreeCharMtx( tmpseq2 ); #endif #if RND FreeCharMtx( rndseq1 ); FreeCharMtx( rndseq2 ); #endif } tmpseq1 = AllocateCharMtx( njob, nlen ); tmpseq2 = AllocateCharMtx( njob, nlen ); if( !kobetsubunkatsu ) { naisekiNoWa = AllocateFukusosuuVec( nlen ); naiseki = AllocateFukusosuuMtx( n20or4or2, nlen ); seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 ); soukan = AllocateDoubleVec( nlen+1 ); } #if RND rndseq1 = AllocateCharMtx( njob, nlen ); rndseq2 = AllocateCharMtx( njob, nlen ); for( i=0; i /dev/tty" ); #endif if( !kobetsubunkatsu ) { fprintf( stderr, "FFT ... 
" ); for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( j=0; j /dev/tty" ); #endif for( k=0; k /dev/tty " ); #endif fft( -nlen, naisekiNoWa, 0 ); for( m=0; m<=nlen2; m++ ) soukan[m] = naisekiNoWa[nlen2-m].R; for( m=nlen2+1; m /dev/tty" ); #if 0 fftfp = fopen( "list.plot", "w" ); fprintf( fftfp, "plot 'frt'\n pause +1" ); fclose( fftfp ); system( "/usr/bin/gnuplot list.plot" ); #endif #endif getKouho( kouho, NKOUHO, soukan, nlen ); #if 0 for( i=0; i MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" ); while( tmpint-- > 0 ) { if( lag > 0 ) { segment1[count].start = segment[count].start ; segment1[count].end = segment[count].end ; segment1[count].center = segment[count].center; segment1[count].score = segment[count].score; segment2[count].start = segment[count].start + lag; segment2[count].end = segment[count].end + lag; segment2[count].center = segment[count].center + lag; segment2[count].score = segment[count].score ; } else { segment1[count].start = segment[count].start - lag; segment1[count].end = segment[count].end - lag; segment1[count].center = segment[count].center - lag; segment1[count].score = segment[count].score ; segment2[count].start = segment[count].start ; segment2[count].end = segment[count].end ; segment2[count].center = segment[count].center; segment2[count].score = segment[count].score ; } #if 0 fftfp = fopen( "cand", "a" ); fprintf( fftfp, "Goukaku=%dko\n", tmpint ); fprintf( fftfp, "in 1 %d\n", segment1[count].center ); fprintf( fftfp, "in 2 %d\n", segment2[count].center ); fclose( fftfp ); #endif segment1[count].pair = &segment2[count]; segment2[count].pair = &segment1[count]; count++; #if 0 fprintf( stderr, "count=%d\n", count ); #endif } } #if 1 if( !kobetsubunkatsu ) fprintf( stderr, "%d segments found\n", count ); #endif if( !count && fftNoAnchStop ) ErrorExit( "Cannot detect anchor!" 
); #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT before sort:\n" ); for( l=0; lnumber = i; for( i=0; inumber = i; if( kobetsubunkatsu ) { for( i=0; icenter; cut2[i+1] = sortedseg2[i]->center; } cut1[0] = 0; cut2[0] = 0; cut1[count+1] = len1; cut2[count+1] = len2; count += 2; } else { if( crossscoresize < count+2 ) { crossscoresize = count+2; #if 1 fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize ); #endif if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } for( i=0; inumber+1] = segment1[i].score; cut1[i+1] = sortedseg1[i]->center; cut2[i+1] = sortedseg2[i]->center; } #if DEBUG fprintf( stderr, "AFTER SORT\n" ); for( i=0; i count ) { #if 0 fprintf( stderr, "\7 REPEAT!? \n" ); #else fprintf( stderr, "REPEAT!? \n" ); #endif if( fftRepeatStop ) exit( 1 ); } #if KEIKA else fprintf( stderr, "done\n" ); #endif } #if 0 fftfp = fopen( "fft", "a" ); fprintf( fftfp, "RESULT after sort:\n" ); for( l=0; l alloclen ) { fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen ); ErrorExit( "LENGTH OVER in Falign\n " ); } for( j=0; j numt ) { // reporterr( "RNA!\n" ); // reporterr( "r before ttou =%s\n", r ); ttou( r+1 ); // reporterr( "r after ttou =%s\n", r ); } } void gappick_samestring( char *seq ) { char *aseq = seq; for( ; *seq != 0; seq++ ) { if( *seq != '-' ) *aseq++ = *seq; } *aseq = 0; } #if 0 static int addlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt; int st; int nlocalhom = 0; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; #if 0 fprintf( stderr, "nlocalhom = %d in addlocalhom\n", nlocalhom ); fprintf( stderr, "al1 = %s, al2 = %s\n", al1, al2 ); fprintf( stderr, "off1 = %d, off2 = %d\n", off1, off2 ); fprintf( stderr, 
"localhopt = %p, skip = %d\n", localhompt, skip ); fprintf( stderr, "pt1 = \n%s\n, pt2 = \n%s\n", pt1, pt2 ); #endif if( skip ) { while( --skip > 0 ) localhompt = localhompt->next; localhompt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); localhompt = localhompt->next; // fprintf( stderr, "tmppt = %p, localhompt = %p\n", tmppt, localhompt ); } tmppt = localhompt; st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "In in while loop\n" ); // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 isumscore += iscore; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = iscore * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // fprintf( stderr, "%c-%c, score(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( st ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; #if 1 isumscore += iscore; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; 
tmppt->opt = (double)iscore * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = (double)sumscore * 5.8 / 600 / sumoverlap; } return( nlocalhom ); } #endif static int addlocalhom_r( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip, char korh ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt = NULL; // by D.Mathog, a guess int st; int nlocalhom = 0; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; sumscore = 0.0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess #if 0 fprintf( stderr, "nlocalhom = %d in addlocalhom\n", nlocalhom ); fprintf( stderr, "al1 = %s, al2 = %s\n", al1, al2 ); fprintf( stderr, "off1 = %d, off2 = %d\n", off1, off2 ); fprintf( stderr, "localhopt = %p, skip = %d\n", localhompt, skip ); #endif fprintf( stderr, "pt1 = \n%s\n, pt2 = \n%s\n", pt1, pt2 ); if( skip ) { while( --skip > 0 ) localhompt = localhompt->next; localhompt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); localhompt = localhompt->next; fprintf( stderr, "tmppt = %p, localhompt = %p\n", (void *)tmppt, (void *)localhompt ); } tmppt = localhompt; st = 0; score = 0.0; while( *pt1 != 0 ) { fprintf( stderr, "In in while loop\n" ); fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; 
tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 sumscore += score; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = score * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 sumscore += score; sumoverlap += end2-start2+1; #else tmppt->overlapaa = end2-start2+1; tmppt->opt = score * 5.8 / 600; tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif fprintf( stderr, "score (2)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; tmppt->opt = sumscore * 5.8 / 600 / sumoverlap; } return( nlocalhom ); } void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt; LocalHom *subnosento; int st; int saisho; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; sumscore = 0.0; sumoverlap = 0; start1 = 0; // by Mathog, a guess start2 = 0; // by Mathog, a guess 
subnosento = localhompt; while( subnosento->next ) subnosento = subnosento->next; tmppt = subnosento; saisho = ( localhompt->nokori == 0 ); fprintf( stderr, "localhompt = %p\n", (void *)localhompt ); fprintf( stderr, "tmppt = %p\n", (void *)tmppt ); fprintf( stderr, "subnosento = %p\n", (void *)subnosento ); st = 0; score = 0.0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( localhompt->nokori++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; if( tmppt->overlapaa>0) tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( localhompt->nokori++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; 
tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; if( tmppt->overlapaa>0) tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (2)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } fprintf( stderr, "sumscore = %f\n", sumscore ); if( !divpairscore ) { if( !saisho ) subnosento = subnosento->next; for( tmppt=subnosento; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; if( tmppt->overlapaa>0) tmppt->opt = sumscore * 5.8 / 600 / sumoverlap; else tmppt->opt = -1.0; fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; if( tmppt->overlapaa>0) tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { isumscore += iscore; sumoverlap += 
end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; if( tmppt->overlapaa>0) tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } if( !divpairscore ) { for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; // tmppt->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); tmppt->opt = (double)600 * 5.8 / 600; // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } } void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh ) { int posinaln, pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int isumscore; int sumoverlap; LocalHom 
*tmppt = localhompt; int nlocalhom = 0; // int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess posinaln = 0; while( *pt1 != 0 ) { if( *pt1 != '-' && *pt2 != '-' && equiv[posinaln] > 0.0 ) { start1 = end1 = pos1; start2 = end2 = pos2; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ... (posinaln=%d)\n", posinaln ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; tmppt->overlapaa = 1; // tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; tmppt->opt = equiv[posinaln] * scale; // fprintf( stdout, "*pt1=%c, *pt2=%c, equiv=%f\n", *pt1, *pt2, equiv[posinaln] ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; posinaln++; } } void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; isumscore = 0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; iscore = 0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; tmppt->nokori += 1; localhompt->last = tmppt; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; 
if(tmppt->overlapaa>0) tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif iscore = 0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } iscore += n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( *(pt1-1) != '-' && *(pt2-1) != '-' ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; tmppt->nokori += 1; localhompt->last = tmppt; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; if(tmppt->overlapaa>0) tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; else tmppt->opt = -1.0; } else { isumscore += iscore; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "iscore (2)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif } if( !divpairscore ) { for( tmppt=localhompt; tmppt; tmppt=tmppt->next ) { tmppt->overlapaa = sumoverlap; if(tmppt->overlapaa>0) tmppt->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); else tmppt->opt = -1.0; // fprintf( stderr, "tmpptr->opt = %f, sumoverlap=%d\n", tmppt->opt, sumoverlap ); } } } #if 0 void putlocalhom( char *al1, char 
*al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh ) { int pos1, pos2, start1, start2, end1, end2; char *pt1, *pt2; double score; double sumscore; int sumoverlap; LocalHom *tmppt = localhompt; int nlocalhom = 0; int st; pt1 = al1; pt2 = al2; pos1 = off1; pos2 = off2; sumscore = 0.0; sumoverlap = 0; start1 = 0; // by D.Mathog, a guess start2 = 0; // by D.Mathog, a guess st = 0; score = 0.0; while( *pt1 != 0 ) { // fprintf( stderr, "pt = %c, %c, st=%d\n", *pt1, *pt2, st ); if( st == 1 && ( *pt1 == '-' || *pt2 == '-' ) ) { end1 = pos1 - 1; end2 = pos2 - 1; if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; tmppt->korh = korh ; #if 1 if( divpairscore ) { tmppt->overlapaa = end2-start2+1; tmppt->opt = score / tmppt->overlapaa * 5.8 / 600; } else { sumscore += score; sumoverlap += end2-start2+1; } #else tmppt->overlapaa = overlapaa; tmppt->opt = (double)opt; #endif #if 0 fprintf( stderr, "score (1)= %f\n", score ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif score = 0.0; st = 0; } else if( *pt1 != '-' && *pt2 != '-' ) { if( st == 0 ) { start1 = pos1; start2 = pos2; st = 1; } score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â // fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score ); } if( *pt1++ != '-' ) pos1++; if( *pt2++ != '-' ) pos2++; } if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } end1 = pos1 - 1; end2 = pos2 - 1; tmppt->start1 = start1; tmppt->start2 = start2; tmppt->end1 = end1 ; tmppt->end2 = end2 ; 
/*
 * cutal
 *
 * Cuts a sub-range out of the aligned string al IN PLACE.
 *
 * Residues (every character except '-') are numbered from
 * al_display_start.  The returned pointer is the first character reached
 * while the running residue number equals start; the string is terminated
 * right after the first character reached while the number equals end.
 * Returns NULL if start is never reached.
 *
 * Change relative to the previous version: when end is not reached before
 * the terminator (or is reached exactly on it), the old code wrote a 0
 * beyond the existing terminator, i.e. possibly outside the buffer.  The
 * string is now left untouched in that case.
 */
char *cutal( char *al, int al_display_start, int start, int end )
{
	int pos = al_display_start;
	char *pt = al;
	char *val = NULL;

	for( ;; )
	{
		if( start == pos ) val = pt;
		if( end == pos )
		{
			/* truncate after the end character, but never past the
			   terminator that is already there */
			if( *pt != 0 ) *(pt+1) = 0;
			break;
		}
		if( *pt == 0 ) break;
		if( *pt != '-' ) pos++;
		pt++;
	}
	return( val );
}

/* Prints message and a newline to stderr, then terminates with status 1. */
void ErrorExit( char *message )
{
	fprintf( stderr, "%s\n", message );
	exit( 1 );
}

/*
 * allSpace
 *
 * Returns 1 when str contains no decimal digit (this includes the empty
 * string), 0 otherwise.  Despite the name, non-digit characters other than
 * blanks are accepted as well -- that matches the previous behaviour.
 *
 * Change: the character is converted to unsigned char before isdigit(),
 * which is undefined for negative arguments other than EOF.
 */
int allSpace( char *str )
{
	while( *str )
		if( isdigit( (unsigned char)*str++ ) ) return( 0 );
	return( 1 );
}
/*
 * searchKUorWA
 *
 * Advances fp to the next '>' that is the first character of a line (or of
 * the stream) and pushes that '>' back, so the next read returns it.
 * If no such '>' exists the stream is left at end-of-file.
 *
 * Change relative to the previous version: the old loop only stopped at
 * EOF when the preceding character was a newline, so input whose last
 * byte was not '\n' made it spin forever.  EOF now always ends the scan.
 */
void searchKUorWA( FILE *fp )
{
	int c, b;

	b = '\n';	/* treat the start of the stream as a line start */
	while( ( c = getc( fp ) ) != EOF )
	{
		if( c == '>' && b == '\n' ) break;
		b = c;
	}
	ungetc( c, fp );	/* has no effect when c is EOF */
}
/*
 * charfilter
 *
 * Compacts str in place by dropping line feeds (0x0a), carriage returns
 * (0x0d) and blanks (0x20); every other byte, printable or not, is kept.
 * Returns the resulting length.
 *
 * The characters '=', '<' and '>' are not allowed: on meeting one, a
 * message is printed to stderr and the program exits with status 1.
 */
static int charfilter( unsigned char *str )
{
	unsigned char tmp;
	unsigned char *res = str;
	unsigned char *bk = str;

	while( (tmp=*str++) )
	{
		if( tmp == '=' || tmp == '<' || tmp == '>' )
		{
			fprintf( stderr, "\n" );
			fprintf( stderr, "Characters '= < >' can be used only in the title lines in the --anysymbol or --text mode.\n" );
			fprintf( stderr, "\n" );
			exit( 1 );
		}
		if( tmp != 0x0a && tmp != 0x20 && tmp != 0x0d )
			*res++ = tmp;
	}
	*res = 0;
	return( res - bk );
}

/*
 * onlyAlpha_lower
 *
 * Compacts str in place, keeping only alphabetic characters and '-', '*',
 * '.', converted to lower case.  Returns the resulting length.
 *
 * Change: bytes are converted to unsigned char before isalpha()/tolower();
 * passing a negative plain char to those functions is undefined.
 */
static int onlyAlpha_lower( char *str )
{
	unsigned char tmp;
	char *res = str;
	char *bk = str;

	while( (tmp=(unsigned char)*str++) )
		if( isalpha( tmp ) || tmp == '-' || tmp == '*' || tmp == '.' )
			*res++ = (char)tolower( tmp );
	*res = 0;
	return( res - bk );
}

/*
 * onlyAlpha_upper
 *
 * Same as onlyAlpha_lower(), but the kept characters are converted to
 * upper case.
 */
static int onlyAlpha_upper( char *str )
{
	unsigned char tmp;
	char *res = str;
	char *bk = str;

	while( (tmp=(unsigned char)*str++) )
		if( isalpha( tmp ) || tmp == '-' || tmp == '*' || tmp == '.' )
			*res++ = (char)toupper( tmp );
	*res = 0;
	return( res - bk );
}
) *res++ = toupper( tmp ); *res = 0; return( res - bk ); } void kake2hiku( char *str ) { do if( *str == '*' ) *str = '-'; while( *str++ ); } char *load1SeqWithoutName_realloc_casepreserve( FILE *fpp ) { int c, b; char *cbuf; int size = N; char *val; val = malloc( (size+1) * sizeof( char ) ); cbuf = val; b = '\n'; while( ( c = getc( fpp ) ) != EOF && !( ( c == '>' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ if( cbuf - val == size ) { size += N; fprintf( stderr, "reallocating...\n" ); val = (char *)realloc( val, (size+1) * sizeof( char ) ); if( !val ) { fprintf( stderr, "Allocation error in load1SeqWithoutName_realloc \n" ); exit( 1 ); } fprintf( stderr, "done.\n" ); cbuf = val + size-N; } b = c; } ungetc( c, fpp ); *cbuf = 0; // onlyGraph( val ); charfilter( (unsigned char *) val ); // kake2hiku( val ); return( val ); } char *load1SeqWithoutName_realloc( FILE *fpp ) { int c, b; char *cbuf; int size = N; char *val; val = malloc( (size+1) * sizeof( char ) ); cbuf = val; b = '\n'; while( ( c = getc( fpp ) ) != EOF && !( ( c == '>' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ if( cbuf - val == size ) { size += N; fprintf( stderr, "reallocating...\n" ); val = (char *)realloc( val, (size+1) * sizeof( char ) ); if( !val ) { fprintf( stderr, "Allocation error in load1SeqWithoutName_realloc \n" ); exit( 1 ); } fprintf( stderr, "done.\n" ); cbuf = val + size-N; } b = c; } ungetc( c, fpp ); *cbuf = 0; if( nblosum == -2 ) { charfilter( (unsigned char *) val ); } else { if( dorp == 'd' ) onlyAlpha_lower( val ); else onlyAlpha_upper( val ); kake2hiku( val ); } return( val ); } int load1SeqWithoutName_new( FILE *fpp, char *cbuf ) { int c, b; char *bk = cbuf; b = '\n'; while( ( c = getc( fpp ) ) != EOF && /* by T. 
Nishiyama */ !( ( c == '>' || c == EOF ) && b == '\n' ) ) { *cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */ b = c; } ungetc( c, fpp ); *cbuf = 0; if( dorp == 'd' ) onlyAlpha_lower( bk ); else onlyAlpha_upper( bk ); kake2hiku( bk ); return( 0 ); } void readDataforgaln( FILE *fp, char **name, int *nlen, char **seq ) { int i; static char *tmpseq = NULL; #if 0 if( !tmpseq ) { tmpseq = AllocateCharVec( N ); } #endif rewind( fp ); searchKUorWA( fp ); for( i=0; i', stdout ); // puts( dumname+1 ); strncat( name[npos], dumname, B-1 ); name[npos][B-1] = 0; if( dorp == 'd' && upperCase != -1 ) seqLower( 1, &tmpseq ); seqlen = strlen( tmpseq ); lpos = 0; for( j=0; j<5; j++ ) { if( regtable[0][j*2] == -1 && regtable[0][j*2+1] == -1 ) continue; startpos = regtable[0][j*2]; endpos = regtable[0][j*2+1]; if( startpos > endpos ) { endpos = regtable[0][j*2]; startpos = regtable[0][j*2+1]; } if( startpos < 0 ) startpos = 0; if( endpos < 0 ) endpos = 0; if( endpos >= seqlen ) endpos = seqlen-1; if( startpos >= seqlen ) startpos = seqlen-1; // fprintf( stderr, "startpos = %d, endpos = %d\n", startpos, endpos ); outlen = endpos - startpos+1; if( revtable[0][j] == 'f' ) { // fprintf( stderr, "regtable[%d][st] = %d\n", i, regtable[0][j*2+0] ); // fprintf( stderr, "regtable[%d][en] = %d\n", i, regtable[0][j*2+1] ); // fprintf( stderr, "outlen = %d\n", outlen ); // fprintf( stdout, "%.*s\n", outlen, tmpseq+regtable[0][j*2] ); strncpy( outseq[npos] + lpos, tmpseq+startpos, outlen ); lpos += outlen; } else { fs = AllocateCharVec( outlen+1 ); rs = AllocateCharVec( outlen+1 ); fs[outlen] = 0; strncpy( fs, tmpseq+startpos, outlen ); sreverse( rs, fs ); // fprintf( stdout, "%s\n", rs ); strncpy( outseq[npos] + lpos, rs, outlen ); lpos += outlen; free( fs ); free( rs ); } outseq[npos][lpos] = 0; } npos++; } free( tmpseq ); } } void cutData( FILE *fp, int **regtable, char **revtable, int *outtable ) { int i, j; int outlen, seqlen, startpos, endpos; static char *tmpseq = NULL; static char *dumname = NULL; 
/*
 * countATGCandN
 *
 * Scans s case-insensitively and reports
 *   return value  number of letters that are one of a, t, g, c, u, n
 *   *total        number of alphabetic characters
 *   *countN       number of 'n' letters
 *
 * Changes relative to the previous version:
 *   - *countN is now set (to 0) for an empty string as well; it used to be
 *     left untouched, so callers could read an uninitialised value;
 *   - characters are converted to unsigned char before tolower()/isalpha().
 */
int countATGCandN( char *s, int *countN, int *total )
{
	int nATGC = 0;
	int nChar = 0;
	int nN = 0;
	int c;

	for( ; *s; s++ )
	{
		c = tolower( (unsigned char)*s );
		if( !isalpha( c ) ) continue;
		nChar++;
		if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' )
			nATGC++;
		if( c == 'n' )
			nN++;
	}

	*total = nChar;
	*countN = nN;
	return( nATGC );
}
/*
 * countATGC
 *
 * Returns the number of letters in s that are one of a, t, g, c, u, n
 * (case-insensitive) and stores the number of alphabetic characters in
 * *total.
 *
 * Change: characters are converted to unsigned char before
 * tolower()/isalpha(), which are undefined for negative arguments.
 */
int countATGC( char *s, int *total )
{
	int nATGC = 0;
	int nChar = 0;
	int c;

	for( ; *s; s++ )
	{
		c = tolower( (unsigned char)*s );
		if( !isalpha( c ) ) continue;
		nChar++;
		if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' )
			nATGC++;
	}

	*total = nChar;
	return( nATGC );
}

/*
 * countATGCbk
 *
 * Returns the fraction of alphabetic characters in s that are one of
 * a, t, g, c, u, n (case-insensitive).
 *
 * Changes relative to the previous version:
 *   - an empty string no longer makes the scan run past the terminator;
 *   - a string without any letter yields 0.0 instead of the result of 0/0;
 *   - characters are converted to unsigned char for the <ctype.h> calls.
 */
double countATGCbk( char *s )
{
	int nATGC = 0;
	int nChar = 0;
	int c;

	for( ; *s; s++ )
	{
		c = tolower( (unsigned char)*s );
		if( !isalpha( c ) ) continue;
		nChar++;
		if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' )
			nATGC++;
	}

	if( nChar == 0 ) return( 0.0 );
	return( (double)nATGC / nChar );
}

/* Returns the number of characters in seq that are not '-'. */
int countnogaplen( char *seq )
{
	int val = 0;

	for( ; *seq; seq++ )
		if( *seq != '-' ) val++;
	return( val );
}

/* Returns the number of characters in seq that also occur in ref. */
int countnormalletters( char *seq, char *ref )
{
	int val = 0;

	for( ; *seq; seq++ )
		if( strchr( ref, *seq ) ) val++;
	return( val );
}
*nlenminpt = 99999999; atgcnum = 0; total = 0; nnum = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; if( total < 100000 ) { atgcnum += countATGCandN( tmpseq, &nN, &nsite ); total += nsite; } nnum += nN; free( tmpseq ); // if( i % 10000 == 0 ) reporterr( "atgcnum=%d, total=%d\n", atgcnum, total ); } free( tmpname ); atgcfreq = (double)atgcnum / total; *nfreq = (double)nnum / atgcnum; // fprintf( stderr, "##### nnum = %d\n", nnum ); // fprintf( stderr, "##### atgcfreq = %f, *nfreq = %f\n", atgcfreq, *nfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } } void getnumlen_nogap( FILE *fp, int *nlenminpt ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq, *tmpname; double atgcfreq; #if mingw // web nomi de shiyou suru node nakutemo ii setmode( fileno( fp ), O_BINARY ); setmode( fileno( stdout ), O_BINARY ); #endif tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; if( total < 100000 ) { atgcnum += countATGC( tmpseq, &nsite ); total += nsite; } free( tmpseq ); } free( tmpname ); atgcfreq = (double)atgcnum / total; // fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } } void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq, *tmpname; double atgcfreq; #if mingw // web nomi de shiyou suru node nakutemo ii setmode( fileno( fp ), O_BINARY ); setmode( fileno( stdout ), O_BINARY ); #endif tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; *nlenminpt = 99999999; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( tmp < *nlenminpt ) *nlenminpt = tmp; 
/*
 * escapehtml
 *
 * Copies ori to res, replacing the characters that are special in HTML:
 *   '<' -> "&lt;"    '>' -> "&gt;"    '&' -> "&amp;"
 *   '"' -> "&quot;"  ' ' -> "&nbsp;"
 * The result is always NUL-terminated (provided maxlen > 0).
 *
 * maxlen is treated as the capacity of res in bytes, terminator included;
 * copying stops before a character whose replacement would no longer fit.
 * NOTE(review): the previous code ignored maxlen and compared against the
 * global N with a slack of 10 -- confirm that callers pass the real size
 * of res here.
 *
 * Changes relative to the previous version:
 *   - the replacement strings now match the number of bytes the output
 *     pointer is advanced by (4, 4, 5, 6, 6).  Before, a single character
 *     was written while the pointer skipped 4-6 bytes, which left a NUL
 *     and uninitialised bytes inside the result;
 *   - the output can no longer run past the end of the buffer.
 */
static void escapehtml( char *res, char *ori, int maxlen )
{
	char *res0 = res;
	const char *entity;
	size_t len;

	if( maxlen <= 0 ) return;

	for( ; *ori; ori++ )
	{
		switch( *ori )
		{
			case '<': entity = "&lt;";   break;
			case '>': entity = "&gt;";   break;
			case '&': entity = "&amp;";  break;
			case '"': entity = "&quot;"; break;
			case ' ': entity = "&nbsp;"; break;
			default:  entity = NULL;     break;
		}
		len = entity ? strlen( entity ) : 1;

		/* keep one byte for the terminator */
		if( (size_t)( res - res0 ) + len + 1 > (size_t)maxlen ) break;

		if( entity ) memcpy( res, entity, len );
		else *res = *ori;
		res += len;
	}
	*res = 0;
}
else lennormalchar = countnormalletters( tmpseq, "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv" ); free( tmpseq ); fprintf( ofp, " \n", i, i, i, i, i, lennormalchar, tmpname2 ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); } free( tmpname ); free( tmpname2 ); atgcfreq = (double)atgcnum / total; fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); // if( dorp == NOTSPECIFIED ) // you kentou { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } fprintf( ofp, "\n" ); if( *isalignedpt ) { fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); fprintf( ofp, "" ); } } void getnumlen( FILE *fp ) { int total; int nsite = 0; int atgcnum; int i, tmp; char *tmpseq; char *tmpname; double atgcfreq; #if mingw setmode( fileno( fp ), O_BINARY ); setmode( fileno( stdout ), O_BINARY ); #endif tmpname = AllocateCharVec( N ); njob = countKUorWA( fp ); searchKUorWA( fp ); nlenmax = 0; atgcnum = 0; total = 0; for( i=0; i nlenmax ) nlenmax = tmp; if( total < 1000000 ) { atgcnum += countATGC( tmpseq, &nsite ); total += nsite; } // fprintf( stderr, "##### total = %d\n", total ); free( tmpseq ); } atgcfreq = (double)atgcnum / total; // fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq ); if( dorp == NOTSPECIFIED ) { if( atgcfreq > 0.75 ) { dorp = 'd'; upperCase = -1; } else { dorp = 'p'; upperCase = 0; } } free( tmpname ); } void WriteGapFill( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq ) { static char b[N]; int i, j; int nalen[M]; static char gap[N]; static char buff[N]; #if IODEBUG fprintf( stderr, "IMAKARA KAKU\n" ); #endif nlenmax = 0; for( i=0; i%s\n", name[i]+1 ); for( j=0; j%s\n", name[i]+1 ); for( j=0; j%s\n", name[i]+1 ); for( j=0; j max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, 
"%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; max /= INTMTXSCALE; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i max ) max = mtx[i][j]; fprintf( hat2p, "%5d\n", 1 ); fprintf( hat2p, "%5d\n", locnjob ); fprintf( hat2p, " %#6.3f\n", max * 2.5 ); for( i=0; i", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; len = atoi( pt ); sumlen += len; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], score, qstart, qend, tstart, tend, overlapaa ); 
while( fgets( b, B-1, fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); scorepersite = sumscore / sumlen; if( scorepersite != (int)scorepersite ) { fprintf( stderr, "ERROR! sumscore=%f, sumlen=%f, and scorepersite=%f\n", sumscore, sumlen, scorepersite ); exit( 1 ); } if( !strncmp( " ", b, 23 ) ) break; } } free( junban ); return (int)scorepersite; } int ReadBlastm7_scoreonly( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int *junban; int overlapaa; double score, sumscore; int qstart, qend, tstart, tend; static char qal[N], tal[N], al[N]; int nlocalhom; junban = calloc( nin, sizeof( int ) ); count = 0; sumscore = 0.0; score = 0.0; while( 1 ) { if( feof( fp ) ) break; while( fgets( b, B-1, fp ) ) { if( !strncmp( " ", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; overlapaa = atoi( pt ); while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", 
junban[count], score, qstart, qend, tstart, tend, overlapaa ); // nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom ); while( fgets( b, B-1, fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); if( !strncmp( " ", b, 23 ) ) break; } } free( junban ); return count; } int ReadBlastm7( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; double score, sumscore; int qstart, qend, tstart, tend; static char qal[N], tal[N], al[N]; int nlocalhom; count = 0; sumscore = 0.0; score = 0.0; nlocalhom = 0; while( 1 ) { if( feof( fp ) ) break; while( fgets( b, B-1, fp ) ) { if( !strncmp( " ", b, 19 ) || !strncmp( " ", b, 23 ) ) break; } if( !strncmp( " ", b, 19 ) ) { junban[count] = atoi( b+31 ); nlocalhom = 0; } while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 25 ) ) break; pt = b + 25; score = atof( pt ); sumscore += score; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 30 ) ) break; pt = b + 30; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 28 ) ) break; pt = b + 28; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 26 ) ) break; pt = b + 26; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( " ", b, 29 ) ) break; pt = b + 29; overlapaa = atoi( pt ); while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( qal, al+24 ); pt = qal; while( *++pt != '<' ) ; *pt = 0; while( fgets( al, N-100, fp ) ) if( !strncmp( " ", al, 24 ) ) break; strcpy( tal, al+24 ); pt = tal; while( *++pt != '<' ) ; *pt = 0; // fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], 
score, qstart, qend, tstart, tend, overlapaa ); nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom, 'h' ); while( fgets( b, B-1, fp ) ) if( !strncmp( " :", b, 18 ) ) break; fgets( b, B-1, fp ); if( !strncmp( " ", b, 21 ) ) { dis[junban[count++]] = sumscore; sumscore = 0.0; fgets( b, B-1, fp ); fgets( b, B-1, fp ); if( !strncmp( " ", b, 23 ) ) break; } } return count; } int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int opt; double z, bits; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( 1 ) { if( strncmp( ">>+==========+", b, 14 ) ) { fgets( b, B-1, fp ); if( feof( fp ) ) break; continue; } junban[count++] = atoi( b+14 ); // fprintf( stderr, "t = %d\n", atoi( b+14 ) ); while( fgets( b, B-1, fp ) ) if( !strncmp( "; fa_opt:", b, 9 ) || !strncmp( "; sw_s-w opt:", b, 13 ) ) break; pt = strstr( b, ":" ) +1; opt = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_overlap:", b+4, 9 ) ) break; pt = strstr( b, ":" ) +1; overlapaa = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) +1; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) +1; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) +1; qal_display_start = atoi( pt ) - 1; pt = qal; while( (c = fgetc( fp )) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) + 1; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) + 1; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( 
!strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) + 1; tal_display_start = atoi( pt ) - 1; pt = tal; while( ( c = fgetc( fp ) ) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; // fprintf( stderr, "(%d-%d:%d-%d)\n", qstart, qend, tstart, tend ); // fprintf( stderr, "qal_display_start = %d, tal_display_start = %d\n", qal_display_start, tal_display_start ); // fprintf( stderr, "qal = %s\n", qal ); // fprintf( stderr, "tal = %s\n", tal ); qal2 = cutal( qal, qal_display_start, qstart, qend ); tal2 = cutal( tal, tal_display_start, tstart, tend ); // fprintf( stderr, "qal2 = %s\n", qal2 ); // fprintf( stderr, "tal2 = %s\n", tal2 ); // fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt ); putlocalhom2( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa, 'h' ); } // fprintf( stderr, "count = %d\n", count ); return count; } int ReadFasta34m10( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; int opt, qstart, qend, tstart, tend; double z, bits; int qal_display_start, tal_display_start; static char qal[N], tal[N]; char *qal2, *tal2; int c; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( 1 ) { if( strncmp( ">>+==========+", b, 14 ) ) { fgets( b, B-1, fp ); if( feof( fp ) ) break; continue; } junban[count++] = atoi( b+14 ); // fprintf( stderr, "t = %d\n", atoi( b+14 ) ); while( fgets( b, B-1, fp ) ) if( !strncmp( "; fa_opt:", b, 9 ) || !strncmp( "; sw_s-w opt:", b, 13 ) ) break; pt = strstr( b, ":" ) +1; opt = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_overlap:", b+4, 9 ) ) break; pt = strstr( b, ":" ) +1; overlapaa = atoi( pt ); while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) +1; qstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) 
if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) +1; qend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) +1; qal_display_start = atoi( pt ) - 1; pt = qal; while( (c = fgetc( fp )) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; while( fgets( b, B-1, fp ) ) if( !strncmp( "_start:", b+4, 7 ) ) break; pt = strstr( b, ":" ) + 1; tstart = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_stop:", b+4, 6 ) ) break; pt = strstr( b, ":" ) + 1; tend = atoi( pt ) - 1; while( fgets( b, B-1, fp ) ) if( !strncmp( "_display_start:", b+4, 15 ) ) break; pt = strstr( b, ":" ) + 1; tal_display_start = atoi( pt ) - 1; pt = tal; while( ( c = fgetc( fp ) ) ) { if( c == '>' ) { ungetc( c, fp ); break; } if( isalpha( c ) || c == '-' ) *pt++ = c; } *pt = 0; // fprintf( stderr, "(%d-%d:%d-%d)\n", qstart, qend, tstart, tend ); // fprintf( stderr, "qal_display_start = %d, tal_display_start = %d\n", qal_display_start, tal_display_start ); // fprintf( stderr, "qal = %s\n", qal ); // fprintf( stderr, "tal = %s\n", tal ); qal2 = cutal( qal, qal_display_start, qstart, qend ); tal2 = cutal( tal, tal_display_start, tstart, tend ); // fprintf( stderr, "qal2 = %s\n", qal2 ); // fprintf( stderr, "tal2 = %s\n", tal2 ); // fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt ); putlocalhom2( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa, 'h' ); } // fprintf( stderr, "count = %d\n", count ); return count; } int ReadFasta34m10_scoreonly_nucbk( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; count = 0; while( !feof( fp ) ) { fgets( b, B-1, fp ); if( !strncmp( "+===========+", b, 13 ) ) { pos = atoi( b+13 ); if( strchr( b, 'r' ) ) continue; // pt = strchr( b, ')' ) + 1; pt = strchr( b, ']' ) + 1; sscanf( pt, "%d %lf %lf", &opt, &bits, &z ); dis[pos] += 
(double)opt; count++; #if 0 fprintf( stderr, "b=%s\n", b ); fprintf( stderr, "opt=%d\n", opt ); fprintf( stderr, "pos=%d\n", pos ); fprintf( stderr, "dis[pos]=%f\n", dis[pos] ); #endif } else if( 0 == strncmp( ">>><<<", b, 6 ) ) { break; } } if( !count ) return -1; return count; } int ReadFasta34m10_scoreonly_nuc( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; int c; int *yonda; yonda = AllocateIntVec( nin ); for( c=0; c>>", b, 3 ) ) { for( c=0; c>><<<", b, 6 ) ) { break; } } free( yonda ); if( !count ) return -1; return count; } int ReadFasta34m10_scoreonly( FILE *fp, double *dis, int nin ) { int count=0; char b[B]; char *pt; int pos; int opt; double z, bits; int c; int *yonda; yonda = AllocateIntVec( nin ); for( c=0; c>>", b, 3 ) ) { for( c=0; c>><<<", b, 6 ) ) { break; } } free( yonda ); if( !count ) return -1; return count; } int ReadFasta34( FILE *fp, double *dis, int nseq, char name[M][B], LocalHom *localhomlist ) { int count=0; char b[B]; char *pt; static int junban[M]; int overlapaa; int opt, qstart, qend, tstart, tend; double z, bits; count = 0; #if 0 for( i=0; i<10000000 && count>+==========+", b, 14 ) ) { break; } } if( !count ) return -1; count = 0; while( !feof( fp ) ) { if( !strncmp(">>+==========+", b, 14 ) ) { junban[count] = atoi( b+14 ); count++; fgets( b, B-1, fp ); // initn: pt = strstr( b, "opt: " ) + 5; localhomlist[junban[count-1]].opt = atof( pt ); fgets( b, B-1, fp ); // Smith-Waterman score pt = strstr( b, "ungapped) in " ) + 13; sscanf( pt, "%d", &overlapaa ); fprintf( stderr, "pt = %s, overlapaa = %d\n", pt, overlapaa ); pt = strstr( b, "overlap (" ) + 8; sscanf( pt, "(%d-%d:%d-%d)", &qstart, &qend, &tstart, &tend ); localhomlist[junban[count-1]].overlapaa = overlapaa; localhomlist[junban[count-1]].start1 = qstart-1; localhomlist[junban[count-1]].end1 = qend-1; localhomlist[junban[count-1]].start2 = tstart-1; localhomlist[junban[count-1]].end2 = tend-1; } fgets( b, B-1, fp ); } 
fprintf( stderr, "count = %d\n", count ); return count; } int ReadFasta3( FILE *fp, double *dis, int nseq, char name[M][B] ) { int count=0; char b[B]; char *pt; int junban[M]; int initn, init1, opt; double z; count = 0; #if 0 for( i=0; i<10000000 && count 0 ) { #if 0 /* /tmp/pre ¤Î´Ø·¸¤Ç¤Ï¤º¤·¤¿ */ if( ferror( prep_g ) ) prep_g = fopen( "pre", "w" ); if( !prep_g ) ErrorExit( "Cannot re-open pre." ); #endif rewind( prep_g ); signalSM[STATUS] = IMA_KAITERU; #if IODEBUG if( force ) fprintf( stderr, "FINAL " ); #endif if( devide ) dvWrite( prep_g, nseq, name, nlen, aseq ); else WriteGapFill( prep_g, nseq, name, nlen, aseq ); /* fprintf( prep_g, '\EOF' ); */ fflush( prep_g ); if( force ) signalSM[STATUS] = OSHIMAI; else signalSM[STATUS] = KAKIOWATTA; value = 1; signalSM[SEMAPHORE]++; #if IODEBUG fprintf( stderr, "signalSM[STATUS] = %c\n", signalSM[STATUS] ); #endif break; } else { #if IODEBUG fprintf( stderr, "YONDERUKARA_AKIRAMERU\n" ); #endif value = 0; signalSM[SEMAPHORE]++; if( !force ) break; #if IODEBUG fprintf( stderr, "MATSU\n" ); #endif sleep( 1 ); } } if( force && !value ) ErrorExit( "xced ga pre wo hanasanai \n" ); return( value ); #else if( force ) { rewind( prep_g ); writeData_pointer( prep_g, nseq, name, nlen, aseq ); } #endif return( 0 ); } void readOtherOptions( int *ppidptr, int *fftThresholdptr, int *fftWinSizeptr ) { if( calledByXced ) { FILE *fp = fopen( "pre", "r" ); char b[B]; if( !fp ) ErrorExit( "Cannot open pre.\n" ); fgets( b, B-1, fp ); sscanf( b, "%d %d %d", ppidptr, fftThresholdptr, fftWinSizeptr ); fclose( fp ); #if IODEBUG fprintf( stderr, "b = %s\n", b ); fprintf( stderr, "ppid = %d\n", ppid ); fprintf( stderr, "fftThreshold = %d\n", fftThreshold ); fprintf( stderr, "fftWinSize = %d\n", fftWinSize ); #endif } else { *ppidptr = 0; *fftThresholdptr = FFT_THRESHOLD; if( dorp == 'd' ) *fftWinSizeptr = FFT_WINSIZE_D; else *fftWinSizeptr = FFT_WINSIZE_P; } #if 0 fprintf( stderr, "fftThresholdptr=%d\n", *fftThresholdptr ); fprintf( stderr, 
"fftWinSizeptr=%d\n", *fftWinSizeptr ); #endif } void initSignalSM( void ) { // int signalsmid; #if IODEBUG if( ppid ) fprintf( stderr, "PID of xced = %d\n", ppid ); #endif if( !ppid ) { signalSM = NULL; return; } #if 0 signalsmid = shmget( (key_t)ppid, 3, IPC_ALLOC | 0666 ); if( signalsmid == -1 ) ErrorExit( "Cannot get Shared memory for signal.\n" ); signalSM = shmat( signalsmid, 0, 0 ); if( (int)signalSM == -1 ) ErrorExit( "Cannot attatch Shared Memory for signal!\n" ); signalSM[STATUS] = IMA_KAITERU; signalSM[SEMAPHORE] = 1; #endif } void initFiles( void ) { char pname[100]; if( ppid ) sprintf( pname, "/tmp/pre.%d", ppid ); else sprintf( pname, "pre" ); prep_g = fopen( pname, "w" ); if( !prep_g ) ErrorExit( "Cannot open pre" ); #if mingw setmode( fileno( prep_g ), O_BINARY ); #endif trap_g = fopen( "trace", "w" ); if( !trap_g ) ErrorExit( "cannot open trace" ); fprintf( trap_g, "PID = %d\n", getpid() ); fflush( trap_g ); } void closeFiles( void ) { fclose( prep_g ); fclose( trap_g ); } void WriteForFasta( FILE *fp, int locnjob, char **name, int nlen[M], char **aseq ) { static char b[N]; int i, j; int nalen[M]; for( i=0; i%s\n", name[i] ); for( j=0; j 0 ) { tmpptr1 = localhomtable[it][j].last; // fprintf( stderr, "reallocating, localhomtable[%d][%d].nokori = %d\n", i, j, localhomtable[i][j].nokori ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable[it][j].last = tmpptr1; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } else { tmpptr1 = localhomtable[it]+j; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } tmpptr1->start1 = start1; tmpptr1->start2 = start2; tmpptr1->end1 = end1; tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; } // else 
if( jt != -1 ) { if( localhomtable[jt][i].nokori++ > 0 ) { tmpptr2 = localhomtable[jt][i].last; tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->extended = -1; tmpptr2->next = NULL; localhomtable[jt][i].last = tmpptr2; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } else { tmpptr2 = localhomtable[jt]+i; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } tmpptr2->start2 = start1; tmpptr2->start1 = start2; tmpptr2->end2 = end1; tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "i=%d, j=%d, st1=%d, en1=%d, opt = %f\n", i, j, tmpptr1->start1, tmpptr1->end1, opt ); } } } void readlocalhomtable2_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; LocalHom *tmpptr1; // for( i=0; i= njob || i >= njob ) { reporterr( "Check hat3. 
The first sequence must be younger than the second one.\n" ); exit( 1 ); } { if( localhomtable[i][j-i].nokori++ > 0 ) { tmpptr1 = localhomtable[i][j-i].last; // fprintf( stderr, "reallocating, localhomtable[%d][%d].nokori = %d\n", i, j, localhomtable[i][j].nokori ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable[i][j-i].last = tmpptr1; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j-i].nokori ); } else { tmpptr1 = localhomtable[i]+j-i; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j-i].nokori ); } tmpptr1->start1 = start1; tmpptr1->start2 = start2; tmpptr1->end1 = end1; tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; } } } void readlocalhomtable2( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; LocalHom *tmpptr1, *tmpptr2; // for( i=0; i 0 ) { tmpptr1 = localhomtable[i][j].last; // fprintf( stderr, "reallocating, localhomtable[%d][%d].nokori = %d\n", i, j, localhomtable[i][j].nokori ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable[i][j].last = tmpptr1; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", i, j, localhomtable[i][j].nokori ); } tmpptr1->start1 = start1; tmpptr1->start2 = start2; tmpptr1->end1 = end1; tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; } // else { 
if( localhomtable[j][i].nokori++ > 0 ) { tmpptr2 = localhomtable[j][i].last; tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->extended = -1; tmpptr2->next = NULL; localhomtable[j][i].last = tmpptr2; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } else { tmpptr2 = localhomtable[j]+i; // fprintf( stderr, "### i,j = %d,%d, nokori=%d\n", j, i, localhomtable[j][i].nokori ); } tmpptr2->start2 = start1; tmpptr2->start1 = start2; tmpptr2->end2 = end1; tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "i=%d, j=%d, st1=%d, en1=%d, opt = %f\n", i, j, tmpptr1->start1, tmpptr1->end1, opt ); } } } #if 0 void readlocalhomtable_target( FILE*fp, int ntarget, int njob, LocalHom **localhomtable, char *kozoarivec, int *targetmap ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2, it, jt; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( njob, njob ); for( i=0; i 0 ) { printf( "extending %d-%d, ->%d\n", i, j, nlocalhom[it][j] ); // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[it]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! 
tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } // else if( jt != -1 ) { if( nlocalhom[jt][i]++ > 0 ) { printf( "extending %d-%d, ->%d\n", i, j, nlocalhom[jt][i] ); tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->next = NULL; } else tmpptr2 = localhomtable[jt]+i; tmpptr2->start2 = start1; // CHUUI!!!! tmpptr2->start1 = start2; tmpptr2->end2 = end1; // CHUUI!!!! tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt ); } } LocalHom *tmpptr; for( tmpptr = localhomtable[1]+11; tmpptr; tmpptr=tmpptr->next ) fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, next=%p\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt / 600 * 5.8, tmpptr->next ); FreeIntMtx( nlocalhom ); } void readlocalhomtable_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( njob, njob ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j-i; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! 
tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } } FreeIntMtx( nlocalhom ); } #endif void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec ) { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( njob, njob ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } // else { if( nlocalhom[j][i]++ > 0 ) { tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->next = NULL; } else tmpptr2 = localhomtable[j]+i; tmpptr2->start2 = start1; // CHUUI!!!! tmpptr2->start1 = start2; tmpptr2->end2 = end1; // CHUUI!!!! 
tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt ); } } FreeIntMtx( nlocalhom ); } void readlocalhomtable_two( FILE*fp, int norg, int nadd, LocalHom **localhomtable, LocalHom **localhomtablex, char *kozoarivec ) // for test only { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; int **nlocalhomx = NULL; LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( norg, nadd ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } { if( nlocalhomx[j][i]++ > 0 ) { tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr2 = tmpptr2->next; tmpptr2->next = NULL; } else tmpptr2 = localhomtablex[j]+i; tmpptr2->start2 = start1+1; // CHUUI!!!! tmpptr2->start1 = start2; tmpptr2->end2 = end1+1; // CHUUI!!!! 
tmpptr2->end1 = end2; // tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr2->opt = opt; tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr2->overlapaa = overlapaa; tmpptr2->korh = *infor; // fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt ); } } FreeIntMtx( nlocalhom ); FreeIntMtx( nlocalhomx ); } void readlocalhomtable_one( FILE*fp, int norg, int nadd, LocalHom **localhomtable, char *kozoarivec ) // for test only { double opt; static char buff[B]; char infor[100]; int i, j, overlapaa, start1, end1, start2, end2; int **nlocalhom = NULL; LocalHom *tmpptr1=NULL; // by D.Mathog, a guess nlocalhom = AllocateIntMtx( norg, nadd ); for( i=0; i 0 ) { // fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->next = NULL; } else { tmpptr1 = localhomtable[i]+j; // fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] ); } tmpptr1->start1 = start1; // CHUUI!!!! tmpptr1->start2 = start2; tmpptr1->end1 = end1; // CHUUI!!!! 
tmpptr1->end2 = end2; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600; tmpptr1->overlapaa = overlapaa; tmpptr1->korh = *infor; // fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt ); } } FreeIntMtx( nlocalhom ); } void outlocalhom_part( LocalHom **localhom, int norg, int nadd ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt / 600 * 5.8 ); } while( (tmpptr=tmpptr->next) ); } } void outlocalhom_target( LocalHom **localhom, int norg, int nadd ) { int i, j; LocalHom *tmpptr; for( i=0; inext ) { fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, next=%p\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt / 600 * 5.8, (void *)tmpptr->next ); } // while( (tmpptr=tmpptr->next) ); } } void outlocalhom_half( LocalHom **localhom, int nseq ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt / 600 * 5.8, (void *)tmpptr->next ); } while( (tmpptr=tmpptr->next) ); } } void outlocalhom( LocalHom **localhom, int nseq ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt ); } while( (tmpptr=tmpptr->next) ); } } void outlocalhompt( LocalHom ***localhom, int n1, int n2 ) { int i, j; LocalHom *tmpptr; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt ); } while( (tmpptr=tmpptr->next) ); } } void initlocalhom1( LocalHom *lh ) { lh->start1 = -1; lh->end1 = -1; lh->start2 = -1; lh->end2 = -1; lh->opt = -1.0; lh->next = NULL; lh->nokori = 0; lh->extended = -1; lh->last = lh; lh->korh = 'h'; } void freelocalhom1( LocalHom *lh ) { if( lh == NULL ) return; LocalHom *tmpptr = lh; LocalHom *ppp; for( ; tmpptr; tmpptr=ppp ) { ppp = tmpptr->next; if( tmpptr!=lh ) { free( tmpptr ); continue; } 
tmpptr->start1 = -1; tmpptr->end1 = -1; tmpptr->start2 = -1; tmpptr->end2 = -1; tmpptr->opt = -1.0; tmpptr->next = NULL; tmpptr->nokori = 0; tmpptr->extended = -1; tmpptr->last = tmpptr; tmpptr->korh = 'h'; } } void FreeLocalHomTable_part( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable_two( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } for( i=n; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable_one( LocalHom **localhomtable, int n, int m ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( 
stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable_half( LocalHom **localhomtable, int n ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } void FreeLocalHomTable( LocalHom **localhomtable, int n ) { int i, j; LocalHom *ppp, *tmpptr; for( i=0; inext; for( ; tmpptr; tmpptr=ppp ) { #if DEBUG fprintf( stderr, "i=%d, j=%d\n", i, j ); #endif ppp = tmpptr->next; if( tmpptr!=localhomtable[i]+j ) { #if DEBUG fprintf( stderr, "freeing %p\n", tmpptr ); #endif free( tmpptr ); } } } #if DEBUG fprintf( stderr, "freeing localhomtable[%d]\n", i ); #endif free( localhomtable[i] ); } #if DEBUG fprintf( stderr, "freeing localhomtable\n" ); #endif free( localhomtable ); #if DEBUG fprintf( stderr, "freed\n" ); #endif } char *progName( char *str ) { char *value; if( ( value = strrchr( str, '/' ) ) != NULL ) return( value+1 ); else return( str ); } static void tabtospace( char *str ) { char *p; // fprintf( stderr, "before = %s\n", str ); while( NULL != ( p = strchr( str , '\t' ) ) ) { *p = ' '; } // fprintf( stderr, "after = %s\n", str ); } static char *extractfirstword( char *str ) { char *val = str; tabtospace( str ); while( *str ) { if( val == str && *str == ' ' ) { val++; str++; } else if( *str != ' ' ) { str++; } else if( *str == ' ' ) { *str = 0; } } return( val ); 
} void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order, int namelen ) { int pos, pos2, j; if( namelen == -1 ) namelen = 10; pos = 0; fprintf( fp, " %d %d\n", nseq, maxlen ); while( pos < maxlen ) { for( j=0; j%s\n", name[k]+1 ); for( j=0; j%s\n", name[k]+1 ); for( j=0; j 19 ) break; } *rescalept = 1; for( i=0; i<20; i++ ) raw[20][i] = -1.0; while( !feof( mf ) ) { fgets( line, 999, mf ); sscanf( line, "%s", key ); if( !strcmp( key, "norescale" ) ) { reporterr( "no rescale\n" ); *rescalept = 0; break; } // else if( line[0] == 'f' ) else if( !strcmp( key, "frequency" ) ) { // fprintf( stderr, "found! line = %s\n", line ); ptr1 = line; for( j=0; j<20; j++ ) { while( !isdigit( *ptr1 ) && *ptr1 != '-' && *ptr1 != '.' ) ptr1++; raw[20][j] = atof( ptr1 ); // fprintf( stderr, "raw[20][]=%f, %c %d\n", raw[20][j], inorder[i], j ); ptr1 = strchr( ptr1, ' ' ); if( ptr1 == NULL && j<19) showaamtxexample(); } break; } } k = 0; for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { if( i != j ) { ii = MAX( map[i], map[j] ); jj = MIN( map[i], map[j] ); } else ii = jj = map[i]; val[k++] = raw[ii][jj]; // fprintf( stderr, "%c-%c, %f\n", aaorder[i], aaorder[j], val[k-1] ); } } for( i=0; i<20; i++ ) val[400+i] = raw[20][map[i]]; fprintf( stderr, "inorder = %s\n", inorder ); fclose( mf ); free( inorder ); free( line ); FreeDoubleMtx( raw ); free( map ); return( val ); } static void tab2space( char *s ) // nen no tame { while( *s ) { if( *s == '\t' ) *s = ' '; s++; } } static int readasubalignment( char *s, int *t, int *preservegaps ) { int v = 0; char status = 's'; char *pt = s; *preservegaps = 0; tab2space( s ); while( *pt ) { if( *pt == ' ' ) { status = 's'; } else { if( status == 's' ) { if( *pt == '\n' || *pt == '#' ) break; status = 'n'; t[v] = atoi( pt ); if( t[v] == 0 ) { fprintf( stderr, "Format error? 
Sequences must be specified as 1, 2, 3...\n" ); exit( 1 ); } if( t[v] < 0 ) *preservegaps = 1; t[v] = abs( t[v] ); t[v] -= 1; v++; } } pt++; } t[v] = -1; return( v ); } static int countspace( char *s ) { int v = 0; char status = 's'; char *pt = s; tab2space( s ); while( *pt ) { if( *pt == ' ' ) { status = 's'; } else { if( status == 's' ) { if( *pt == '\n' || *pt == '#' ) break; v++; status = 'n'; if( atoi( pt ) == 0 ) { fprintf( stderr, "Format error? Sequences should be specified as 1, 2, 3...\n" ); exit( 1 ); } } } pt++; } return( v ); } void readsubalignmentstable( int nseq, int **table, int *preservegaps, int *nsubpt, int *maxmempt ) { FILE *fp; char *line; int linelen = 1000000; int nmem; int lpos; int i, p; int *tab01; line = calloc( linelen, sizeof( char ) ); fp = fopen( "_subalignmentstable", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _subalignmentstable\n" ); exit( 1 ); } if( table == NULL ) { *nsubpt = 0; *maxmempt = 0; while( 1 ) { fgets( line, linelen-1, fp ); if( feof( fp ) ) break; if( line[strlen(line)-1] != '\n' ) { fprintf( stderr, "too long line? 
\n" ); exit( 1 ); } if( line[0] == '#' ) continue; if( atoi( line ) == 0 ) continue; nmem = countspace( line ); if( nmem > *maxmempt ) *maxmempt = nmem; (*nsubpt)++; } } else { tab01 = calloc( nseq, sizeof( int ) ); for( i=0; i nseq-1 ) { fprintf( stderr, "Sequence %d does not exist in the input sequence file.\n", p+1 ); exit( 1 ); } } lpos++; } free( tab01 ); } fclose( fp ); free( line ); } void readmccaskill( FILE *fp, RNApair **pairprob, int length ) { char gett[1000]; int *pairnum; int i; int left, right; double prob; int c; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i' || c == EOF ) { break; } fgets( gett, 999, fp ); // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%d %d %lf", &left, &right, &prob ); if( left >= length || right >= length ) { fprintf( stderr, "format error in hat4 - 2\n" ); exit( 1 ); } if( prob < 0.01 ) continue; // 080607, mafft ni dake eikyou if( left != right && prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); } } free( pairnum ); } void readpairfoldalign( FILE *fp, char *s1, char *s2, char *aln1, char *aln2, int q1, int q2, int *of1, int *of2, int sumlen ) { char gett[1000]; int *maptoseq1; int *maptoseq2; char dumc; int dumi; char sinseq[100], sinaln[100]; int posinseq, posinaln; int alnlen; int i; int pos1, pos2; char *pa1, *pa2; char 
qstr[1000]; *of1 = -1; *of2 = -1; maptoseq1 = AllocateIntVec( sumlen+1 ); maptoseq2 = AllocateIntVec( sumlen+1 ); posinaln = 0; // foldalign ga alingment wo kaesanaitok no tame. while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ALIGNING", 10 ) ) break; } sprintf( qstr, "; ALIGNING %d against %d\n", q1+1, q2+1 ); if( strcmp( gett, qstr ) ) { fprintf( stderr, "Error in FOLDALIGN\n" ); fprintf( stderr, "qstr = %s, but gett = %s\n", qstr, gett ); exit( 1 ); } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; --------", 10 ) ) break; } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ********", 10 ) ) break; // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi ); posinaln = atoi( sinaln ); posinseq = atoi( sinseq ); // fprintf( stderr, "posinseq = %d\n", posinseq ); // fprintf( stderr, "posinaln = %d\n", posinaln ); maptoseq1[posinaln-1] = posinseq-1; } alnlen = posinaln; while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; --------", 10 ) ) break; } while( !feof( fp ) ) { fgets( gett, 999, fp ); if( !strncmp( gett, "; ********", 10 ) ) break; // fprintf( stderr, "gett = %s\n", gett ); sscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi ); posinaln = atof( sinaln ); posinseq = atof( sinseq ); // fprintf( stderr, "posinseq = %d\n", posinseq ); // fprintf( stderr, "posinaln = %d\n", posinaln ); maptoseq2[posinaln-1] = posinseq-1; } if( alnlen != posinaln ) { fprintf( stderr, "Error in foldalign?\n" ); exit( 1 ); } pa1 = aln1; pa2 = aln2; for( i=0; i -1 ) *pa1++ = s1[pos1]; else *pa1++ = '-'; if( pos2 > -1 ) *pa2++ = s2[pos2]; else *pa2++ = '-'; } *pa1 = 0; *pa2 = 0; *of1 = 0; for( i=0; i -1 ) break; } *of2 = 0; for( i=0; i -1 ) break; } // fprintf( stderr, "*of1=%d, aln1 = :%s:\n", *of1, aln1 ); // fprintf( stderr, "*of2=%d, aln2 = :%s:\n", *of2, aln2 ); free( maptoseq1 ); free( maptoseq2 ); } int 
myatoi( char *in ) { if( in == NULL ) { fprintf( stderr, "Error in myatoi()\n" ); exit( 1 ); } return( atoi( in ) ); } unsigned long long myatoll( char *in ) { if( in == NULL ) { fprintf( stderr, "Error in myatoi()\n" ); exit( 1 ); } unsigned long long tanni; if( strchr( in, 'G' ) ) tanni = 1000 * 1000 * 1000; else if( strchr( in, 'M' ) ) tanni = 1000 * 1000; else if( strchr( in, 'k' ) ) tanni = 1000; else tanni = 1; return( tanni * atoi( in ) ); } double myatof( char *in ) { if( in == NULL ) { fprintf( stderr, "Error in myatof()\n" ); exit( 1 ); } return( atof( in ) ); } void reporterr( const char *str, ... ) { // static int loglen = 0; va_list args; if( gmsg ) { # if 1 // ato de sakujo static FILE *errtmpfp = NULL; if( errtmpfp == NULL ) errtmpfp = fopen( "maffterr", "w" ); else errtmpfp = fopen( "maffterr", "a" ); va_start( args, str ); vfprintf( errtmpfp, str, args ); va_end( args ); fclose( errtmpfp ); #endif #if 0 char *tmpptr; tmpptr = (char *)realloc( *gmsg, (loglen+10000) * sizeof( char ) ); if( tmpptr == NULL ) { fprintf( stderr, "Cannot relloc *gmsg\n" ); exit( 1 ); } *gmsg = tmpptr; va_start( args, str ); loglen += vsprintf( *gmsg + loglen, str, args ); va_end( args ); va_start( args, str ); loglen += vsprintf( *gmsg + loglen, str, args ); va_end( args ); *(*gmsg + loglen) = 0; if( loglen > gmsglen - 100 ) loglen = 0; // tekitou #endif } else { va_start( args, str ); vfprintf( stderr, str, args ); va_end( args ); // fflush( stderr ); // iru? 
} return; } #if !defined(mingw) && !defined(_MSC_VER) void setstacksize(rlim_t kStackSize ) { // const rlim_t kStackSize = 100 * 1024 * 1024; // min stack size = 10MB struct rlimit rl; int result; rlim_t originalsize; result = getrlimit(RLIMIT_STACK, &rl); if (result == 0) { originalsize = rl.rlim_cur; if (rl.rlim_cur < kStackSize) { rl.rlim_cur = kStackSize; reporterr( "stacksize: %d kb->%d kb\n", originalsize/1024, rl.rlim_cur/1024 ); result = setrlimit(RLIMIT_STACK, &rl); if (result != 0) { reporterr( "Warning: Failed to extend stack size. It's ok in most cases but there may be problems in --pileup and --chainedtree.\n" ); } } else reporterr( "stacksize: %d kb\n", rl.rlim_cur / 1024 ); } else reporterr( "Warning: Cannot check stack size.\n" ); } #endif void treeout_bin( FILE *fp, int n, int ***topol, double **len, Treedep *dep, int *nfilesfornode ) // dep ha nakutemo topol kara saigen dekiru. { int i; char c = '\n'; for( i=0; i%s\"\n", name[i]+1 ); reporterr( "Format has to be \">n0=1000 n1=51 n2=2 sequencename\"\n" ); exit( 1 ); } //reporterr( "tmp[0] = %s\n", tmp[0] ); //reporterr( "tmp[1] = %s\n", tmp[1] ); //reporterr( "tmp[2] = %s\n", tmp[2] ); w[i] = atof( tmp[0] ); if( w[i] == 0.0 ) { reporterr( "Error in reading \">%s\". 
n0=0?\n", name[i]+1 ); reporterr( "Format has to be \">n0=1000 n1=51 n2=2 sequencename\"\n" ); exit( 1 ); } w[i] = 1.0 / atof( tmp[0] ); if( w[i] == 0.0 || w[i] > 1.0 ) { reporterr( "Warning: weight for \">%s\" is %f\n", name[i]+1, w[i] ); } //reporterr( "w[%d] = %f\n", i, w[i] ); } } #if 0 #include #include void use_getrusage(void) { struct rusage r; if (getrusage(RUSAGE_SELF, &r) != 0) { /*Failure*/ } fprintf(stderr, "\nmaxrss = %ld MB\n", r.ru_maxrss/1000); } #endif void commongappick( int nseq, char **seq ) { int i, j, count; int len = strlen( seq[0] ); #if 1 int *mapfromnewtoold; mapfromnewtoold = calloc( len+1, sizeof( int ) ); for( i=0, count=0; i<=len; i++ ) { for( j=0; ji), &(((*extanch)+size)->j), &(((*extanch)+size)->starti), &(((*extanch)+size)->endi), &(((*extanch)+size)->startj), &(((*extanch)+size)->endj), &(((*extanch)+size)->score) ); // reporterr( "i=%d, j=%d, %d-%d, %d-%d, score=%d\n", (*extanch)[size].i, (*extanch)[size].j, (*extanch)[size].starti, (*extanch)[size].endi, (*extanch)[size].startj, (*extanch)[size].endj, (*extanch)[size].score ); ((*extanch)+size)->i -= 1; // 1-origin -> 0-origin ((*extanch)+size)->j -= 1; // 1-origin -> 0-origin ((*extanch)+size)->starti -= 1; ((*extanch)+size)->startj -= 1; ((*extanch)+size)->endi -= 1; ((*extanch)+size)->endj -= 1; if( (*extanch)[size].i >= nseq || (*extanch)[size].j >= nseq ) { reporterr( "\nOut of range? The input file has %d sequences but pair %d-%d was specified in line %d.\nNote that sequence IDs are counted from 1.\n", nseq, (*extanch)[size].i+1, (*extanch)[size].j+1, lineno ); exit( 1 ); } if( (*extanch)[size].i >= (*extanch)[size].j ) { reporterr( "\nFormat problem? \"%d %d\" in line %d.\nThe sequence id of the first column must be less than the second.\n", (*extanch)[size].i+1, (*extanch)[size].j+1, lineno ); exit( 1 ); } if( (*extanch)[size].starti > nogaplen[(*extanch)[size].i] ) { reporterr( "\nOut of range? 
len(seq%d)=%d, but anchor=%d in line %d.\nNote that position is counted from 1.\n", (*extanch)[size].i+1, nogaplen[(*extanch)[size].i], (*extanch)[size].starti+1, lineno ); exit( 1 ); } if( (*extanch)[size].startj > nogaplen[(*extanch)[size].j] ) { reporterr( "\nOut of range? len(seq%d)=%d, but anchor=%d in line %d.\nNote that position is counted from 1.\n", (*extanch)[size].j, nogaplen[(*extanch)[size].j]+1, (*extanch)[size].startj+1, lineno ); exit( 1 ); } size++; (*extanch)[size].i = (*extanch)[size].j = -1; } fclose( fp ); } static char id2nuc( int id ) // just for error message { if( id == 0 ) return 't'; if( id == 1 ) return 'c'; if( id == 2 ) return 'a'; if( id == 3 ) return 'g'; return -1; } static void id2codon( int id, char *codon ) // just for error message { codon[0] = id2nuc( id/16 ); id = id - 16 * id/16; codon[1] = id2nuc( id/4 ); id = id - 4 * id/4; codon[2] = id2nuc( id ); return; } static int nuc2id( char nuc ) { if( nuc == 't' ) return 0; if( nuc == 'c' ) return 1; if( nuc == 'a' ) return 2; if( nuc == 'g' ) return 3; return -1; } int codon2id( char *codon ) { int id0, id1, id2, id; id0 = nuc2id( codon[0] ); id1 = nuc2id( codon[1] ); id2 = nuc2id( codon[2] ); if( id0 < 0 || id1 < 0 | id2 < 0 ) return( -1 ); return id0 * 16 + id1 * 4 + id2; } double **loadcodonscore( FILE *fp, double **scoremtx ) { int i, j; char *buf = calloc( sizeof( char ), 1000 ); char codon1[3], codon2[3]; char aa1[1000], aa2[1000]; int id1, id2; double score; for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) scoremtx[i][j] = -99999; i = 0; while( fgets( buf, 1000, fp ) ) { // reporterr( "%s", buf ); if( buf[0] == '#' ) continue; if( buf[strlen(buf)-1] != '\n' ) { reporterr( "%s: too long in codonscore file.\n", buf ); exit( 1 ); } sscanf( buf, "%3s %s %3s %s %lf", codon1, aa1, codon2, aa2, &score ); // reporterr( "codon1=%s, id=%d\n", codon1, codon2id( codon1 ) ); // reporterr( "codon2=%s, id=%d\n", codon2, codon2id( codon2 ) ); // reporterr( "score=%f\n", score ); id1 = codon2id( 
codon1 ); id2 = codon2id( codon2 ); if( id1 < 0 || id2 < 0 ) { reporterr( "Cannot use codon pair %s - %s: Use small letter, a, c, g, t (instead of u)\n", codon1, codon2 ); exit( 1 ); } scoremtx[id1][id2] = scoremtx[id2][id1] = score; i++; } free( buf ); for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) { char codon[3]; if( scoremtx[i][j] == -99999 ) { id2codon( i, codon ); reporterr( "\nCodon score for %s", codon ); id2codon( j, codon ); reporterr( "-%s (id%d-id%d) is not given.\n", codon, i, j ); exit( 1 ); } // reporterr( "id%d-id%d = %f\n", i, j, scoremtx[i][j] ); } return( scoremtx ); } mafft-7.505-without-extensions/core/f2cl.c0000644000175000017500000001635214224501721020002 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 static char *comment; static char *orderfile; static int format; static int namelen; static int excludedashseq; static int extendedalphabet; static void fillspace( char *seq, int lenmax ) { int len = strlen( seq ); seq += len; lenmax -= len; while( lenmax-- ) *seq++ = ' '; *seq = 0; } void setmark_clustal( int nlen, int nseq, char **seq, char *mark ) { int i, j, k, nalpha; char firstletter; char *strong[9]; char *weaker[11]; int nstrong, nweaker; char s; if( dorp == 'd' ) { strong[0] = "TU"; nstrong = 1; weaker[0] = "AG"; weaker[1] = "CT"; weaker[2] = "CU"; nweaker = 2; nalpha = 10; } else { strong[0] = "STA"; strong[1] = "NEQK"; strong[2] = "NHQK"; strong[3] = "NDEQ"; strong[4] = "QHRK"; strong[5] = "MILV"; strong[6] = "MILF"; strong[7] = "HY"; strong[8] = "FYW"; nstrong = 9; weaker[0] = "CSA"; weaker[1] = "ATV"; weaker[2] = "SAG"; weaker[3] = "STNK"; weaker[4] = "STPA"; weaker[5] = "SGND"; weaker[6] = "SNDEQK"; weaker[7] = "NDEQHK"; weaker[8] = "NEQHRK"; weaker[9] = "FVLIM"; weaker[10] = "HFY"; nweaker = 11; nalpha = 20; } for( i=0; i= nalpha || amino_n[(unsigned char)firstletter] < 0 ) continue; for( j=0; j 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = 
%s\n", inputfile ); --argc; goto nextoption; case 'c': comment = *++argv; fprintf( stderr, "comment = %s\n", comment ); --argc; goto nextoption; case 'r': orderfile = *++argv; fprintf( stderr, "orderfile = %s\n", orderfile ); --argc; goto nextoption; case 'n': namelen = myatoi( *++argv ); fprintf( stderr, "namelen = %d\n", namelen ); --argc; goto nextoption; case 'f': format = 'f'; break; case 'd': excludedashseq = 1; break; case 'y': format = 'y'; break; case 'E': extendedalphabet = 1; nblosum = -2; break; case 'N': extendedalphabet = 0; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static int *nlen, *onlen; static char **name, **oname, **seq, **oseq, *mark; static int *order, *oorder; int i, j; FILE *infp; FILE *orderfp; char gett[B]; int nlenmin; int nout; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "rb" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen_casepreserve( infp, &nlenmin ); rewind( infp ); seq = AllocateCharMtx( njob, nlenmax*2+1 ); mark = AllocateCharVec( nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); if( orderfile ) { orderfp = fopen( orderfile, "r" ); if( !orderfp ) { fprintf( stderr, "Cannot open %s\n", orderfile ); exit( 1 ); } for( i=0; i 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); free( largeM ); free( Mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; } return( 0.0 ); } // 
fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); if( orlgth1 == 0 ) { mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijpipt = i - 1; *ijpjpt = mpi; } g = *prept; if( g > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "m%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijpipt = *mpjpt; *ijpjpt = j - 1; //IRU! } g = *prept; if( g > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif g = tbk + fpenalty_OP; // g = tbk; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } // g = Mi; if( Mi > tbk ) { tbk = Mi; //error desu. 
tbki = i-1; tbkj = Mpi; } // g = *Mjpt; if( *Mjpt > tbk ) { tbk = *Mjpt; tbki = *Mpjpt; tbkj = j-1; } // tbk += fpenalty_EX;// + foffset; // g = *prept; if( *prept > *Mjpt ) { *Mjpt = *prept; *Mpjpt = i-1; } // *Mjpt += fpenalty_EX;// + foffset; // g = *prept; if( *prept > Mi ) { Mi = *prept; Mpi = j-1; } // Mi += fpenalty_EX;// + foffset; // fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); // fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijpipt = localstop; // *ijpjpt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; curpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } #if DEBUG2 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif if( ijpi[endali][endalj] == localstop ) // && ijpj[endali][endalj] == localstop ) { strcpy( seq1[0], "" ); strcpy( seq2[0], "" ); *off1pt = *off2pt = 0; return( 0.0 ); } gentracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, endali, endalj ); // fprintf( stderr, "### impmatch = %f\n", *impmatch ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); 
#endif return( maxwm ); } mafft-7.505-without-extensions/core/blosum.c0000644000175000017500000004604614224501721020460 0ustar nileshnilesh#define DEFAULTGOP_B -1530 #define DEFAULTGEP_B -00 #define DEFAULTOFS_B -123 /* +10 -- -50 teido ka ? */ void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp, int *rescalept ) { /* char locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U"; */ // char locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-U"; char locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-J"; char locgrpd[] = { 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, 6, 6, 6, 1, }; double freqd[20] = { 0.077, 0.051, 0.043, 0.052, 0.020, 0.041, 0.062, 0.074, 0.023, 0.052, 0.091, 0.059, 0.024, 0.040, 0.051, 0.069, 0.059, 0.014, 0.032, 0.066, }; double tmpmtx30[] = { 4, -1, 8, 0, -2, 8, 0, -1, 1, 9, -3, -2, -1, -3, 17, 1, 3, -1, -1, -2, 8, 0, -1, -1, 1, 1, 2, 6, 0, -2, 0, -1, -4, -2, -2, 8, -2, -1, -1, -2, -5, 0, 0, -3, 14, 0, -3, 0, -4, -2, -2, -3, -1, -2, 6, -1, -2, -2, -1, 0, -2, -1, -2, -1, 2, 4, 0, 1, 0, 0, -3, 0, 2, -1, -2, -2, -2, 4, 1, 0, 0, -3, -2, -1, -1, -2, 2, 1, 2, 2, 6, -2, -1, -1, -5, -3, -3, -4, -3, -3, 0, 2, -1, -2, 10, -1, -1, -3, -1, -3, 0, 1, -1, 1, -3, -3, 1, -4, -4, 11, 1, -1, 0, 0, -2, -1, 0, 0, -1, -1, -2, 0, -2, -1, -1, 4, 1, -3, 1, -1, -2, 0, -2, -2, -2, 0, 0, -1, 0, -2, 0, 2, 5, -5, 0, -7, -4, -2, -1, -1, 1, -5, -3, -2, -2, -3, 1, -3, -3, -5, 20, -4, 0, -4, -1, -6, -1, -2, -3, 0, -1, 3, -1, -1, 3, -2, -2, -1, 5, 9, 1, -1, -2, -2, -2, -3, -3, -3, -3, 4, 1, -2, 0, 1, -4, -1, 1, -3, 1, 5, 0, -2, 4, 5, -2, -1, 0, 0, -2, -2, -1, 0, -2, -3, -2, 0, 0, -5, -3, -2, 5, 0, 0, -1, 0, 0, 4, 5, -2, 0, -3, -1, 1, -1, -4, 0, -1, -1, -1, -2, -3, 0, 4, 0, -1, 0, -1, -2, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, -2, -1, 0, -1, 0, -1, }; double tmpmtx45[] = { 5, -2, 7, -1, 0, 6, -2, -1, 2, 7, -1, -3, -2, -3, 12, -1, 1, 0, 0, -3, 6, -1, 0, 0, 2, -3, 2, 6, 0, -2, 0, -1, -3, -2, -2, 7, -2, 0, 1, 0, -3, 1, 0, -2, 10, -1, -3, -2, -4, -3, -2, 
-3, -4, -3, 5, -1, -2, -3, -3, -2, -2, -2, -3, -2, 2, 5, -1, 3, 0, 0, -3, 1, 1, -2, -1, -3, -3, 5, -1, -1, -2, -3, -2, 0, -2, -2, 0, 2, 2, -1, 6, -2, -2, -2, -4, -2, -4, -3, -3, -2, 0, 1, -3, 0, 8, -1, -2, -2, -1, -4, -1, 0, -2, -2, -2, -3, -1, -2, -3, 9, 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -3, -1, -2, -2, -1, 4, 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, 2, 5, -2, -2, -4, -4, -5, -2, -3, -2, -3, -2, -2, -2, -2, 1, -3, -4, -3, 15, -2, -1, -2, -2, -3, -1, -2, -3, 2, 0, 0, -1, 0, 3, -3, -2, -1, 3, 8, 0, -2, -3, -3, -1, -3, -3, -3, -3, 3, 1, -2, 1, 0, -3, -1, 0, -3, -1, 5, }; double tmpmtx50[] = { 5, -2, 7, -1, -1, 7, -2, -2, 2, 8, -1, -4, -2, -4, 13, -1, 1, 0, 0, -3, 7, -1, 0, 0, 2, -3, 2, 6, 0, -3, 0, -1, -3, -2, -3, 8, -2, 0, 1, -1, -3, 1, 0, -2, 10, -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, }; #if 0 double tmpmtx62[] = { 6, -2, 8, -2, -1, 8, -3, -2, 2, 9, -1, -5, -4, -5, 13, -1, 1, 0, 0, -4, 8, -1, 0, 0, 2, -5, 3, 7, 0, -3, -1, -2, -4, -3, -3, 8, -2, 0, 1, -2, -4, 1, 0, -3, 11, -2, -4, -5, -5, -2, -4, -5, -6, -5, 6, -2, -3, -5, -5, -2, -3, -4, -5, -4, 2, 6, -1, 3, 0, -1, -5, 2, 1, -2, -1, -4, -4, 7, -1, -2, -3, -5, -2, -1, -3, -4, -2, 2, 3, -2, 8, -3, -4, -4, -5, -4, -5, -5, -5, -2, 0, 1, -5, 0, 9, -1, -3, -3, -2, -4, -2, -2, -3, -3, -4, -4, -2, -4, -5, 11, 2, -1, 1, 0, -1, 0, 0, 0, -1, -4, -4, 0, -2, -4, -1, 6, 0, -2, 0, -2, -1, -1, -1, -2, -3, -1, -2, -1, -1, -3, -2, 2, 7, -4, -4, 
-6, -6, -3, -3, -4, -4, -4, -4, -2, -4, -2, 1, -5, -4, -4, 16, -3, -3, -3, -5, -4, -2, -3, -5, 3, -2, -2, -3, -1, 4, -4, -3, -2, 3, 10, 0, -4, -4, -5, -1, -3, -4, -5, -5, 4, 1, -3, 1, -1, -4, -2, 0, -4, -2, 6, }; #else double tmpmtx62[] = { 5.893685, -2.120252, 8.210189, -2.296072, -0.659672, 8.479856, -2.630151, -2.408668, 1.907550, 8.661363, -0.612761, -5.083814, -3.989626, -5.189966, 12.873172, -1.206025, 1.474162, 0.002529, -0.470069, -4.352838, 7.927704, -1.295821, -0.173087, -0.402015, 2.265459, -5.418729, 2.781955, 7.354247, 0.239392, -3.456163, -0.634136, -1.970281, -3.750621, -2.677743, -3.165266, 8.344902, -2.437724, -0.374792, 0.867735, -1.678363, -4.481724, 0.672051, -0.176497, -3.061315, 11.266586, -1.982718, -4.485360, -4.825558, -4.681732, -1.841495, -4.154454, -4.791538, -5.587336, -4.847345, 5.997760, -2.196882, -3.231860, -5.068375, -5.408471, -1.916207, -3.200863, -4.269723, -5.440437, -4.180099, 2.282412, 5.774148, -1.101017, 3.163105, -0.268534, -1.052724, -4.554510, 1.908859, 1.163010, -2.291924, -1.081539, -4.005209, -3.670219, 6.756827, -1.402897, -2.050705, -3.226290, -4.587785, -2.129758, -0.631437, -2.997038, -4.014898, -2.326896, 1.690191, 2.987638, -2.032119, 8.088951, -3.315080, -4.179521, -4.491005, -5.225795, -3.563219, -4.746598, -4.788639, -4.661029, -1.851231, -0.241317, 0.622170, -4.618016, 0.018880, 9.069126, -1.221394, -3.162863, -3.000581, -2.220163, -4.192770, -1.922917, -1.674258, -3.200320, -3.241363, -4.135001, -4.290107, -1.520445, -3.714633, -5.395930, 11.046892, 1.673639, -1.147170, 0.901353, -0.391548, -1.312485, -0.151708, -0.220375, -0.438748, -1.322366, -3.522266, -3.663923, -0.305170, -2.221304, -3.553533, -1.213470, 5.826527, -0.068042, -1.683495, -0.069138, -1.576054, -1.299983, -1.012997, -1.294878, -2.363065, -2.528844, -1.076382, -1.796229, -1.004336, -0.999449, -3.161436, -1.612919, 2.071710, 6.817956, -3.790328, -4.019108, -5.543911, -6.321502, -3.456164, -2.919725, -4.253197, -3.737232, -3.513238, 
-3.870811, -2.447829, -4.434676, -2.137255, 1.376341, -5.481260, -4.127804, -3.643382, 15.756041, -2.646022, -2.540799, -3.122641, -4.597428, -3.610671, -2.131601, -3.030688, -4.559647, 2.538948, -1.997058, -1.593097, -2.730047, -1.492308, 4.408690, -4.379667, -2.528713, -2.408996, 3.231335, 9.892544, -0.284140, -3.753871, -4.314525, -4.713963, -1.211518, -3.297575, -3.663425, -4.708118, -4.676220, 3.820569, 1.182672, -3.393535, 1.030861, -1.273542, -3.523054, -2.469318, -0.083276, -4.251392, -1.811267, 5.653391, }; #endif double tmpmtx80[] = { 7, -3, 9, -3, -1, 9, -3, -3, 2, 10, -1, -6, -5, -7, 13, -2, 1, 0, -1, -5, 9, -2, -1, -1, 2, -7, 3, 8, 0, -4, -1, -3, -6, -4, -4, 9, -3, 0, 1, -2, -7, 1, 0, -4, 12, -3, -5, -6, -7, -2, -5, -6, -7, -6, 7, -3, -4, -6, -7, -3, -4, -6, -7, -5, 2, 6, -1, 3, 0, -2, -6, 2, 1, -3, -1, -5, -4, 8, -2, -3, -4, -6, -3, -1, -4, -5, -4, 2, 3, -3, 9, -4, -5, -6, -6, -4, -5, -6, -6, -2, -1, 0, -5, 0, 10, -1, -3, -4, -3, -6, -3, -2, -5, -4, -5, -5, -2, -4, -6, 12, 2, -2, 1, -1, -2, -1, -1, -1, -2, -4, -4, -1, -3, -4, -2, 7, 0, -2, 0, -2, -2, -1, -2, -3, -3, -2, -3, -1, -1, -4, -3, 2, 8, -5, -5, -7, -8, -5, -4, -6, -6, -4, -5, -4, -6, -3, 0, -7, -6, -5, 16, -4, -4, -4, -6, -5, -3, -5, -6, 3, -3, -2, -4, -3, 4, -6, -3, -3, 3, 11, -1, -4, -5, -6, -2, -4, -4, -6, -5, 4, 1, -4, 1, -2, -4, -3, 0, -5, -3, 7, }; double tmpmtx90[] = { 5, -2, 6, -2, -1, 7, -3, -3, 1, 7, -1, -5, -4, -5, 9, -1, 1, 0, -1, -4, 7, -1, -1, -1, 1, -6, 2, 6, 0, -3, -1, -2, -4, -3, -3, 6, -2, 0, 0, -2, -5, 1, -1, -3, 8, -2, -4, -4, -5, -2, -4, -4, -5, -4, 5, -2, -3, -4, -5, -2, -3, -4, -5, -4, 1, 5, -1, 2, 0, -1, -4, 1, 0, -2, -1, -4, -3, 6, -2, -2, -3, -4, -2, 0, -3, -4, -3, 1, 2, -2, 7, -3, -4, -4, -5, -3, -4, -5, -5, -2, -1, 0, -4, -1, 7, -1, -3, -3, -3, -4, -2, -2, -3, -3, -4, -4, -2, -3, -4, 8, 1, -1, 0, -1, -2, -1, -1, -1, -2, -3, -3, -1, -2, -3, -2, 5, 0, -2, 0, -2, -2, -1, -1, -3, -2, -1, -2, -1, -1, -3, -2, 1, 6, -4, -4, -5, -6, -4, -3, -5, -4, -3, -4, -3, -5, -2, 0, 
-5, -4, -4, 11, -3, -3, -3, -4, -4, -3, -4, -5, 1, -2, -2, -3, -2, 3, -4, -3, -2, 2, 8, -1, -3, -4, -5, -2, -3, -3, -5, -4, 3, 0, -3, 0, -2, -3, -2, -1, -3, -3, 5, }; double tmpmtx100[] = { 8, -3,10, -4,-2,11, -5,-5, 1,10, -2,-8,-5,-8,14, -2, 0,-1,-2,-7,11, -3,-2,-2, 2,-9, 2,10, -1,-6,-2,-4,-7,-5,-6, 9, -4,-1, 0,-3,-8, 1,-2,-6,13, -4,-7,-7,-8,-3,-6,-7,-9,-7, 8, -4,-6,-7,-8,-5,-5,-7,-8,-6, 2, 8, -2, 3,-1,-3,-8, 2, 0,-5,-3,-6,-6,10, -3,-4,-5,-8,-4,-2,-5,-7,-5, 1, 3,-4,12, -5,-6,-7,-8,-4,-6,-8,-8,-4,-2, 0,-6,-1,11, -2,-5,-5,-5,-8,-4,-4,-6,-5,-7,-7,-3,-5,-7,12, 1,-3, 0,-2,-3,-2,-2,-2,-3,-5,-6,-2,-4,-5,-3, 9, -1,-3,-1,-4,-3,-3,-3,-5,-4,-3,-4,-3,-2,-5,-4, 2, 9, -6,-7,-8,-10,-7,-5,-8,-7,-5,-6,-5,-8,-4, 0,-8,-7,-7,17, -5,-5,-5,-7,-6,-4,-7,-8, 1,-4,-4,-5,-5, 4,-7,-5,-5, 2,12, -2,-6,-7,-8,-3,-5,-5,-8,-7, 4, 0,-5, 0,-3,-6,-4,-1,-5,-5, 8, }; double tmpmtx0[] = { 2.4, -0.6, 4.7, -0.3, 0.3, 3.8, -0.3, -0.3, 2.2, 4.7, 0.5, -2.2, -1.8, -3.2, 11.5, -0.2, 1.5, 0.7, 0.9, -2.4, 2.7, 0.0, 0.4, 0.9, 2.7, -3.0, 1.7, 3.6, 0.5, -1.0, 0.4, 0.1, -2.0, -1.0, -0.8, 6.6, -0.8, 0.6, 1.2, 0.4, -1.3, 1.2, 0.4, -1.4, 6.0, -0.8, -2.4, -2.8, -3.8, -1.1, -1.9, -2.7, -4.5, -2.2, 4.0, -1.2, -2.2, -3.0, -4.0, -1.5, -1.6, -2.8, -4.4, -1.9, 2.8, 4.0, -0.4, 2.7, 0.8, 0.5, -2.8, 1.5, 1.2, -1.1, 0.6, -2.1, -2.1, 3.2, -0.7, -1.7, -2.2, -3.0, -0.9, -1.0, -2.0, -3.5, -1.3, 2.5, 2.8, -1.4, 4.3, -2.3, -3.2, -3.1, -4.5, -0.8, -2.6, -3.9, -5.2, -0.1, 1.0, 2.0, -3.3, 1.6, 7.0, 0.3, -0.9, -0.9, -0.7, -3.1, -0.2, -0.5, -1.6, -1.1, -2.6, -2.3, -0.6, -2.4, -3.8, 7.6, 1.1, -0.2, 0.9, 0.5, 0.1, 0.2, 0.2, 0.4, -0.2, -1.8, -2.1, 0.1, -1.4, -2.8, 0.4, 2.2, 0.6, -0.2, 0.5, 0.0, -0.5, 0.0, -0.1, -1.1, -0.3, -0.6, -1.3, 0.1, -0.6, -2.2, 0.1, 1.5, 2.5, -3.6, -1.6, -3.6, -5.2, -1.0, -2.7, -4.3, -4.0, -0.8, -1.8, -0.7, -3.5, -1.0, 3.6, -5.0, -3.3, -3.5, 14.2, -2.2, -1.8, -1.4, -2.8, -0.5, -1.7, -2.7, -4.0, 2.2, -0.7, 0.0, -2.1, -0.2, 5.1, -3.1, -1.9, -1.9, 4.1, 7.8, 0.1, -2.0, -2.2, -2.9, 0.0, -1.5, -1.9, -3.3, -2.0, 3.1, 1.8, 
-1.7, 1.6, 0.1, -1.8, -1.0, 0.0, -2.6, -1.1, 3.4, }; int i, j, count; double av; double *tmpmtx; if( n == 30 ) tmpmtx = tmpmtx30; else if( n == 45 ) tmpmtx = tmpmtx45; else if( n == 50 ) tmpmtx = tmpmtx50; else if( n == 62 ) tmpmtx = tmpmtx62; else if( n == 80 ) tmpmtx = tmpmtx80; else if( n == 90 ) tmpmtx = tmpmtx90; else if( n == 100 ) tmpmtx = tmpmtx100; else if( n == 0 ) tmpmtx = tmpmtx0; else if( n == -1 ) tmpmtx = loadaamtx( rescalept ); else { fprintf( stderr, "blosum %d ?\n", n ); exit( 1 ); } count = 0; for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { matrix[i][j] = matrix[j][i] = (double)tmpmtx[count++]; } } if( n == -1 && tmpmtx[400] != -1.0 ) { for( i=0; i<20; i++ ) freq[i] = tmpmtx[400+i]; av = 0.0; for( i=0; i<20; i++ ) av += freq[i]; for( i=0; i<20; i++ ) freq[i] /= av; } else for( i=0; i<20; i++ ) freq[i] = freqd[i]; if( n == -1 ) free( tmpmtx ); #if 0 av = 0.0; for( i=0; i<20; i++ ) av += matrix[i][i]; av /= 20; fprintf( stdout, "av = %f\n", av ); for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) matrix[i][j] /= av; av = wav = 0; count = 0; wcount = 0.0; tmptmp = 0.0; for( i=0; i<20; i++ ) { fprintf( stdout, "freq[%d] = %f\n", i, freq[i] ); tmptmp += freq[i]; for( j=0; j<20; j++ ) { av += matrix[i][j]; wav += freq[i] * freq[j] * matrix[i][j]; count++; wcount += freq[i] * freq[j]; } } av /= count; wav /= wcount; fprintf( stdout, "av = %f\n", av ); fprintf( stdout, "wav = %f\n", wav ); fprintf( stdout, "wcount = %f\n", wcount ); fprintf( stdout, "tmptmp = %f\n", tmptmp ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stderr, "## %d-%d, %f\n", i, j, matrix[i][j] ); } } exit( 1 ); #endif for( i=0; i<26; i++ ) amino[i] = locaminod[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i]; } static int checkchar( int i ) { if( i > 0xff || i <= 0x00 || i == 0x3E || i == 0x3D || i == 0x3C || i == 0x2D || i == 0x20 || i == 0x0d || i == 0x0a ) return( 1 ); return( 0 ); } static void overridematrix( double **matrix ) { char buf[500]; FILE *fp; 
unsigned int i1, i2; double v; char *bpt; int nread; fp = fopen( "_aamtx", "r" ); if( fp == NULL ) { fprintf( stderr, "warning: cannot open scorematrix. Use the default one.\n" ); // f2cl.c de tomaranai youni // exit( 1 ); return; } while( 1 ) { fgets( buf, 499, fp ); if( feof( fp ) ) break; if( ( bpt = strchr( buf, '#' ) ) ) *bpt = 0; i1 = i2 = 0; nread = sscanf( buf, "%x %x %lf", &i1, &i2, &v ); if( nread == EOF ) continue; if( nread != 3 ) { reporterr( "Format error in this line?\n" ); reporterr( "%s\n", buf ); // reporterr( "To set a score, 100, for a match of 0x41 and 0x42, \n" ); // reporterr( "0x41 0x42 100 \n" ); exit( 1 ); } if( checkchar( i1 ) ) { reporterr( "%c=0x%x cannot be used (1)\n", i1, i1 ); exit( 1 ); } if( checkchar( i2 ) ) { reporterr( "%c=0x%x cannot be used (2)\n", i2, i2 ); exit( 1 ); } reporterr( "Score(%c=0x%x,%c=0x%x)=%f\n", i1, i1, i2, i2, v ); matrix[i1][i2] = v; matrix[i2][i1] = v; // 2018/May/11 } fclose( fp ); } void extendedmtx( double **matrix, double *freq, unsigned char *amino, char *amino_grp ) { int i; int j; for( i=0; i -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { // *matchpt = 0.0; // add dakara for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif free( scarr ); } #if 0 // [seq][alphabet] static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; int count = 0; double *matchpt; double **cpmxpdpt; int **cpmxpdnpt; int cpkd; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( 
j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif free( scarr ); } #endif // [alphabet][seq] static void match_calc_alphabet_seq( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int start2, int lgth2, double **doublework, int **intwork, int initialize ) { #if FASTMATCHCALC // fprintf( stderr, "\nmatch_calc... %d", i1 ); int j, l, p; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *matchpt, *cpmxpdpt, **cpmxpdptpt; int *cpmxpdnpt, **cpmxpdnptpt; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // reporterr( "lgth2=%d. j=%d-%d, p=%d-%d\n", lgth2, 0, lgth2, start2, start2+lgth2 ); if( initialize ) { int count = 0; for( j=0,p=start2; j-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l, p; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0,p=start2; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void createcpmxresult( double **cpmxresult, double eff1, double eff2, double **cpmx1, double **cpmx2, char *gaptable1, char *gaptable2 ) { int i, j, p; int alen = strlen( gaptable1 ); // reporterr( "eff1 = %f, eff2=%f\n", eff1, eff2 ); #if 1 // sukoshi osoi for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } #if 0 else if( jen == fulllen2-1 ) { fprintf( stderr, "searching lastverticalw\n" ); wm = lastverticalw[0]; for( i=0; i= wm ) { 
wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } } else if( ien == fulllen1-1 ) { fprintf( stderr, "searching lasthorizontalw\n" ); wm = lasthorizontalw[0]; for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } #endif for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } klim = gt1bk + lgth1+lgth2 - gaptable1; // reporterr( "klim = %d, strlen=%d\n", klim, strlen( gaptable1 ) ); // klim = strlen( gaptable1 ); if( strchr( gaptable1, '-' ) ) for( i=0; i 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; #if STOREWM char ttt1[10000], ttt2[10000]; #endif lgth1 = ien-ist+1; lgth2 = jen-jst+1; #if STOREWM strncpy( ttt1, seq1[0]+ist, lgth1 ); ttt1[lgth1] = 0; strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt2[lgth2] = 0; fprintf( stderr, "in _tanni ist,ien = %d,%d, lgth1=%d\n", ist, ien, lgth1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, lgth2=%d\n", jst, jen, lgth2 ); fprintf( stderr, "ttt1 = %s\n", ttt1 ); fprintf( stderr, "ttt2 = %s\n", ttt2 ); #endif #if 0 fprintf( stderr, "in _tanni ist,ien = %d,%d, fulllen1=%d\n", ist, ien, fulllen1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, fulllen2=%d\n", jst, jen, fulllen2 ); fprintf( stderr, "in _tanni seq1[0] = %-*.*s\n", ien-ist+1, ien-ist+1, seq1[0]+ist ); fprintf( stderr, "in _tanni seq2[0] = %-*.*s\n", jen-jst+1, jen-jst+1, seq2[0]+jst ); #endif ll1 = ( (int)(lgth1) ) + 100; ll2 = ( (int)(lgth2) ) + 100; // aseq1 = AllocateCharMtx( icyc, 0 ); // aseq2 = AllocateCharMtx( jcyc, 0 ); // aseq1bk = AllocateCharMtx( icyc, lgth1+lgth2+100 ); // aseq2bk 
= AllocateCharMtx( jcyc, lgth1+lgth2+100 ); // for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); gt1 = gt1bk = AllocateCharVec( ien-ist+jen-jst+3 ); gt2 = gt2bk = AllocateCharVec( ien-ist+jen-jst+3 ); Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, ist, ien, jst, jen, fulllen1, fulllen2, tailgp, >1, >2 ); strcpy( mgt1, gt1 ); strcpy( mgt2, gt2 ); #if 0 fprintf( stderr, "res after _tanni = %s\n", mseq1[0] ); fprintf( stderr, "res after _tanni = %s\n", mseq2[0] ); fprintf( stderr, "gt1 after _tanni = %s\n", gt1 ); fprintf( stderr, "gt1 after _tanni = %s\n", gt2 ); #endif free( gt1bk ); free( gt2bk ); // for( i=0; i 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; depth++; reccycle++; lgth1 = ien-ist+1; lgth2 = jen-jst+1; // if( lgth1 < 5 ) // fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 ); // if( lgth2 < 5 ) // fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 ); // #if STOREWM fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen ); strncpy( ttt1, seq1[0]+ist, lgth1 ); strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt1[lgth1] = 0; ttt2[lgth2] = 0; fprintf( stderr, "seq1 = %s\n", ttt1 ); fprintf( 
stderr, "seq2 = %s\n", ttt2 ); #endif if( lgth2 <= 0 ) // lgth1 <= 0 ha? { // fprintf( stderr, "\n\n==== jimei\n\n" ); // exit( 1 ); for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #else for( j=lgth2-1; j>-1; --j ) { m[j] = currentw[j+1] + fgcp1[lgth1-2] * gapfreq2f[j+1]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #endif // for( j=0; j=imid; i-- ) firstm = -9999999.9; // firstmp = lgth1-1; firstmp = lgth1; for( i=lgth1-2; i>-1; i-- ) { #ifdef enablemultithread // fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref ); if( chudanpt && *chudanpt != chudanref ) { // fprintf( stderr, "\n\n## CHUUDAN!!! 
kouhan\n" ); *chudanres = 1; freearrays_rec1 ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, doublework, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); freearrays_rec2( gaps, aseq1, aseq2 ); return( -1.0 ); } #endif wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); // match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 ); match_calc_alphabet_seq( n_dynamicmtx, currentw, cpmx1pt, cpmx2pt, ist+i, jst, lgth2, doublework, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2] * gapfreq1f[i+1]; // mi = previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1] * gapfreq1f[i]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j] * gapfreq1f[i+1]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! 
mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1] * gapfreq2f[j]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i] * gapfreq2f[j+1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? } #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j+1] += *mjpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! 
= %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if STOREWM fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j 100 ) // naze 100 if( imid < firstmp-1 ) // naze 100 { jumpi = firstmp; imid = firstmp+1; } #if 0 else { jumpi = 0; imid = 1; } #endif #endif } #if 0 else if( jmid == lgth2 ) { fprintf( stderr, "CHUI1!\n" ); jumpi=0; jumpj=0; imid=jumpforwi[0]; jmid=lgth2-1; } #else // 060414 else if( jmid >= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { // fprintf( stderr, "#### CHUI3!\n" ); imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; if( imid == jumpi ) jumpi = imid-1; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif // fprintf( stderr, "#### FINAL i=%d, jumpi N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif freearrays_rec1 ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, doublework, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); // fprintf( stderr, "==== calling myself (first), depth=%d\n", depth ); #if 
0 fprintf( stderr, "seq1[0] = %.*s\n", lgth1, seq1[0] ); fprintf( stderr, "seq2[0] = %.*s\n", lgth2, seq2[0] ); #endif value = MSalignmm_rec( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1pt, cpmx2pt, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, agt1, agt2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // chudan mada #if 0 reporterr( "length1=%d -> %d? %d?\n", lgth1, strlen(seq1[0]), strlen(aseq1[0]) ); reporterr( "after first _rec\n" ); if( strlen( aseq1[0] ) != strlen( agt1 ) ) reporterr( "WARNING\n" ); fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] ); fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] ); fprintf( stderr, "agt1 = %s\n", agt1 ); fprintf( stderr, "agt2 = %s\n", agt2 ); #endif #if MEMSAVE #else for( i=0; i 0 ) { // for( i=0; i 0 ) { // for( i=0; i 1 || maxwm - value > 1 ) { fprintf( stderr, "WARNING value = %f, but maxwm = %f\n", value, maxwm ); for( i=0; i1-%d\n%s\n", i, mseq1[i] ); fprintf( stderr, "%s\n", aseq1[i] ); } for( i=0; i2-%d\n%s\n", i, mseq2[i] ); fprintf( stderr, "%s\n", aseq2[i] ); } // exit( 1 ); } else { fprintf( stderr, "value = %.0f, maxwm = %.0f -> ok\n", value, maxwm ); } #endif #if MEMSAVE #else for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalignmm_rec( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1pt, cpmx2pt, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, mgt1, mgt2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, headgapfreq1, headgapfreq2 ); #ifdef enablemultithread if( chudanres && *chudanres ) { // fprintf( stderr, "\n\n## CHUUDAN!!! 
relay\n" ); *chudanres = 1; freearrays( ogcp1, ogcp2, ogcp1o, ogcp2o, fgcp1, fgcp2, fgcp1o, fgcp2o, cpmx1, cpmx2, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2, mgt1, mgt2 ); return( -1.0 ); } #endif #if 1 if( cpmxresult ) { if( icyc + jcyc > 20 ) // if( 0 ) // if( 1 ) { #if 1 // marume gosa wo teigen suru tame double totaleff1 = 0.0; double totaleff2 = 0.0; for( i=0; i0.001 || fabs(totaleff2-1.0)>0.001 ) { reporterr( "Warning: rounding error may be large. totaleff1 = %50.40f\n", totaleff1 ); reporterr( "Warning: rounding error may be large. totaleff2 = %50.40f\n", totaleff2 ); exit( 1 ); } totaleff1 = totaleff1 * orieff1 / (orieff1 + orieff2); totaleff2 = totaleff2 * orieff2 / (orieff1 + orieff2); #else double totaleff1 = orieff1 / ( orieff1 + orieff2 ); double totaleff2 = orieff2 / ( orieff1 + orieff2 ); #endif *cpmxresult = AllocateDoubleMtx( nalphabets+3, strlen( mgt1 )+1 ); // gapcount, opg, fng no bun de +3 createcpmxresult( *cpmxresult, totaleff1, totaleff2, cpmx1pt, cpmx2pt, mgt1, mgt2 ); #if ATO creategapfreqresult( (*cpmxresult)[nalphabets], totaleff1, totaleff2, gapfreq1pt, gapfreq2pt, mgt1, mgt2 ); createogresult( (*cpmxresult)[nalphabets+1], totaleff1, totaleff2, ogcp1o, ogcp2o, gapfreq1pt, gapfreq2pt, mgt1, mgt2 ); createfgresult( (*cpmxresult)[nalphabets+2], totaleff1, totaleff2, fgcp1o, fgcp2o, gapfreq1pt, gapfreq2pt, mgt1, mgt2 ); #endif #if 0 reporterr( "\n" ); for( j=0; j 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; #if STOREWM char ttt1[10000], ttt2[10000]; #endif lgth1 = ien-ist+1; lgth2 = jen-jst+1; #if STOREWM strncpy( ttt1, seq1[0]+ist, lgth1 ); ttt1[lgth1] = 0; strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt2[lgth2] = 0; fprintf( stderr, "in _tanni ist,ien = %d,%d, lgth1=%d\n", ist, ien, lgth1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, lgth2=%d\n", jst, jen, lgth2 ); fprintf( stderr, "ttt1 = %s\n", ttt1 ); fprintf( stderr, "ttt2 = %s\n", 
ttt2 ); #endif #if 0 fprintf( stderr, "in _tanni ist,ien = %d,%d, fulllen1=%d\n", ist, ien, fulllen1 ); fprintf( stderr, "in _tanni jst,jen = %d,%d, fulllen2=%d\n", jst, jen, fulllen2 ); fprintf( stderr, "in _tanni seq1[0] = %-*.*s\n", ien-ist+1, ien-ist+1, seq1[0]+ist ); fprintf( stderr, "in _tanni seq2[0] = %-*.*s\n", jen-jst+1, jen-jst+1, seq2[0]+jst ); #endif ll1 = ( (int)(lgth1) ) + 100; ll2 = ( (int)(lgth2) ) + 100; // aseq1 = AllocateCharMtx( icyc, 0 ); // aseq2 = AllocateCharMtx( jcyc, 0 ); // aseq1bk = AllocateCharMtx( icyc, lgth1+lgth2+100 ); // aseq2bk = AllocateCharMtx( jcyc, lgth1+lgth2+100 ); // for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, ist, ien, jst, jen, fulllen1, fulllen2, tailgp, NULL, NULL ); #if 0 fprintf( stderr, "res in _tanni mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "res in _tanni mseq2[0] = %s\n", mseq2[0] ); #endif // for( i=0; i 0 ) headgapfreq1 = gapfreq1f[-1]; else headgapfreq1 = headgapfreq1_g; if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; else headgapfreq2 = headgapfreq2_g; depth++; reccycle++; lgth1 = ien-ist+1; lgth2 = jen-jst+1; // if( lgth1 < 5 ) // fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 ); // if( lgth2 < 5 ) // fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 ); 
// #if STOREWM fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen ); strncpy( ttt1, seq1[0]+ist, lgth1 ); strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt1[lgth1] = 0; ttt2[lgth2] = 0; fprintf( stderr, "seq1 = %s\n", ttt1 ); fprintf( stderr, "seq2 = %s\n", ttt2 ); #endif if( lgth2 <= 0 ) // lgth1 <= 0 ha? { // fprintf( stderr, "\n\n==== jimei\n\n" ); // exit( 1 ); for( i=0; i", wm ); #endif g = mi + fgcp2[j-1] * gapfreq1f[i]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j] * gapfreq1f[i-1]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i] * gapfreq2f[j-1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #else for( j=lgth2-1; j>-1; --j ) { m[j] = currentw[j+1] + fgcp1[lgth1-2] * gapfreq2f[j+1]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } #endif // for( j=0; j=imid; i-- ) firstm = -9999999.9; // 
firstmp = lgth1-1; firstmp = lgth1; for( i=lgth1-2; i>-1; i-- ) { #ifdef enablemultithread // fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref ); if( chudanpt && *chudanpt != chudanref ) { // fprintf( stderr, "\n\n## CHUUDAN!!! kouhan\n" ); *chudanres = 1; freearrays_rec1_variousdist ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, doublework, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); freearrays_rec2( gaps, aseq1, aseq2 ); return( -1.0 ); } #endif wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; #if 0 match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 ); #else fillzero( currentw, lgth2 ); for( c=0; c-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1] * gapfreq1f[i]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j] * gapfreq1f[i+1]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1] * gapfreq2f[j]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i] * gapfreq2f[j+1]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? 
} #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j+1] += *mjpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if STOREWM fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j 100 ) // naze 100 if( imid < firstmp-1 ) // naze 100 { jumpi = firstmp; imid = firstmp+1; } #if 0 else { jumpi = 0; imid = 1; } #endif #endif } #if 0 else if( jmid == lgth2 ) { fprintf( stderr, "CHUI1!\n" ); jumpi=0; jumpj=0; imid=jumpforwi[0]; jmid=lgth2-1; } #else // 060414 else if( jmid >= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { // fprintf( stderr, "#### CHUI3!\n" ); imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; if( imid == jumpi ) jumpi = imid-1; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, 
"jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif // fprintf( stderr, "#### FINAL i=%d, jumpi N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif freearrays_rec1_variousdist ( w1, w2, initverticalw, lastverticalw, midw, midm, midn, jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj, m, mp, doublework, intwork #if STOREWM , WMMTX, WMMTX2 #endif ); // fprintf( stderr, "==== calling myself (first)\n" ); value = MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // chudan mada #if 0 fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] ); fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] ); #endif #if MEMSAVE #else for( i=0; i 0 ) { // for( i=0; i 0 ) { // for( i=0; i 1 || maxwm - value > 1 ) { fprintf( stderr, "WARNING value = %f, but maxwm = %f\n", value, maxwm ); for( i=0; i1-%d\n%s\n", i, mseq1[i] ); fprintf( stderr, "%s\n", aseq1[i] ); } for( i=0; i2-%d\n%s\n", i, mseq2[i] ); fprintf( stderr, "%s\n", aseq2[i] ); } // exit( 1 ); } else { fprintf( stderr, "value = %.0f, maxwm = %.0f -> ok\n", value, maxwm ); } #endif #if MEMSAVE #else for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, headgapfreq1, headgapfreq2 ); #ifdef 
enablemultithread if( chudanres && *chudanres ) { // fprintf( stderr, "\n\n## CHUUDAN!!! relay\n" ); *chudanres = 1; freearrays_variousdist( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1s, cpmx2s, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 ); return( -1.0 ); } #endif #if 0 fprintf( stderr, "\n" ); fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); fprintf( stderr, "\n" ); #endif // fprintf( stderr, "wm = %f\n", wm ); for( i=0; iwm > s2->wm ) return( -1 ); else if ( s1->wm < s2->wm ) return( 1 ); else return( 0 ); } static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 ) { int j; for( j=0; j lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; // by D.Mathog, double g; double *currentw, *previousw; #if 1 double *wtmp; int *ijpipt; int *ijpjpt; double *mjpt, *Mjpt, *prept, *curpt; int *mpjpt, *Mpjpt; #endif static double mi, *m; static double Mi, *largeM; static int **ijpi; static int **ijpj; static int mpi, *mp; static int Mpi, *Mp; static double *w1, *w2; // static double *match; static double *initverticalw; /* kufuu sureba iranai */ static double *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static double **cpmx1; static double **cpmx2; static int **intwork; static double **doublework; static int orlgth1 = 0, orlgth2 = 0; double maxwm; double tbk; int tbki, tbkj; int endali, endalj; // double localthr = 0.0; // double localthr2 = 0.0; double fpenalty = (double)penalty; double fpenalty_OP = (double)penalty_OP; double fpenalty_ex = (double)penalty_ex; // double fpenalty_EX = (double)penalty_EX; double foffset = (double)offset; double localthr = -foffset; double localthr2 = -foffset; static Shuryoten *shuryo = NULL; int numshuryo; double minshuryowm = 0.0; // by D.Mathog int minshuryopos = 0; // by D.Mathog double resf; // fprintf( stderr, 
"@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); fprintf( stderr, "in suboptalign11\n" ); if( !shuryo ) { shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) ); } for( i=0; i<100; i++ ) { shuryo[i].i = -1; shuryo[i].j = -1; shuryo[i].wm = 0.0; } numshuryo = 0; if( orlgth1 == 0 ) { } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); fprintf( stderr, "in suboptalign11 step 1\n" ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; fprintf( stderr, "in suboptalign11 step 1.3\n" ); if( orlgth1 > 0 && orlgth2 > 0 ) { fprintf( stderr, "in suboptalign11 step 1.4\n" ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); // FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); fprintf( stderr, "in suboptalign11 step 1.5\n" ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); fprintf( stderr, "in suboptalign11 step 1.6\n" ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); fprintf( stderr, "in suboptalign11 step 1.7\n" ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); // match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); mseq1 = AllocateCharMtx( njob, ll1+ll2 ); mseq2 = AllocateCharMtx( njob, ll1+ll2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } fprintf( stderr, "in suboptalign11 step 1.6\n" ); fprintf( stderr, "in suboptalign11 step 2\n" ); if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); FreeIntMtx( used ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif used = AllocateIntMtx( ll1+10, ll2+10 ); commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijpipt = i - 1; *ijpjpt = mpi; } g = *prept; if( g > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "m%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijpipt = *mpjpt; // *ijpjpt = j - 1; } g = *prept; if( g > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif g = tbk + fpenalty_OP; // g = tbk; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } g = Mi; if( g > tbk ) { tbk = g; tbki = i-1; tbkj = Mpi; } g = *Mjpt; if( g > tbk ) { tbk = g; tbki = *Mpjpt; tbkj = j-1; } // tbk += fpenalty_EX;// + foffset; g = *prept; if( g > *Mjpt ) { *Mjpt = g; *Mpjpt = i-1; } // *Mjpt += fpenalty_EX;// + foffset; g = *prept; if( g > Mi ) { Mi = g; Mpi = j-1; } // Mi += fpenalty_EX;// + foffset; // fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); // fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( numshuryo < 100 ) { shuryo[numshuryo].i = i; shuryo[numshuryo].j = j; shuryo[numshuryo].wm = wm; if( minshuryowm > wm ) { minshuryowm = wm; minshuryopos = numshuryo; } numshuryo++; } else { if( wm > minshuryowm ) { shuryo[minshuryopos].i = i; shuryo[minshuryopos].j = j; shuryo[minshuryopos].wm = wm; minshuryowm = wm; for( k=0; k<100; k++ ) // muda { if( shuryo[k].wm < minshuryowm ) { 
minshuryowm = shuryo[k].wm; minshuryopos = k; break; } } } } #endif #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijpipt = localstop; // *ijpjpt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; curpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } #if 1 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo ); for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } lasti = lgth1+1; for( i=0; i\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); #endif } for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stderr, "%2d ", used[i][j] ); } fprintf( stderr, "\n" ); } // fprintf( stderr, "### impmatch = %f\n", *impmatch ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } return( wm ); } mafft-7.505-without-extensions/core/nj.c0000644000175000017500000001155114224501721017557 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 void topolcpy( int s1[], int s2[], int *mpt1, int *mpt2 ) { int i; *mpt1 = *mpt2; for( i=0; i<*mpt2; i++ ) { s1[i] = s2[i]; } } void topolcat( int s1[], int s2[], int *mpt1, int *mpt2 ) { int i; for( i=*mpt1; i<*mpt1+*mpt2; i++ ) { s1[i] = s2[i-*mpt1]; } *mpt1 += *mpt2; } void topolsort( int m, int s[] ) { int 
i, j, im; int sm; for( j=0; j2; n--, m=nseq-n ) { t = 0.0; for( i=0; i 0 ) { topol[m][0][count] = l; count++; } mem[m][0] = count; for( l=0, count=0; l 0 ) { topol[m][1][count] = l; count++; } mem[m][1] = count; for( l=0; l 0 ); if( n > 3 ) reduc( mtx, nseq, im, jm ); } for( i=0; i 0 ) { topol[m][0][count] = l; count++; } mem[m][0] = count; /* printf( " total length == %f\n", totallen ); */ topolcpy( topol[nseq-2][1], topol[nseq-3][0], mem[nseq-2]+1, mem[nseq-3] ); topolcat( topol[nseq-2][1], topol[nseq-3][1], mem[nseq-2]+1, mem[nseq-3]+1 ); topolsort( mem[nseq-2][1], topol[nseq-2][1] ); if( topol[nseq-2][0][0] > topol[nseq-2][1][0] ) topolswap( topol[nseq-2][0], topol[nseq-2][1], mem[nseq-2], mem[nseq-2]+1 ); } mafft-7.505-without-extensions/core/mafftash_premafft.tmpl0000644000175000017500000002435314224501721023363 0ustar nileshnilesh#!/usr/bin/perl ##################################################################### # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) # # Ver. Date Changelog ##################################################################### # 1.0 07.26.13 Initial release # 2.0 09.03.13 Added extensive warnings and error messages # 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs # 3.1 11.08.13 Added LWP failsafe. 
Made hat3 not a required output # 3.2 12.08.14 Removed 5-char restriction for own structure files # ##################################################################### use strict; use Getopt::Long; use File::Path qw(make_path remove_tree); use LWP::Simple; use LWP::UserAgent; # to prevent error 'Header line too long (limit is 8192)' [v3.1] use LWP::Protocol::http; push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft"; my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE ); GetOptions ( 'd=s' => \$WORKDIR, 'p=s' => \$PDBLIST, 'o=s' => \$OWNLIST, 'h=s' => \$HAT3FILE, 'i=s' => \$INSTRFILE, ); print STDERR "[MAFFTash-premafft]\n"; # set temp directory my $TMP = "/tmp/mapremafft$$"; make_path($TMP) unless -d $TMP; ###### # validation &help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST); &help("Required parameter : '-d'") if defined $OWNLIST && ! 
defined $WORKDIR; $HAT3FILE = "hat3" unless defined $HAT3FILE; $INSTRFILE = "instr" unless defined $INSTRFILE; chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g; ###### # prepare inputs print STDERR "Preparing inputs for service request...\n"; my @files = (); push(@files, "strweight" => "0.5"); push(@files, "premafft" => "1"); # pdb entries if ( defined $PDBLIST ) { print STDERR "PDB List defined!\n"; &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST; my $listfile = "$TMP/pdblist.inp"; open(INPF,"<$PDBLIST") or &bail("Error: Cannot open file $PDBLIST for reading!"); open(OUTF,">$listfile") or &bail("Error: Cannot open temporary file $listfile for writing!"); while() { chomp; if ( /^(\w{5})$/ ) { print OUTF ">PDBID\n$1\n"; } } close OUTF; close INPF; push(@files, "inputfile" => ["$listfile"]); } # upload own structures my %ownids = (); if ( defined $OWNLIST ) { print STDERR "OWN List defined!\n"; &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST; open(OWNINPF,"<$OWNLIST") or &bail("Error: Cannot open file $OWNLIST for reading!"); while() { chomp; if ( /^(\S+)$/ ) { my $fileref = "$WORKDIR/$1.pdb"; unless (-e $fileref) { close OWNINPF; &bail("Error: File $fileref does not exists!"); } push(@files, "inputownfile[]" => ["$fileref"]); $ownids{$1} = 1; } } close OWNINPF; } ###### # start rest service print STDERR "Sending service request...\n"; my $browser = LWP::UserAgent->new; $browser->timeout(0); # post: running a mafftash job my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' ); &bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success); # get response from post request my ($status, $mafftashid) = &parseResponse($postResponse->content); my $MAXTRIES = 3; my $STIMER = 4; my $longtimer = 0; print STDERR "Request sent! 
Waiting for response...[$mafftashid]\n"; # wait for results until it becomes available while(1) { $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER; sleep $longtimer; # get: get results for mafftash job my $getResponse = $browser->get("$BASEURL/$mafftashid"); if ( $getResponse->is_success ) { # get response from get request ($status, $mafftashid) = &parseResponse($getResponse->content); next unless ( $status eq "done" ); # if job is finished and ready print STDERR "Results found!\n"; my $csfile = "$TMP/checksum.tar.gz"; my $try1 = 1; while(1) { print STDERR "Fetching Results... [Trial $try1]\n"; if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile ) { # get response from get request my $checklist = &extractchecksum($csfile); &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 ); foreach my $id ( keys %$checklist ) { my $checkfile = "$TMP/$id"; my $checkid = $checklist->{$id}; my $try2 = 1; while(1) { unlink $checkfile if -e $checkfile; if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile ) { my $hashid = &getchecksum($checkfile); #print STDERR "[hashid]$hashid [checkid]$checkid\n"; if ($hashid ne "" && $hashid ne $checkid ) { unlink $checkfile if -e $checkfile; &bail("Error retrieving compressed file from server! 
[Checksum Failed]") if $try2 >= $MAXTRIES; $try2++; sleep $STIMER; } else { last; } } else { &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES; $try2++; sleep $STIMER; } } } last; } else { &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES; $try1++; sleep $STIMER; } } last; } else { &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content))); } } # make sure outputs were generated # decompress print STDERR "Assembling final results...\n"; &backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/"); &backticks("mv -f $TMP/instr $INSTRFILE") if -e "$TMP/instr"; &backticks("mv -f $TMP/hat3 $HAT3FILE") if -e "$TMP/hat3"; # sometimes no hat3 file is generated [v3.1] #&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE; &bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE; # warn if some ownids were ommitted if ( scalar keys(%ownids) > 0 ) { my %instrids = (); open(INSTRF,"<$INSTRFILE") or &bail("Error: Cannot open file $INSTRFILE for reading!"); while() { chomp; if ( /^>\d+_(\S+)$/ ) { $instrids{$1} = 1; } } close INSTRF; foreach my $id ( keys %ownids ) { warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id}; } } &cleanup(); #################### #################### sub parseResponse { my $response = shift; #"status":"wait","mafftashid":"Ma8211432R" my $status = ""; my $mafftashid = ""; if ( $response =~ /^([^\s:]+):([^\s:]+)$/ ) { $mafftashid = $1; $status = $2; } return ($status, $mafftashid); } sub extractchecksum { my $infile = shift; my %dataset = (); open CSUM, "tar -zxf $infile -O|" or return \%dataset; while() { chomp; if ( /^(\S+)\s+(\S+)$/ ) { $dataset{$2} = $1; } } close CSUM; return \%dataset; } sub parseError { my $response = shift; #"error":"Invalid number of inputs found." my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? 
$1 : ""; return $errorstr; } sub getchecksum { my $infile = shift; # md5 binary check my $MD5BIN = ""; if ( -x "/usr/bin/md5sum" ) { $MD5BIN = "/usr/bin/md5sum"; } elsif ( -x "/sbin/md5" ) { $MD5BIN = "/sbin/md5 -q"; } return "" if $MD5BIN eq ""; my $checksum = ""; open MD5EXE, "$MD5BIN $infile|" or return ""; while() { if (/^(\S+)\s+(\S+)$/) { $checksum = $1; last; } elsif (/^(\S+)$/) { $checksum = $1; last; } } close MD5EXE; return $checksum; } sub backticks { my $command = shift; `$command`; return ($? == -1) ? 0 : 1; } sub bail { my $str = shift; print STDERR "$str\n" if defined $str; &cleanup(); exit(1); } sub cleanup { return if ($TMP eq "" || !-d $TMP); opendir(MAINDIR, $TMP); my @files = readdir(MAINDIR); closedir(MAINDIR); foreach my $file (@files) { unlink "$TMP/$file" if -e "$TMP/$file"; } remove_tree($TMP); } sub help { my $str = shift; print <<'HELPME'; USAGE ./mafftash_premafft.pl -p [FILE] ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY] ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY] PARAMETERS -p [FILE] FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format -o [FILE] -d [DIRECTORY] FILE contains a list of IDs from your own structure/pdb files (one entry per line) for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY -h [HATFILE] save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory -i [INSTRFILE] save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory HELPME &bail($str); } mafft-7.505-without-extensions/core/newick2mafft.rb0000644000175000017500000000506214224501721021711 0ustar nileshnilesh#! 
/usr/bin/env ruby #version 2, 2009/Jan/24 #version 3, 2015/Dec/8 if ARGV.length == 1 scale = 1.0 elsif ARGV.length == 2 scale = ARGV.shift.to_f else STDERR.puts "USAGE: newick2mafft.rb scale input_tree > output" exit end if scale <= 0.0 then STDERR.puts "Inappropriate scale, #{scale.to_s}" exit end STDERR.puts "scale = " + scale.to_s infp = File.open( ARGV.shift, "r" ) tree = "" while line = infp.gets tree += line.strip break if tree =~ /;$/ end infp.close #tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "").gsub(/:\-[0-9\.]+/, ":0.0" ) #tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9\.]*[eE]-[0-9]*/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") STDERR.puts "Initial tree = " + tree def resolve( tree ) while 1 # p tree tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" ) hit1 = $1 hit2 = $2 hit3 = $3 hit4 = $4 # p hit1 # p hit2 # p hit3 # p hit4 # puts "introduce XXX" # p tree break unless tree.index(/XXX/) poshit = tree.index(/XXX/) # puts "poshit=" + poshit.to_s i = poshit height = 0 while i >= 0 break if height == 0 && tree[i..i] == '(' if tree[i..i] == ')' then height += 1 elsif tree[i..i] == '(' then height -= 1 end i -= 1 end poskakko = i # puts "poskakko = " + poskakko.to_s zenhan = tree[0..poskakko] zenhan = "" if poskakko == -1 # puts "zenhan = " + zenhan treelen = tree.length tree = zenhan + "(" + tree[poskakko+1..treelen] # puts "add (" # p tree tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" ) # p tree end return tree end memi = [-1,-1] leni = [-1,-1] while tree.index( /\(/ ) tree = resolve( tree ) tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" ) memi[0] = $1.to_i leni[0] = $2.to_f * scale memi[1] = $3.to_i leni[1] = $4.to_f * scale if leni[0] > 10 || leni[1] > 10 then STDERR.puts "" STDERR.puts "Please check the scale of branch length!" 
STDERR.puts "The unit of branch lengths must be 'substitution/site'" STDERR.puts "If the unit is 'substition' in your tree, please" STDERR.puts "use the scale argument," STDERR.puts "% newick2mafft scale in > out" STDERR.puts "where scale = 1/(alignment length)" STDERR.puts "" exit 1 end # STDERR.puts "subtree = " + $& if memi[1] < memi[0] then memi.reverse! leni.reverse! end tree.sub!( /XXX/, memi[0].to_s ) # STDERR.puts "Tree = " + tree printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] ) end mafft-7.505-without-extensions/core/Galign11.c0000644000175000017500000012270214224501721020514 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 1 #define TERMGAPFAC 0.0 #define TERMGAPFAC_EX 0.0 #if 1 #if 0 static void match_calc_mtx_codon( double **codonmtx, double **mtx, double *match, char **s1, char **s2, int **codonseq1, int **codonseq2, int i1, int lgth2 ) // gstart, gend wo tsukatte coding ka kakuninn suru beki { char *seq2 = s2[0]; double *doubleptr = mtx[(unsigned char)s1[0][i1]]; int codonid1, codonid2; // int codonid1p, codonid2p; // int codonid1pp, codonid2pp; double codonmatch; // int *pt0, *pt1, *pt2; int *pt0; codonid1 = codonseq1[0][i1]; // codonid1p = codonseq1[1][i1]; // codonid1pp = codonseq1[2][i1]; pt0 = codonseq2[0]; // pt1 = codonseq2[1]; // pt2 = codonseq2[2]; // posin2 = 0; while( lgth2-- ) { // reporterr( "%c-%c -> %f\n", s1[0][i1], *seq2, doubleptr[(unsigned char)*seq2] ); // track ha at de kokoromiru codonid2 = *pt0++; // codonid2p = *pt1++; // codonid2pp = *pt2++; // posin2++; // // reporterr( "%c%c%c(%d)-%c%c%c(%d)\n", s1[0][i1], s1[0][i1+1], s1[0][i1+2], codonid1, *seq2, *(seq2+1), *(seq2+2), codonid2 ); codonmatch = 0.0; if( codonid1 > -1 && codonid2 > -1 ) codonmatch = codonmtx[codonid1][codonid2] * 600.0; // if( codonid1p > -1 && codonid2p > -1 ) codonmatch -= codonmtx[codonid1p][codonid2p] * 600; // if( codonid1pp > -1 && codonid2pp > -1 ) codonmatch -= 
codonmtx[codonid1pp][codonid2pp] * 600; *match++ = doubleptr[(unsigned char)*seq2++] + codonmatch; } } #else static void match_calc_mtx_codon( double **codonmtx, double **mtx, double *match, char **s1, char **s2, int **codonseq1, int **codonseq2, int i1, int lgth2 ) // gstart, gend wo tsukatte coding ka kakuninn suru beki { char *seq2 = s2[0]; double *doubleptr = mtx[(unsigned char)s1[0][i1]]; int codonid1, codonid2; double codonmatch; int *pt0; codonid1 = codonseq1[0][i1]; pt0 = codonseq2[0]; while( lgth2-- ) { // reporterr( "%c-%c -> %f\n", s1[0][i1], *seq2, doubleptr[(unsigned char)*seq2] ); codonid2 = *pt0++; // reporterr( "%c%c%c(%d)-%c%c%c(%d)\n", s1[0][i1], s1[0][i1+1], s1[0][i1+2], codonid1, *seq2, *(seq2+1), *(seq2+2), codonid2 ); codonmatch = 0.0; if( codonid1 > -1 && codonid2 > -1 ) codonmatch = codonmtx[codonid1][codonid2] * 600.0; // codonid1>-1 ha iranai. *match++ = doubleptr[(unsigned char)*seq2++] + codonmatch; } } #endif static void match_calc_mtx( double **mtx, double *match, char **s1, char **s2, int i1, int lgth2 ) { char *seq2 = s2[0]; double *doubleptr = mtx[(unsigned char)s1[0][i1]]; while( lgth2-- ) *match++ = doubleptr[(unsigned char)*seq2++]; } #else static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 ) { int j; for( j=0; j= warpbase ) if( tailgp == 1 ) ; else { #if 1 // reporterr( "lastverticalw[lgth1-1] = %f\n", lastverticalw[lgth1-1] ); // reporterr( "lasthorizontalw[lgth2-1] = %f\n", lasthorizontalw[lgth2-1] ); wm = lasthorizontalw[lgth2-1] - 1.0; // lasthorizontalw[lgth2-1] yori kanarazu chiisai. 
for( j=lgth2-2; j>=0; j-- ) { if( (g=lasthorizontalw[j]+ ( fpenalty * TERMGAPFAC + fpenalty_ex * (lgth2-1-j) * TERMGAPFAC_EX ) ) > wm ) { wm = g; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } for( i=lgth1-2; i>=0; i-- ) { if( ( g=lastverticalw[i]+ ( fpenalty * TERMGAPFAC + fpenalty_ex * (lgth1-1-i) * TERMGAPFAC_EX ) ) > wm ) { wm = g; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } if( lasthorizontalw[lgth2-1] > wm ) // score ga onaji baai erabarenai { wm = lasthorizontalw[lgth2-1]; iin = lgth1-1; jin = lgth2-1; ijp[lgth1][lgth2] = 0; } #else wm = lastverticalw[0]; for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } #endif } mseq1[0] += lgth1+lgth2; *mseq1[0] = 0; mseq2[0] += lgth1+lgth2; *mseq2[0] = 0; iin = lgth1; jin = lgth2; limk = lgth1+lgth2 + 1; for( k=0; k= warpbase ) { // fprintf( stderr, "WARP!\n" ); ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--mseq1[0] = seq1[0][l]; *--mseq2[0] = *gap; k++; } l= jin; while( --l >= 0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][l]; k++; } break; } else { l = iin - ifi; while( --l > 0 ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l > 0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; k++; iin = ifi; jin = jfi; } // fprintf( stderr, "%s\n", mseq1[0] ); // fprintf( stderr, "%s\n", mseq2[0] ); return( wm ); } double G__align11psg( double **codonmtx, double **n_dynamicmtx, char **seq1, char **seq2, int 
alloclen, int headgp, int tailgp, double *gstart, double *gend ) { // int k; register int i, j; int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lastj; int lgth1, lgth2; int resultlen; double wm, wmo; /* int ?????? */ double g; double *currentw, *previousw; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; double fpenalty_tmp; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; double fpenalty_ex_i; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi = 0.0; static TLS double *m = NULL; static TLS int **ijp = NULL; static TLS int mpi = 0; static TLS int *mp = NULL; static TLS double *w1 = NULL; static TLS double *w2 = NULL; static TLS double *match = NULL; static TLS double *initverticalw = NULL; /* kufuu sureba iranai */ static TLS double *lastverticalw = NULL; /* kufuu sureba iranai */ static TLS char **mseq1 = NULL; static TLS char **mseq2 = NULL; static TLS char **mseq = NULL; static TLS int **intwork = NULL; static TLS double **doublework = NULL; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double **amino_dynamicmtx = NULL; // ?? 
static TLS int **codonseq1 = NULL; static TLS int **codonseq2 = NULL; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; if( mseq1 ) free( mseq1 ); mseq1 = NULL; if( mseq2 ) free( mseq2 ); mseq2 = NULL; if( w1 ) FreeFloatVec( w1 ); w1 = NULL; if( w2 ) FreeFloatVec( w2 ); w2 = NULL; if( match ) FreeFloatVec( match ); match = NULL; if( initverticalw ) FreeFloatVec( initverticalw ); initverticalw = NULL; if( lastverticalw ) FreeFloatVec( lastverticalw ); lastverticalw = NULL; if( m ) FreeFloatVec( m ); m = NULL; if( mp ) FreeIntVec( mp ); mp = NULL; if( mseq ) FreeCharMtx( mseq ); mseq = NULL; if( doublework ) FreeFloatMtx( doublework ); doublework = NULL; if( intwork ) FreeIntMtx( intwork ); intwork = NULL; if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL; if( codonseq1 ) FreeIntMtx( codonseq1 ); codonseq1 = NULL; if( codonseq2 ) FreeIntMtx( codonseq2 ); codonseq2 = NULL; } orlgth1 = 0; orlgth2 = 0; return( 0.0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); warpbase = lgth1 + lgth2; warpis = NULL; warpjs = NULL; warpn = 0; if( trywarp ) { // fprintf( stderr, "IN G__align11\n" ); if( headgp == 0 || tailgp == 0 ) { fprintf( stderr, "At present, headgp and tailgp must be 1.\n" ); exit( 1 ); } wmrecords = AllocateFloatVec( lgth2+1 ); warpi = AllocateIntVec( lgth2+1 ); warpj = AllocateIntVec( lgth2+1 ); prevwmrecords = AllocateFloatVec( lgth2+1 ); prevwarpi = AllocateIntVec( lgth2+1 ); prevwarpj = AllocateIntVec( lgth2+1 ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( 
initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); FreeDoubleMtx( amino_dynamicmtx ); if( codonseq1 ) FreeIntMtx( codonseq1 ); if( codonseq2 ) FreeIntMtx( codonseq2 ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( 2, ll1+ll2 ); // 2020/Apr doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); if( codonscore ) { codonseq1 = AllocateIntMtx( 3, ll1 ); // Only codonseq1[0] is used at this point codonseq2 = AllocateIntMtx( 3, ll2 ); // Only codonseq2[0] is used at this point } #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; if( codonscore ) { // codonseq[1..2] are not used yet. 
for( i=0; i<3&&i 0.5 ) // only at 3rd position match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, 0, lgth2 ); else match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 ); #else match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, 0, lgth2, gstart[0] == 0.5 && gend[0] > 0.5 ); #endif } else { match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 ); match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 ); } if( headgp == 1 ) { for( i=1; i 0.5 ) ) // only at 3rd position in seq1 match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, i, lgth2 ); else match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 ); #else if( codonscore ) // only at 3rd position in seq1 match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, i, lgth2, gstart[i] == 0.5 && gend[i] > 0.5 ); else match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 ); #endif #if XXXXXXX fprintf( stderr, "\n" ); fprintf( stderr, "i=%d\n", i ); fprintf( stderr, "currentw = \n" ); for( j=0; j", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty*gend[i]) > wm ) { wm = g; *ijppt = -( j - mpi ); // reporterr( "hit! jump from %d to %d: %c->%c\n", j, mpi, seq2[0][j], seq2[0][mpi] ); } if( (g=*prept+fpenalty*gstart[i-1]) >= mi ) // if( (g=*prept) > mi ) { mi = g; mpi = j-1; // reporterr( "hit! 
jump to %d: ->%c:%c\n", mpi, seq1[0][i-1], seq2[0][mpi] ); } #if USE_PENALTY_EX mi += fpenalty_ex_i; // mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty*gend[i]) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } if( (g=*prept+fpenalty*gstart[i-1]) >= *mjpt ) // if( (g=*prept) > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX if( j < lgth2 ) // 2018/May/11 m[j] += fpenalty_ex; #endif #if 1 if( trywarp ) { fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ); // fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp ); // fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm ); // if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm ) if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai { // fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } else { } curm = *curpt + wm; // fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j ); // fprintf( stderr, "copy from i, j-1? 
%f > %f?\n", wmrecords[j-1], curm ); // if( wmrecords[j-1] > wmrecords[j] ) if( *wmrecords1pt > *wmrecordspt ) { // fprintf( stderr, "yes\n" ); // wmrecords[j] = wmrecords[j-1]; *wmrecordspt = *wmrecords1pt; // warpi[j] = warpi[j-1]; // warpj[j] = warpj[j-1]; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); // fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); } // else // { // fprintf( stderr, "no\n" ); // } // fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] ); // fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] ); // fprintf( stderr, "replace?\n" ); // if( curm > wmrecords[j] ) if( curm > *wmrecordspt ) { // fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm ); // wmrecords[j] = curm; *wmrecordspt = curm; // warpi[j] = i; // warpj[j] = j; *warpipt = i; *warpjpt = j; } // else // { // fprintf( stderr, "No! 
warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); // } // fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] ); wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "\nwm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } wmo = Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp, warpis, warpjs, warpbase ); if( !tailgp ) wm = wmo; // reporterr( "wm (after tracking) = %f\n", wm ); if( warpis ) free( warpis ); if( warpjs ) free( warpjs ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm = %f\n", wm ); #endif return( wm ); } double G__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp ) { // int k; register int i, j; int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lastj; int lgth1, lgth2; int resultlen; double wm, wmo; /* int ?????? 
*/ double g; double *currentw, *previousw; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; double fpenalty_tmp; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; double fpenalty_ex_i; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi = 0.0; static TLS double *m = NULL; static TLS int **ijp = NULL; static TLS int mpi = 0; static TLS int *mp = NULL; static TLS double *w1 = NULL; static TLS double *w2 = NULL; static TLS double *match = NULL; static TLS double *initverticalw = NULL; /* kufuu sureba iranai */ static TLS double *lastverticalw = NULL; /* kufuu sureba iranai */ static TLS char **mseq1 = NULL; static TLS char **mseq2 = NULL; static TLS char **mseq = NULL; static TLS int **intwork = NULL; static TLS double **doublework = NULL; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double **amino_dynamicmtx = NULL; // ?? int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; if( mseq1 ) free( mseq1 ); mseq1 = NULL; if( mseq2 ) free( mseq2 ); mseq2 = NULL; if( w1 ) FreeFloatVec( w1 ); w1 = NULL; if( w2 ) FreeFloatVec( w2 ); w2 = NULL; if( match ) FreeFloatVec( match ); match = NULL; if( initverticalw ) FreeFloatVec( initverticalw ); initverticalw = NULL; if( lastverticalw ) FreeFloatVec( lastverticalw ); lastverticalw = NULL; if( m ) FreeFloatVec( m ); m = NULL; if( mp ) FreeIntVec( mp ); mp = NULL; if( mseq ) FreeCharMtx( mseq ); mseq = NULL; if( doublework ) FreeFloatMtx( doublework ); doublework = NULL; if( intwork ) FreeIntMtx( intwork ); intwork = NULL; if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = 
NULL; } orlgth1 = 0; orlgth2 = 0; return( 0.0 ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); warpbase = lgth1 + lgth2; warpis = NULL; warpjs = NULL; warpn = 0; if( trywarp ) { // fprintf( stderr, "IN G__align11\n" ); if( headgp == 0 || tailgp == 0 ) { fprintf( stderr, "At present, headgp and tailgp must be 1.\n" ); exit( 1 ); } wmrecords = AllocateFloatVec( lgth2+1 ); warpi = AllocateIntVec( lgth2+1 ); warpj = AllocateIntVec( lgth2+1 ); prevwmrecords = AllocateFloatVec( lgth2+1 ); prevwarpi = AllocateIntVec( lgth2+1 ); prevwarpj = AllocateIntVec( lgth2+1 ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); FreeDoubleMtx( amino_dynamicmtx ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( 2, ll1+ll2 ); // 2020/Apr doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; *ijppt = -( j - mpi ); } if( (g=*prept) >= mi ) // if( (g=*prept) > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex_i; // mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } if( (g=*prept) >= *mjpt ) // if( (g=*prept) > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX if( j < lgth2 ) // 2018/May/11 m[j] += fpenalty_ex; #endif #if 1 if( trywarp ) { fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ); // fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp ); // fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm ); // if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm ) if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai { // fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! 
warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } else { } curm = *curpt + wm; // fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j ); // fprintf( stderr, "copy from i, j-1? %f > %f?\n", wmrecords[j-1], curm ); // if( wmrecords[j-1] > wmrecords[j] ) if( *wmrecords1pt > *wmrecordspt ) { // fprintf( stderr, "yes\n" ); // wmrecords[j] = wmrecords[j-1]; *wmrecordspt = *wmrecords1pt; // warpi[j] = warpi[j-1]; // warpj[j] = warpj[j-1]; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); // fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); } // else // { // fprintf( stderr, "no\n" ); // } // fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] ); // fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] ); // fprintf( stderr, "replace?\n" ); // if( curm > wmrecords[j] ) if( curm > *wmrecordspt ) { // fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm ); // wmrecords[j] = curm; *wmrecordspt = curm; // warpi[j] = i; // warpj[j] = j; *warpipt = i; *warpjpt = j; } // else // { // fprintf( stderr, "No! 
warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] ); // } // fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] ); wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "\nwm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } wmo = Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp, warpis, warpjs, warpbase ); if( !tailgp ) wm = wmo; // reporterr( "wm (after tracking) = %f\n", wm ); if( warpis ) free( warpis ); if( warpjs ) free( warpjs ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm = %f\n", wm ); #endif return( wm ); } double G__align11_noalign( double **n_dynamicmtx, int penal, int penal_ex, char **seq1, char **seq2, int alloclen ) /* warp mitaiou */ { // int k; register int i, j; int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; // int resultlen; double wm; /* int ?????? 
*/ double g; double *currentw, *previousw; double fpenalty = (double)penal; #if USE_PENALTY_EX double fpenalty_ex = (double)penal_ex; double fpenalty_ex_i; #endif #if 1 double *wtmp; double *mjpt, *prept, *curpt; // int *mpjpt; #endif static TLS double mi, *m; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS int **intwork; static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double **amino_dynamicmtx; if( seq1 == NULL ) { if( orlgth1 > 0 && orlgth2 > 0 ) { orlgth1 = 0; orlgth2 = 0; FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); free( m ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); FreeDoubleMtx( amino_dynamicmtx ); } return( 0.0 ); } wm = 0.0; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 if( lgth1 <= 0 || lgth2 <= 0 ) { fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 ); } #endif if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); FreeDoubleMtx( amino_dynamicmtx ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); // amino_dynamicmtx = AllocateDoubleMtx( 0x80, 0x80 ); amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); // 2017/Nov. constants.c no 'charsize' wo global hensuu nishita houga yoi? #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=mi+fpenalty) > wm ) { wm = g; } // if( (g=*prept) >= mi ) if( (g=*prept) > mi ) // onaji hazu { mi = g; } #if USE_PENALTY_EX mi += fpenalty_ex_i; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( (g=*mjpt + fpenalty) > wm ) { wm = g; } // if( (g=*prept) >= *mjpt ) if( (g=*prept) > *mjpt ) // onaji hazu { *mjpt = g; } #if USE_PENALTY_EX if( j < lgth2 ) // 2018/May/11 m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; mjpt++; prept++; } lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error } #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); fprintf( stderr, "wm (noalign) = %f\n", wm ); #endif return( wm ); } mafft-7.505-without-extensions/core/getlag.c0000644000175000017500000002557514224501721020426 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 void arguments( int argc, char *argv[] ) { int c; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'C'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; ppenalty = NOTSPECIFIED; 
ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; scoremtx = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'D': scoremtx = -1; break; case 'P': scoremtx = 0; break; case 'i': contin = 1; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': treemethod = 'x'; break; case 'p': treemethod = 'p'; break; case 'a': alg = 'a'; break; case 'A': alg = 'A'; break; case 'S': alg = 'S'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); } void treebase( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double **mtx, int ***topol, double **len, double **eff, int alloclen ) { int i, j, l; int clus1, clus2; int s1, s2, r1, r2; double pscore; static char *indication1, *indication2; static char **name1, **name2; static double **partialmtx = NULL; static int ***partialtopol = NULL; static double **partiallen = NULL; static double **partialeff = NULL; static double *effarr = NULL; static double *effarr1 = NULL; static double *effarr2 = NULL; #if 0 char pair[njob][njob]; #else static char **pair; #endif if( partialtopol == NULL ) { partialmtx = AllocateDoubleMtx( njob, njob ); partialtopol = AllocateIntCub( njob, 2, njob ); partialeff = AllocateDoubleMtx( njob, njob ); partiallen = AllocateDoubleMtx( njob, 2 ); effarr = AllocateDoubleVec( njob ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( njob*3+100 ); indication2 = AllocateCharVec( njob*3+100 ); name1 = AllocateCharMtx( njob, B+1 ); name2 = AllocateCharMtx( njob, B+1 ); #if 0 #else pair = AllocateCharMtx( njob, njob ); #endif } if( checkC ) for( i=0; i-1; i++ ) if( pair[s1][r1] != 1 ) exit( 1 ); s2 = topol[l][1][0]; for( i=0; (r2=topol[l][1][i])>-1; i++ ) if( pair[s2][r2] != 1 ) exit( 1 ); clus1 = conjuction( pair, s1, aseq, mseq1, effarr1, effarr, name, name1, indication1 ); clus2 = conjuction( pair, s2, aseq, mseq2, effarr2, effarr, name, name2, indication2 
); fprintf( trap_g, "\nSTEP-%d\n", l ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 ); fprintf( stderr, "group1 = %.66s", indication1 ); if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); if( checkC ) for( i=0; i-1; i++ ) { pair[s1][r2] = 1; pair[s2][r2] = 0; } writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); fprintf( stderr, "\n" ); } } static void WriteOptions( FILE *fp ) { fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "Dayhoff( machigai ga aru )\n" ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); else if( scoremtx == -1 ) fprintf( fp, "DNA\n" ); if( scoremtx == 0 || scoremtx == -1 ) fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); else fprintf( fp, "Gap Penalty = %+5.2f\n", (double)ppenalty/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Apgorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = 
UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( scoremtx == -1 ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double **pscore; static double **eff; static double **node0, **node1; int i, j; static int ***topol; static double **len; FILE *prep; char c; int alloclen; arguments( argc, argv ); getnumlen( stdin ); rewind( stdin ); name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax*5+1 ); aseq = AllocateCharMtx( njob, nlenmax*5+1 ); bseq = AllocateCharMtx( njob, nlenmax*5+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*5; topol = AllocateIntCub( njob, 2, njob ); len = AllocateDoubleMtx( njob, 2 ); pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleMtx( njob, njob ); node0 = AllocateDoubleMtx( njob, njob ); node1 = AllocateDoubleMtx( njob, njob ); #if 0 Read( name, nlen, seq ); #else readData_pointer( stdin, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } writePre( njob, name, nlen, seq, 0 ); if( tbutree == 0 ) { for( i=1; i output\n" ); reporterr( "=== \n" ); reporterr( "========================================================================= \n" ); reporterr( "========================================================================= \n" ); return( 
(int)(*seq)[i] ); } } seq++; } return( 0 ); } void intcat( int *s1, int *s2 ) { while( *s1 != -1 ) s1++; while( *s2 != -1 ) { // reporterr( "copying %d\n", *s2 ); *s1++ = *s2++; } *s1 = -1; } void intcpy( int *s1, int *s2 ) { while( *s2 != -1 ) { // reporterr( "copying %d\n", *s2 ); *s1++ = *s2++; } *s1 = -1; } void intncpy( int *s1, int *s2, int n ) { while( n-- ) *s1++ = *s2++; } void fltncpy( double *s1, double *s2, int n ) { while( n-- ) *s1++ = *s2++; } static int countmem( int *s ) { int v = 0; while( *s++ != -1 ) v++; return( v ); } static int lastmem( int *s ) { while( *s++ != -1 ) ; return( *(s-2) ); } void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx ) { int i, j, lgth; lgth = strlen( aseq[0] ); for( j=0; j DISPSEQF ) imax = DISPSEQF; else imax = nseq; reporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; reporterr( "%3d %s\n", i+1, b ); } } void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; unsigned char ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == '-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(unsigned char)mseq2[++k]) == '-' ) ; // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { 
int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k, c; int len2 = len - 2; int mn1, mn2; double tmpscore; char *mseq1, *mseq2; double efficient; int gapnum = amino_n['-']; double gaptmpscore; double gapscore = 0.0; // reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( mn2 == gapnum ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; tmpscore += (double)scoringmatrices[c][mn1][mn2]; // tmpscore += (double)scoringmtx[mn1][mn2]; while( (mn2=amino_n[(unsigned char)mseq2[++k]]) == gapnum ) tmpscore += (double)scoringmatrices[c][mn1][mn2]; // tmpscore += (double)scoringmtx[mn1][mn2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } // reporterr( "done." 
); #if 0 reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; unsigned char ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; double gaptmpscore; double gapscore = 0.0; // reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == '-' ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(unsigned char)mseq2[++k]) == '-' ) // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } #if 0 reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { int i, j, k; double c; int len = strlen( seq[0] ); double score; double tmpscore; char *mseq1, *mseq2; double efficient; #if DEBUG FILE *fp; #endif score = 0.0; c = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; /* fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); */ } /* fprintf( stdout, "total score = %f\n", score ); */ for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore 
+= amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; } } /* reporterr( "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* fprintf( trap_g, "score by fast = %f\n", (double)score ); tmpscore = score = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[24][0]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); */ score += (double)tmpscore * efficient; } } score /= c; return( (double)score ); } void upg2( int nseq, double **eff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; static char **pair = NULL; if( !pair ) { pair = AllocateCharMtx( njob, njob ); } for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } #define BLOCKSIZE 100 #define LARGEBLOCKSIZE 100 typedef struct _generaltdistarrthread_arg { int para; int njob; // int thread_no; int m; int *nlen; char **seq; int **skiptable; int **pointt; int *ttable; int *tselfscore; int *posshared; int *joblist; double *result; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } generaldistarrthread_arg_t; static void *generalkmerdistarrthread( void *arg ) // enablemultithread == 0 demo tsukau { generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg; int njob = targ->njob; int para = targ->para; int m = targ->m; int *nlen = targ->nlen; int **pointt = targ->pointt; int *ttable = targ->ttable; int 
*tselfscore = targ->tselfscore; int *joblist = targ->joblist; int *posshared = targ->posshared; double *result = targ->result; // double **partmtx = targ->partmtx; int i, posinjoblist, n; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += LARGEBLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int m = targ->m; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *result = targ->result; // double **partmtx = targ->partmtx; int i, posinjoblist, n; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif return( NULL ); } posinjoblist = *posshared; *posshared += LARGEBLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; n", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; // result = calloc( nseq, sizeof( double ) ); // joblist = calloc( nseq, sizeof( int ) ); for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } for( 
acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } if( j ) { // reporterr( "resetting in parallel!! j=%d\n", j ); // exit( 1 ); int posshared; generaldistarrthread_arg_t *targ; #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); posshared = 0; pthread_mutex_init( &mutex, NULL ); for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } } *mindisfrompt = mindisfrom; *nearestpt = nearest; // free( joblist ); // free( result ); } #else static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *resultnotused, int *joblistnotused ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) 
tmpdouble=distfrompt[j][pos]; // else if( seq ) // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); else tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; // else if( seq ) // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); else tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; } #endif #if 1 static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *result, int *joblist ) { int i, j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; // double *result; // int *joblist; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; // result = calloc( nseq, sizeof( double ) ); // joblist = calloc( nseq, sizeof( int ) ); // for( acptj=acpt,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = 
distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } if( j ) { // reporterr( "resetting in parallel!! j=%d\r", j ); // exit( 1 ); int posshared; generaldistarrthread_arg_t *targ; posshared = 0; #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; // free( joblist ); // free( result ); } #else static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *resultnotused, int *joblistnotused ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; for( acptj=(acpt+pos)->next; acptj!=NULL; 
acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; else tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); // else // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; else tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); // else // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; } #endif static int getdensest( int *m, double *d ) { int i; double dmax = -100.0; int pmax = -1; for( i=0; m[i]>-1; i++ ) { if( d[m[i]] > dmax ) { dmax = d[m[i]]; pmax = m[i]; } } return( pmax ); } static void setdensity( int nseq, Bchain *acpt, double **eff, double *density, int pos ) { int j; double tmpdouble; // double **effptpt; Bchain *acptj; // printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt ); // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; tmpdouble = 0.0; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( eff[pos][j-pos] < 1.0 ) tmpdouble += (2.0-eff[pos][j-pos]); } // effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; if( eff[j][pos-j] < 1.0 ) tmpdouble += (2.0-eff[j][pos-j]); } *density = tmpdouble; // printf( "p=%d, d=%f \n", pos, *density ); } static void setnearest( int nseq, Bchain *acpt, double 
**eff, double *mindisfrompt, int *nearestpt, int pos ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt ); // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=*effpt++) < *mindisfrompt ) if( (tmpdouble=eff[pos][j-pos]) < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpdouble=eff[j][pos-j]) < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } *mindisfrompt = mindisfrom; *nearestpt = nearest; // printf( "%f, %d \n", pos, *mindisfrompt, *nearestpt ); } static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) { int j; double tmpdouble; double **effptpt; Bchain *acptj; *mindisfrompt = 999.9; *nearestpt = -1; // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=*effpt++) < *mindisfrompt ) if( (tmpdouble=eff[pos][j]) < *mindisfrompt ) { *mindisfrompt = tmpdouble; *nearestpt = j; } } effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpdouble=eff[j][pos]) < *mindisfrompt ) { *mindisfrompt = tmpdouble; *nearestpt = j; } } } static void loadtreeoneline( int *ar, double *len, FILE *fp ) { static char gett[1000]; int res; char *p; p = fgets( gett, 999, fp ); if( p == NULL ) { reporterr( "\n\nFormat error (1) in the tree? 
It has to be a bifurcated and rooted tree.\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } res = sscanf( gett, "%d %d %lf %lf", ar, ar+1, len, len+1 ); if( res != 4 ) { reporterr( "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } ar[0]--; ar[1]--; if( ar[0] >= ar[1] ) { reporterr( "\n\nIncorrect guide tree\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // reporterr( "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); // reporterr( "len[0] = %f, len[1] = %f\n", len[0], len[1] ); } void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep ) { int i, j, k, minijm, maxijm; int *intpt, *intpt2; int *hist = NULL; Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; int *pt1, *pt2, *pt11, *pt22; int *nmemar; int nmemim, nmemjm; char **tree; char *treetmp; char *nametmp, *nameptr, *tmpptr; char namec; FILE *fp; int node[2]; double *height; double clusterdist; int mpair, mi, mj; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( nseq ); ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); nmemar = AllocateIntVec( nseq ); // treetmp = AllocateCharVec( nseq*50 ); treetmp = NULL; nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( nseq, nseq*50 ); tree = AllocateCharMtx( nseq, 0 ); height = AllocateFloatVec( nseq ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( 
mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } #endif prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; if( len[k][0] == -1 || len[k][1] == -1 ) { reporterr( "Re-computing the length of branch %d..\n", k ); clusterdist = 0.0; mpair = 0; for( i=0; 
(mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ ) { minijm = MIN(mi,mj); maxijm = MAX(mi,mj); clusterdist += mtx[minijm][maxijm-minijm]; mpair += 1; } clusterdist /= (double)mpair; reporterr( "clusterdist = %f\n", clusterdist ); if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im]; if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im]; fprintf( stderr, "len0 = %f\n", len[k][0] ); fprintf( stderr, "len1 = %f\n\n", len[k][1] ); } #if 0 fprintf( stderr, "vSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n" ); #endif height[im] += len[k][0]; // for ig tree, 2015/Dec/25 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 // reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] ); treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // reporterr( "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fprintf( fp, "#by loadtop\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); free( hist ); free( (char *)ac ); free( (void *)nmemar ); free( height ); } static void shufflelennum( Lennum *ary, int size ) { int i; for(i=0;ilen - ((Lennum *)p)->len ); } static int compfuncpair( const void *p, const void *q ) { return( ((Pairnum *)q)->npairs - ((Pairnum *)p)->npairs ); } void limitlh( int *uselh, Lennum *in, int size, int limit ) { int i; // for(i=0;i size ) limit = size; // reporterr( "numpairs=%llu, ULLONG_MAX=%llu, nn=%lld, INT_MAX=%d, n=%d\n", numpairs, ULLONG_MAX, nn, INT_MAX, n ); for(i=0;i INT_MAX ) nn = INT_MAX; n = (int)nn; if( n > size ) n = size; // reporterr( "numpairs=%llu, ULLONG_MAX=%llu, nn=%lld, INT_MAX=%d, n=%d\n", numpairs, ULLONG_MAX, nn, INT_MAX, n ); for(i=0;idep)[pos].child0 == -1 ) { *order++ = (tdpglobal->topol)[pos][0][0]; *order = -1; } else { order = topolorder_lessargs( order, (tdpglobal->dep)[pos].child0 ); } if( (tdpglobal->dep)[pos].child1 == -1 ) { *order++ = (tdpglobal->topol)[pos][1][0]; *order = -1; } else { order = topolorder_lessargs( order, (tdpglobal->dep)[pos].child1 ); } return( order ); } int *topolorderz( int *order, int ***topol, Treedep *dep, int pos, int nchild ) { #if 0 TopDep td; td.topol = topol; td.dep = dep; tdpglobal = &td; #else tdpglobal = (TopDep *)calloc( sizeof( TopDep ), 1 ); tdpglobal->topol = topol; tdpglobal->dep = dep; #endif int child; if( nchild == 0 || nchild == 2 ) { if( (child=(dep)[pos].child0) == -1 ) { *order++ = (topol)[pos][0][0]; *order = -1; } else { // order = topolorder_lessargs( order, &td, child ); order = topolorder_lessargs( order, child ); } } if( nchild == 1 || nchild == 2 ) { if( 
(child=(dep)[pos].child1) == -1 ) { *order++ = (topol)[pos][1][0]; *order = -1; } else { // order = topolorder_lessargs( order, &td, child ); order = topolorder_lessargs( order, child ); } } #if 1 free( tdpglobal ); tdpglobal = NULL; #endif return ( order ); } #if RECURSIVETOP #else static void topolorder_mudaari( int nseq, int *n1, int *n2, int *order1, int *order2, int ***topol, Treedep *dep, int pos ) // memhist[][] wo free sezu, recalcpairs4thread() ni wataseba, kono kansuu ha iranai. -> V7.383 // memhist[][] no memory shiyou ryou ha, saiaku no baai O(N^2) { int **memhist, **localmem; int i, s1, s2, c1, c2; memhist = AllocateIntMtx( pos, 0 ); localmem = AllocateIntMtx( 2, 0 ); for( i=0; i<=pos; i++ ) memhist[i] = NULL; for( i=0; i<=pos; i++ ) { c1 = dep[i].child0; c2 = dep[i].child1; if( c1 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = topol[i][0][0]; localmem[0][1] = -1; s1 = 1; } else { localmem[0] = memhist[c1]; s1 = intlen( localmem[0] ); } if( c2 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = topol[i][1][0]; localmem[1][1] = -1; s2 = 1; } else { localmem[1] = memhist[c2]; s2 = intlen( localmem[1] ); } if( i == pos ) { intcpy( order1, localmem[0] ); intcpy( order2, localmem[1] ); *n1 = s1; *n2 = s2; } else { memhist[i] = calloc( sizeof( int ), s1+s2+1 ); intcpy( memhist[i], localmem[0] ); intcpy( memhist[i]+s1, localmem[1] ); memhist[i][s1+s2] = -1; } free( localmem[0] ); free( localmem[1] ); if( c1 != -1 ) memhist[c1] = NULL; if( c2 != -1 ) memhist[c2] = NULL; // reporterr( "freeing memhist[%d]\n", dep[i].child0 ); // reporterr( "freeing memhist[%d]\n", dep[i].child1 ); } for( i=0; i<=pos; i++ ) { if( memhist[i] ) free( memhist[i] ); memhist[i] = NULL; } free( memhist ); free( localmem ); } #endif #if CANONICALTREEFORMAT void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed ) { FILE *fp; int i, j; double l, ll; int treelen; char **tree; 
char *instanttree; int posinit; // char *treetmp, *tt; char *nametmp, *nameptr, *tmpptr; char namec; int *order; int im, jm, mm; if( treeout ) { // treetmp = NULL; nametmp = AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( nseq, 0 ); treelen = nseq; for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); treelen += strlen( tree[i] ) + 20; } instanttree = calloc( treelen, sizeof( char ) ); posinit = 0; for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); treelen += strlen( tree[i] ) + 20; } instanttree = calloc( treelen, sizeof( char ) ); posinit = 0; for( i=0; i k ) { fprintf( fp, "%d %d %f %f\n", k+1, jm+1, len[i][0], len[i][1] ); } else { fprintf( fp, "%d %d %f %f\n", jm+1, k+1, len[i][1], len[i][0] ); k = jm; } } #endif fclose( fp ); free( order ); } #endif void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int *hist = NULL; Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; int nmemim, nmemjm; char **tree; char *treetmp; char *nametmp, *nameptr, *tmpptr; char namec; FILE *fp; int node[2]; double *height; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } reporterr( "Loading a tree\n" ); if( !hist ) { hist = AllocateIntVec( nseq ); ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); nmemar = AllocateIntVec( nseq ); // treetmp = AllocateCharVec( nseq*50 ); if( dep ) height = AllocateFloatVec( nseq ); } if( treeout ) { treetmp = NULL; nametmp = 
AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( nseq, nseq*50 ); tree = AllocateCharMtx( nseq, 0 ); for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; // if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) if( im > nseq-1 || jm > nseq-1 ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } if( len[k][0] == -1.0 || len[k][1] == -1.0 ) { reporterr( "\n\nERROR: Branch length is not given.\n" ); exit( 1 ); } if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; #endif prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot 
reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; // mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } } } if( treeout ) { treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // reporterr( "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stderr, "vSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n" ); #endif if( dep ) { height[im] += len[k][0]; // for ig tree, 2015/Dec/25 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 // reporterr( "##### dep[%d].distfromtip = %f\n\n", k, height[im] ); } // reporterr( "dep[%d].child0 = %d\n", k, dep[k].child0 ); // reporterr( "dep[%d].child1 = %d\n", k, dep[k].child1 ); // reporterr( "dep[%d].distfromtip = %f\n", k, dep[k].distfromtip ); } fclose( fp ); if( treeout ) { fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fprintf( fp, "#by loadtree\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); } free( hist ); free( (char *)ac ); free( (void *)nmemar ); if( dep ) free( height ); } int check_guidetreefile( int *seed, int *npick, double *limitram ) { char string[100]; char *sizestring; FILE *fp; double tanni; double tmpd; *seed = 0; *npick = 200; *limitram = 10.0 * 1000 * 1000 * 1000; // 10GB fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } fgets( string, 999, fp ); fclose( fp ); if( !strncmp( string, "shuffle", 7 ) ) { sscanf( string+7, "%d", seed ); reporterr( "shuffle, seed=%d\n", *seed ); return( 's' ); } else if( !strncmp( string, "pileup", 6 ) ) { reporterr( "pileup.\n" ); return( 'p' ); } else if( !strncmp( string, "auto", 4 ) ) { sscanf( string+4, "%d %d", seed, npick ); reporterr( "auto, seed=%d, npick=%d\n", *seed, *npick ); if( *npick < 2 ) { reporterr( "Check npick\n" ); 
exit( 1 ); } return( 'a' ); } else if( !strncmp( string, "test", 4 ) ) { sscanf( string+4, "%d %d", seed, npick ); reporterr( "calc, seed=%d, npick=%d\n", *seed, *npick ); if( *npick < 2 ) { reporterr( "Check npick\n" ); exit( 1 ); } return( 't' ); } else if( !strncmp( string, "compact", 7 ) ) { sizestring = string + 7; reporterr( "sizestring = %s\n", sizestring ); if( strchr( sizestring, 'k' ) || strchr( sizestring, 'k' ) ) tanni = 1.0 * 1000; // kB else if( strchr( sizestring, 'M' ) || strchr( sizestring, 'm' ) ) tanni = 1.0 * 1000 * 1000; // GB else if( strchr( sizestring, 'G' ) || strchr( sizestring, 'g' ) ) tanni = 1.0 * 1000 * 1000 * 1000; // GB else if( strchr( sizestring, 'T' ) || strchr( sizestring, 't' ) ) tanni = 1.0 * 1000 * 1000 * 1000 * 1000; // TB else { reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" ); exit( 1 ); } sscanf( sizestring, "%lf", &tmpd ); *limitram = tmpd * tanni; reporterr( "Initial RAM usage = %10.3fGB\n", *limitram/1000/1000/1000 ); return( 'c' ); } else if( !strncmp( string, "very compact", 12 ) ) { reporterr( "very compact.\n" ); return( 'C' ); } else if( !strncmp( string, "stepadd", 7 ) ) { reporterr( "stepwise addition (disttbfast).\n" ); return( 'S' ); } else if( !strncmp( string, "youngestlinkage", 15 ) ) { reporterr( "youngest linkage (disttbfast).\n" ); return( 'Y' ); } else if( !strncmp( string, "nodepair", 8 ) ) { reporterr( "Use nodepair.\n" ); return( 'n' ); } else { reporterr( "loadtree.\n" ); return( 'l' ); } } static double sueff1, sueff05; //static double sueff1_double, sueff05_double; static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } #if 0 static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1_double + ( d1 + 
d2 ) * sueff05_double ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } #endif static void increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq ) { int nwarned = 0; int i, k, m, s1, s2, sl, ss; int *others, *tft; double maxdist, *dptr, dtmp; tft = calloc( nseq, sizeof( int * ) ); others = calloc( nseq, sizeof( int * ) ); // for( m=0; m-1; m++ ) tft[s1] = 1; for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dtmp = eff[ss][sl-ss]; if( dtmp > maxdist ) maxdist = dtmp; } // reporterr( "maxdist = %f\n", maxdist ); for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) { if( s2 > s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dptr = eff[ss] + sl-ss; if( *dptr < maxdist ) { if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); *dptr = maxdist; } } // for( m=0; m 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); free( tft ); free( others ); } static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq ) { int nwarned = 0; int i, k, m, s1, s2, sl, ss; int *others, *tft; double maxdist, *dptr, dtmp; tft = calloc( nseq, sizeof( int * ) ); others = calloc( nseq, sizeof( int * ) ); reporterr( "\n" ); // Hitsuyou desu. 
for( i=0; i-1; m++ ) tft[s1] = 1; for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dtmp = eff[ss][sl]; if( dtmp > maxdist ) maxdist = dtmp; } // reporterr( "maxdist = %f\n", maxdist ); for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) { if( s2 > s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dptr = eff[ss] + sl; if( *dptr < maxdist ) { if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); *dptr = maxdist; } } } if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); // for( m=0; m _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!! 
ninconsistentpairs = 0; firsttime = 1; while( 1 ) { if( firsttime ) { firsttime = 0; minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore ) { minscore = tmpdouble; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpdouble=eff[j][i-j]) < minscore ) { minscore = tmpdouble; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_f; } } } exitloop_f: if( allinconsistent ) { reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // reporterr( "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); // reporterr( "\n" ); #endif for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); // reporterr( "\n" ); if( overlapmember( groups[i], testtopol ) ) { if( !includemember( testtopol, groups[i] ) && !includemember( 
groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; reporterr( "\n###################################################################\n" ); reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); reporterr( "###################################################################\n" ); } inconsistent[im][jm] = 1; if( maxinconsistentpairs < ninconsistentpairs+1 ) { inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); for( j=maxinconsistentpairs; j *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm-im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; 
eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 reporterr( "\noSTEP-%03d:\n", k+1 ); reporterr( "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 ); reporterr( "\n" ); reporterr( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 ); reporterr( "\n\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); for( i=0; ithread_no; int para = targ->para; int im = targ->im; int nseq = targ->nseq; double **partmtx = targ->partmtx; double *mindist = targ->mindist; int *nearest = targ->nearest; char **seq = targ->seq; int **skiptable = targ->skiptable; int *tselfscore = targ->tselfscore; double *result = targ->result; int *joblist = targ->joblist; Bchain **acpt = targ->acpt; Bchain *ac = targ->ac; Bchain *acptbk; Bchain *acptinit; int i; acptinit = *acpt; while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *acpt == NULL ) { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } acptbk = *acpt; *acpt = (*acpt)->next; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif i = acptbk->pos; if( nearest[i] == im ) { if( partmtx[im][i] > mindist[i] ) { msaresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, result, joblist ); } } } } static void 
*kmerresetnearestthread( void *arg ) { resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg; // int thread_no = targ->thread_no; int para = targ->para; int im = targ->im; int nseq = targ->nseq; double **partmtx = targ->partmtx; double *mindist = targ->mindist; int *nearest = targ->nearest; int *tselfscore = targ->tselfscore; int **pointt = targ->pointt; int *nlen = targ->nlen; double *result = targ->result; int *joblist = targ->joblist; Bchain **acpt = targ->acpt; Bchain *ac = targ->ac; int *singlettable1; Bchain *acptbk; Bchain *acptinit; int i; acptinit = *acpt; while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *acpt == NULL ) { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } acptbk = *acpt; *acpt = (*acpt)->next; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif i = acptbk->pos; if( nearest[i] == im ) { if( partmtx[im][i] > mindist[i] ) { if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[i] ); } kmerresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, tselfscore, pointt, nlen, singlettable1, result, joblist ); if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer if( pointt ) commonsextet_p( NULL, NULL ); } } } } typedef struct _compactdistarrthread_arg { int para; int njob; // int thread_no; int im; int jm; int *nlen; char **seq; int **skiptable; int **pointt; int *table1; int *table2; int *tselfscore; Bchain **acpt; int *posshared; double *mindist; double *newarr; double **partmtx; int *nearest; int *joblist; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } compactdistarrthread_arg_t; static void *verycompactkmerdistarrthreadjoblist( void *arg ) // enablemultithread == 0 demo tsukau { compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg; int njob = targ->njob; int para = 
targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *nlen = targ->nlen; int **pointt = targ->pointt; int *table1 = targ->table1; int *table2 = targ->table2; int *tselfscore = targ->tselfscore; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; // double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *nlen = targ->nlen; int **pointt = targ->pointt; int *table1 = targ->table1; int *table2 = targ->table2; int *tselfscore = targ->tselfscore; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = 
targ->jm; // int thread_no = targ->thread_no; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; // double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nrep1 == -1 ) return; if( pt->child0 ) reformat_younger0_rec( ori, pt->child0, n, lastappear, topol, len, dep, pos ); if( pt->child1 ) reformat_younger0_rec( ori, pt->child1, n, lastappear, topol, len, 
dep, pos ); topol[*pos][0] = (int *)realloc( topol[*pos][0], ( 2 ) * sizeof( int ) ); topol[*pos][1] = (int *)realloc( topol[*pos][1], ( 2 ) * sizeof( int ) ); topol[*pos][0][1] = -1; topol[*pos][1][1] = -1; if( pt->rep0 < pt->rep1 ) { topol[*pos][0][0] = pt->rep0; topol[*pos][1][0] = pt->rep1; len[*pos][0] = pt->len0; len[*pos][1] = pt->len1; dep[*pos].child0 = lastappear[pt->rep0]; dep[*pos].child1 = lastappear[pt->rep1]; } else { topol[*pos][1][0] = pt->rep0; topol[*pos][0][0] = pt->rep1; len[*pos][1] = pt->len0; len[*pos][0] = pt->len1; dep[*pos].child1 = lastappear[pt->rep0]; dep[*pos].child0 = lastappear[pt->rep1]; } lastappear[pt->rep0] = *pos; lastappear[pt->rep1] = *pos; dep[*pos].distfromtip = pt->height; // reporterr( "STEP %d\n", *pos ); // reporterr( "%d %f\n", topol[*pos][0][0], len[*pos][0] ); // reporterr( "%d %f\n", topol[*pos][1][0], len[*pos][1] ); (*pos)++; } #else static void reformat_rec( Treept *ori, Treept *pt, int n, int *lastappear, int ***topol, double **len, Treedep *dep, int *pos ) { if( pt->rep1 == -1 ) return; if( pt->child0 ) reformat_rec( ori, pt->child0, n, lastappear, topol, len, dep, pos ); if( pt->child1 ) reformat_rec( ori, pt->child1, n, lastappear, topol, len, dep, pos ); topol[*pos][0] = (int *)realloc( topol[*pos][0], ( 2 ) * sizeof( int ) ); topol[*pos][1] = (int *)realloc( topol[*pos][1], ( 2 ) * sizeof( int ) ); topol[*pos][0][0] = pt->rep0; topol[*pos][0][1] = -1; topol[*pos][1][0] = pt->rep1; topol[*pos][1][1] = -1; len[*pos][0] = pt->len0; len[*pos][1] = pt->len1; dep[*pos].child0 = lastappear[pt->rep0]; dep[*pos].child1 = lastappear[pt->rep1]; lastappear[pt->rep0] = *pos; lastappear[pt->rep1] = *pos; dep[*pos].distfromtip = pt->height; // reporterr( "STEP %d\n", *pos ); // reporterr( "%d %f\n", topol[*pos][0][0], len[*pos][0] ); // reporterr( "%d %f\n", topol[*pos][1][0], len[*pos][1] ); (*pos)++; } #endif static char *reformat_rec_newick( char **subtree, Treept *pt ) { char *newick, *newick0, *newick1; if( pt->rep1 
== -1 ) return( subtree[pt->rep0] ); newick0 = reformat_rec_newick( subtree, pt->child0 ); newick1 = reformat_rec_newick( subtree, pt->child1 ); newick = calloc( strlen( newick0 ) + strlen( newick1 ) + 100, sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( pt->rep0 < pt->rep1 ) sprintf( newick, "(%s:%7.5f,%s:%7.5f)", newick0, pt->len0, newick1, pt->len1 ); else sprintf( newick, "(%s:%7.5f,%s:%7.5f)", newick1, pt->len1, newick0, pt->len0 ); free( newick0 ); free( newick1 ); return( newick ); } static void reformattree( Treept *root, Treept *ori, int n, int ***topol, double **len, Treedep *dep, char **name, int treeout ) { int i, pos; char *newick; int *lastappear; // int rootpos; // for( rootpos=n*2-2; rootpos>n; rootpos-- ) if( ori[rootpos].parent == NULL ) break; // reporterr( "Reformat, i=%d\n", i ); // reporterr( "njob=%d, treept[%d].parent,child0,child1 - self = %p,%p,%p - %p\n", n, i, treept[i].parent,treept[i].child0,treept[i].child1,treept+i ); if( treeout ) { FILE *fp; int j; char namec, *nametmp, *tmpptr, **tree, *nameptr; nametmp = AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( njob, 0 ); for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } free( nametmp ); newick = reformat_rec_newick( tree, root ); // tree[] ha free sareru fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", newick ); fclose( fp ); free( tree ); // free[] ha free sareteiru free( newick ); // FreeCharMtx( tree ); } lastappear = (int *)malloc( sizeof( int ) * n ); if( lastappear == NULL ) { reporterr( "Cannot allocate lastappear\n" ); exit( 1 ); } for( i=0; iopt == -1.0 || uselh[i] == 0 || uselh[j] == 0 ) if( uselh[i] == 0 && uselh[j] == 0 ) { return; } #if DISPPAIRID fprintf( *fpp, "node %d: %d (%d) - %d (%d) (%c)\n", n, i+1, ii, j+1, jj, tmpint ); #endif opt = lh->opt; size = 
0; for( tmpptr=lh; tmpptr; tmpptr=tmpptr->next ) size++; #if HAT3SORTED if( fwrite( &size, sizeof( int ), 1, *fpp ) != 1 || fwrite( &opt, sizeof( double ), 1, *fpp ) != 1 ) #else if( fwrite( &ii, sizeof( int ), 1, *fpp ) != 1 || fwrite( &jj, sizeof( int ), 1, *fpp ) != 1 || fwrite( &size, sizeof( int ), 1, *fpp ) != 1 || fwrite( &opt, sizeof( double ), 1, *fpp ) != 1 ) #endif { reporterr( "write error, n=%d\n", n ); exit( 1 ); } for( tmpptr=lh; tmpptr; tmpptr=tmpptr->next ) { len = tmpptr->end1-tmpptr->start1; if( fwrite( &(tmpptr->start1), sizeof( int ), 1, *fpp ) != 1 || fwrite( &(tmpptr->start2), sizeof( int ), 1, *fpp ) != 1 || fwrite( &len, sizeof( int ), 1, *fpp ) != 1 ) { reporterr( "write error, n=%d\n", n ); exit( 1 ); } // reporterr( "reg1:%d-%d, reg2:%d-%d, len=%d, score=%f\n", tmpptr->start1, tmpptr->start1+len, tmpptr->start2, tmpptr->start2+len, len, opt ); } c = '\n'; fwrite( &c, sizeof( char ), 1, *fpp ); } } } typedef struct _calcnearestthread_arg { char **bseq; int thread_no; int *posshared; int alloclen; int nlim; double *selfscore; double *mindists; int *neighbors; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } calcnearestthread_arg_t; static void *calcnearestthread( void *arg ) { calcnearestthread_arg_t *targ = (calcnearestthread_arg_t *)arg; char **bseq = targ->bseq; int thread_no = targ->thread_no; int *posshared = targ->posshared; int alloclen = targ->alloclen; int nlim = targ->nlim; double *selfscore = targ->selfscore; double *mindists = targ->mindists; int *neighbors = targ->neighbors; #ifdef enablemultithread pthread_mutex_t *mutex = targ->mutex; #endif int pos; double tmpdist, mindist; int progress; int neighbor, i; double (*distfunc)( char *, char *, double, double, int ); if( alg == 'A' ) distfunc = distdp_noalign; else if( alg == 'L' ) distfunc = distdpL_noalign; else if( alg == 'N' ) distfunc = distdpN_noalign; while( 1 ) { #ifdef enablemultithread pthread_mutex_lock( mutex ); #endif #if TREE7325 if( *posshared > nlim ) 
#else if( *posshared < 1 ) // ? 2017/Apr/26 #endif { #ifdef enablemultithread pthread_mutex_unlock( mutex ); #endif // reporterr( "freeing tmpseq1\n" ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11_noalign( NULL, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); return( NULL ); } pos = *posshared; #if TREE7325 *posshared += 1; #else *posshared -= 1; #endif #ifdef enablemultithread pthread_mutex_unlock( mutex ); #endif if( (nlim-pos) % 100 == 0 ) { //progress = ( (unsigned long long)pos * (unsigned long long)nlim - (unsigned long long)pos*((unsigned long long)pos-1.0)*0.5 ) / ( (unsigned long long)nlim * ((unsigned long long)nlim-1.0) *0.5 ) * 100; // progress = ( (double)pos * nlim - pos*(pos-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; progress = ( (double)(nlim-pos) * nlim - (nlim-pos)*((nlim-pos)-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; reporterr( "Step %d (%d%%), thread %d \r", (nlim-pos), progress, thread_no ); } mindist = 999.9; #if TREE7325 for( i=pos+1; i-1; i-- ) #endif { #if 0 tmpdist = 0.0; // test! 
#else tmpdist = distfunc( bseq[pos], bseq[i], selfscore[pos], selfscore[i], alloclen ); #endif if( mindist > tmpdist ) { mindist = tmpdist; neighbor = i; } } mindists[pos] = mindist; neighbors[pos] = neighbor; } } typedef struct _jobplan { int node; int start; int end; int subid; int divided; unsigned long long npairs; } Jobplan; typedef struct _recalcpairs4thread_arg { // int thread_no; int nseq; int numjob; Jobplan *jobplan; char **bseq; #if EXACTLYSAMEASPAIRLOCALALIGN char **dseq; #endif int *joborder; int *posshared; int *uselh; double *selfscore; int alloclen; int ***topol; Treedep *dep; unsigned long long *done; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } recalcpairs4thread_arg_t; static void *recalcpairs4thread( void *arg )// no TLS { recalcpairs4thread_arg_t *targ = (recalcpairs4thread_arg_t *)arg; char **bseq = targ->bseq; int nseq=targ->nseq; int numjob=targ->numjob; int *posshared = targ->posshared; int *joborder = targ->joborder; int *uselh = targ->uselh; // int thread_no = targ->thread_no; int ***topol = targ->topol; Jobplan *jobplan=targ->jobplan; Treedep *dep = targ->dep; int alloclen = targ->alloclen; double *selfscore = targ->selfscore;; unsigned long long *done = targ->done; #ifdef enablemultithread pthread_mutex_t *mutex = targ->mutex; #endif int i, j, m0, m1, m00, m11, n, step, istart, iend, n1, n0, subid; int prevn; char *tmpseq1, *tmpseq2; LocalHom *localhomtable; int *mem0, *mem1; FILE *localfp; char *fn; int progress = 0; unsigned long long totalpairs = (unsigned long long)nseq*(nseq-1)/2; int tmpnodepairs; double **dynamicmtx = NULL; double **mtxptr; double (*distfunc)( double **, char *, char *, LocalHom *, double, double, int ); if( alg == 'A' ) distfunc = distdp; else if( alg == 'L' ) distfunc = distdpL; else if( alg == 'N' ) distfunc = distdpN; else { reporterr( "alg %c is not yet supported\n", alg ); exit( 1 ); } #if EXACTLYSAMEASPAIRLOCALALIGN double tmpdist; char **dseq = targ->dseq; double (*distfunc_noalign)( char *, 
char *, double, double, int ); if( alg == 'A' ) distfunc_noalign = distdp_noalign; else if( alg == 'L' ) distfunc_noalign = distdpL_noalign; else if( alg == 'N' ) distfunc_noalign = distdpN_noalign; #endif mem0 = calloc( sizeof( int ), njob ); mem1 = calloc( sizeof( int ), njob ); tmpseq1 = calloc( sizeof( char ), alloclen ); tmpseq2 = calloc( sizeof( char ), alloclen ); localhomtable = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); freelocalhom1( localhomtable ); if( specificityconsideration > 0.0 ) { dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); mtxptr = dynamicmtx; } else mtxptr = n_dis_consweight_multi; prevn = -1; while( 1 ) { #ifdef enablemultithread pthread_mutex_lock( mutex ); #endif // if( *posshared <= -1 ) if( *posshared >= numjob ) { #ifdef enablemultithread pthread_mutex_unlock( mutex ); #endif // reporterr( "freeing tmpseq1\n" ); free( tmpseq1 ); tmpseq1 = NULL; free( tmpseq2 ); tmpseq2 = NULL; free( mem0 ); mem0 = NULL; free( mem1 ); mem1 = NULL; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; G__align11( NULL, NULL, NULL, 0, 0, 0 ); L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); #if EXACTLYSAMEASPAIRLOCALALIGN L__align11_noalign( NULL, NULL, NULL ); G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); #endif free( localhomtable ); if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); return( NULL ); } n = jobplan[step=joborder[*posshared]].node; // *posshared -= 1; *posshared += 1; *done += jobplan[step].npairs; // reporterr( "### nodenum=%d, step=%d, npairs=%lld\n", n, step, jobplan[step].npairs ); #ifdef enablemultithread pthread_mutex_unlock( mutex ); #endif istart = jobplan[step].start; iend = jobplan[step].end; subid = jobplan[step].subid; #if EXACTLYSAMEASPAIRLOCALALIGN #else if( specificityconsideration > 0.0 ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[n].distfromtip ); #endif if( step%100==0 ) // if( *done % 100 
== 0 ) { progress = (int)( (double)*done / totalpairs * 100 ); reporterr( "Node %06d-%03d (%d%%) \r", n, subid, progress ); } if( n != prevn ) { // reporterr( "compute mem1 and mem0. n=%d, prevn=%d\n", n, prevn ); prevn = n; #if N0LOOPFIRST #if RECURSIVETOP mem0[0] = -1; n0 = topolorderz( mem0, topol, dep, n, 1 ) - mem0; mem1[0] = -1; n1 = topolorderz( mem1, topol, dep, n, 0 ) - mem1; #else topolorder_mudaari( njob, &n1, &n0, mem1, mem0, topol, dep, n ); #endif #else #if RECURSIVETOP mem0[0] = -1; n0 = topolorderz( mem0, topol, dep, n, 0 ) - mem0; mem1[0] = -1; n1 = topolorderz( mem1, topol, dep, n, 1 ) - mem1; #else topolorder_mudaari( njob, &n0, &n1, mem0, mem1, topol, dep, n ); #endif #endif } // else reporterr( "reuse mem1 and mem0. n=%d\n", n ); #if 0 reporterr( "mem0 = \n" ); for( i=0; i m11 ) { m0 = m11; m1= m00; } else { m0 = m00; m1= m11; } if( nadd ) { if( m1 < njob-nadd ) continue; // if( m0 >= njob-nadd || m1 < njob-nadd ) continue; // oosugi! } tmpnodepairs++; // reporterr( "node%d, %d x %d\n", n, m0+1, m1+1 ); strcpy( tmpseq1, bseq[m0] ); strcpy( tmpseq2, bseq[m1] ); #if EXACTLYSAMEASPAIRLOCALALIGN if( specificityconsideration > 0.0 ) { tmpdist = distfunc_noalign( dseq[m0], dseq[m1], selfscore[m0], selfscore[m1], alloclen ); makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * tmpdist ); // upgma ni awaseru. 
distfunc( dynamicmtx, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); } else { distfunc( n_dis_consweight_multi, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); } #else distfunc( mtxptr, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); #endif // reporterr( "tmpdist = %f\n", tmpdist ); #if N0LOOPFIRST writehat3node_noaddress( n, m0, m1, j, i, &localfp, 'n', localhomtable, uselh ); #else writehat3node_noaddress( n, m0, m1, i, j, &localfp, 'n', localhomtable, uselh ); #endif freelocalhom1( localhomtable ); } } fclose( localfp ); // reporterr( "node = %d, tmpnodepairs = %d\n", n, tmpnodepairs ); } } static void calcnearest_para( int njob, double *selfscore, char **bseq, int alloclen, int *neighbors, double *mindists ) { int i; calcnearestthread_arg_t *targ; #ifdef enablemultithread pthread_t *handle; pthread_mutex_t mutex; #endif int posshared; #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif #if TREE7325 posshared = 0; #else posshared = njob-1; #endif targ = calloc( nthread, sizeof( calcnearestthread_arg_t ) ); #ifdef enablemultithread handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); #endif for( i=0; i 7.383 // qsort( npairs, njob-1, sizeof( Pairnum ), compfuncpair ); if( nadd ) free( addmem ); // for( i=0; i-1; n-- ) // for( k=0; k=0; k-- ) { n = npairs[k].num; if( mergeoralign[n] == 'n' ) continue; // reporterr( "n=%d, go!\n", n ); n0 = npairs[k].n0; n1 = npairs[k].n1; { done += (double)npairs[k].npairs; #if 0 // blocksize0 = LHBLOCKSIZE/n1; blocksize0 = (int)(sizeav * LHBLOCKFACTOR)/n1; if( blocksize0 == 0 ) blocksize0 = 1; #else if ( (double)n0*n1*lenav*lenav > (double)MINBLOCKLEN2 ) { blocksize0 = (int)( (double)MINBLOCKLEN2/n1/lenav/lenav ); if( blocksize0 == 0 ) blocksize0 = 1; // reporterr( "dividing node %d, size=%f, size*len2=%f > %f\n", n, (double)n0*n1, (double)n0*n1*lenav*lenav, MINBLOCKLEN2 
); // reporterr( "blocksize0=%d, n0=%d, n1=%d\n", blocksize0, n0, n1 ); } else { blocksize0 = n0; // reporterr( "did not divide node %d, size=n1*n2=%f, size*len2=%f < %f\n", n, (double)n0*n1, (double)n0*n1*lenav*lenav, MINBLOCKLEN2 ); } #endif } // reporterr( "blocksize0 = %d, n0=%d, n1=%d\n", blocksize0, n0, n1 ); if( numjob + n0/blocksize0+1 > nallocated ) { nallocated += n0/blocksize0+1; // reporterr( "new nallocated = %d\n", nallocated ); jobplan = realloc( jobplan, nallocated*sizeof( Jobplan ) ); } if( n0 > blocksize0 ) { nnodesdivided++; // reporterr( "node %d will be divided, because n0, %d > blocksize0, %d. nnodesdivided=%d, numjob=%d\n", n, n0, blocksize0, nnodesdivided, numjob ); } subid = 0; for( b=0; b blocksize0 ) jobplan[numjob].divided = 1; else jobplan[numjob].divided = 0; numjob++; subid++; } nfilesfornode[n] = subid; } reporterr( "numjob=%d, nnodesdivided=%d, njob=%d\n", numjob, nnodesdivided, njob ); reporterr( "Divided %d nodes to %d jobs\n", nnodesdivided, numjob-(njob-1) ); joborder = calloc( sizeof( int ), numjob ); for( i=0; i-1; i-- ) #else tmpdist = mindists[1]; treept[0].parent = treept+n; treept[1].parent = treept+n; treept[n].child0 = treept+0; treept[n].child1 = treept+1; treept[n].height = tmpdist * 0.5; treept[n].len0 = tmpdist * 0.5; treept[n].len1 = tmpdist * 0.5; treept[n].parent = NULL; treept[n].rep0 = 0; treept[n].rep1 = 1; root = treept+n; for( i=2; iparent; p!=NULL; b=p,p=p->parent ) { // reporterr( "checking %p->%p (height=%f)\n", b, p, p->height ); if( p->height > mindist * 0.5 ) break; } if( p == NULL ) { treept[n].parent = NULL; root = treept+n; } else if( p->child0 == b ) { p->child0 = treept+n; p->len0 = p->height-mindist*0.5; treept[n].parent = p; } else if( p->child1 == b ) { p->child1 = treept+n; p->len1 = p->height-mindist*0.5; treept[n].parent = p; } else { reporterr( "okashii\n" ); exit( 1 ); } treept[i].parent = treept+n; b->parent = treept+n; treept[n].child0 = b; treept[n].child1 = treept+i; treept[n].height = 
mindist * 0.5; treept[n].rep0 = b->rep0; treept[n].rep1 = treept[i].rep0; treept[n].len0 = mindist*0.5-b->height; treept[n].len1 = mindist*0.5; } reformattree( root, treept, njob, topol, len, dep, name, treeout ); free( treept ); // free( neighbors ); // free( mindists ); } void compacttreedpdist( int njob, char **bseq, char **dseq, double *selfscore, int ***topol, double **len, char **name, Treedep *dep, int treeout, int alloclen, int *uselh, int *nfilesfornode, int treegiven ) { int i, neighbor, n; double tmpdist; double mindist; // int *commonanc; if( !treegiven ) { Treept *treept = NULL; Treept *p, *b; Treept *root; int *neighbors; double *mindists; neighbors = calloc( sizeof( int ), njob ); mindists = calloc( sizeof( double ), njob ); calcnearest_para( njob, selfscore, dseq, alloclen, neighbors, mindists ); treept = (Treept *)calloc( sizeof( Treept ), njob*2 ); // for( i=0; i-1; i-- ) #else // commonanc = (int *)calloc( njob, sizeof( int ) ); tmpdist = mindists[1]; // reporterr( "tmpdist = %f\n", tmpdist ); // reporterr( "%f ?= %f\n", tmpdist, distfromfile( njob, njob-2, njob-1, hat2fp ) ); // reporterr( "%f ?= %f\n", tmpdist, mindists[njob-2] ); treept[0].parent = treept+n; treept[1].parent = treept+n; treept[n].child0 = treept+0; treept[n].child1 = treept+1; treept[n].height = tmpdist * 0.5; treept[n].len0 = tmpdist * 0.5; treept[n].len1 = tmpdist * 0.5; treept[n].parent = NULL; treept[n].rep0 = 0; treept[n].rep1 = 1; root = treept+n; for( i=2; iparent; p!=NULL; b=p,p=p->parent ) { // reporterr( "checking %p->%p (height=%f)\n", b, p, p->height ); if( p->height > mindist * 0.5 ) break; } if( p == NULL ) { treept[n].parent = NULL; root = treept+n; } else if( p->child0 == b ) { p->child0 = treept+n; p->len0 = p->height-mindist*0.5; treept[n].parent = p; } else if( p->child1 == b ) { p->child1 = treept+n; p->len1 = p->height-mindist*0.5; treept[n].parent = p; } else { reporterr( "okashii\n" ); exit( 1 ); } treept[i].parent = treept+n; b->parent = treept+n; 
treept[n].child0 = b; treept[n].child1 = treept+i; treept[n].height = mindist * 0.5; treept[n].rep0 = b->rep0; treept[n].rep1 = treept[i].rep0; treept[n].len0 = mindist*0.5-b->height; treept[n].len1 = mindist*0.5; } reformattree( root, treept, njob, topol, len, dep, name, treeout ); free( treept ); free( neighbors ); free( mindists ); } else { reporterr( "treegiven\n" ); } #if EXACTLYSAMEASPAIRLOCALALIGN recalcpairs_para4( njob, topol, dep, bseq, dseq, selfscore, alloclen, uselh, nfilesfornode ); #else recalcpairs_para4( njob, topol, dep, bseq, selfscore, alloclen, uselh, nfilesfornode ); #endif #if HAT3SORTED // recalcpairs_para2( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); #else // recalcpairs_para3( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); #endif // recalcpairs( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 20130603 if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; // free( commonanc ); // commonsextet_p( NULL, NULL ); // distdppairs_para( 0, 0, NULL, NULL, 0, 0, NULL, 0, NULL, NULL ); // distdppairsthread( NULL ); } void compacttree_memsaveselectable( int nseq, double **partmtx, int *nearest, double *mindist, int **pointt, int *tselfscore, char **seq, int **skiptable, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int howcompact, int memsave ) { int i, j, k; // int miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; // double tmpdouble; // double eff1, eff0; double *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? int nmemim, nmemjm; double minscore; char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? 
FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; int *singlettable1 = NULL; int *singlettable2 = NULL; double *newarr; void *(*distarrfunc)( void * ); void *(*resetnearestfunc)( void * ); int numfilled; compactdistarrthread_arg_t *distarrarg; resetnearestthread_arg_t *resetarg; int *joblist, nactive, posshared; double *result; sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( howcompact == 2 ) { if( seq ) { // distarrfunc = verycompactmsadistarrthread; distarrfunc = verycompactmsadistarrthreadjoblist; resetnearestfunc = NULL; } else { // distarrfunc = verycompactkmerdistarrthread; distarrfunc = verycompactkmerdistarrthreadjoblist; resetnearestfunc = NULL; } } else { if( seq ) { distarrfunc = msadistarrthreadjoblist; resetnearestfunc = msaresetnearestthread; } else { distarrfunc = kmerdistarrthreadjoblist; resetnearestfunc = kmerresetnearestthread; } } distarrarg = calloc( MAX( nthreadpair, 1 ), sizeof( compactdistarrthread_arg_t ) ); resetarg = calloc( MAX( nthreadpair, 1 ), sizeof( resetnearestthread_arg_t ) ); joblist = calloc( njob, sizeof( int ) ); if( howcompact != 2 ) result = calloc( njob, sizeof( double ) ); else result = NULL; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); if( treeout ) { treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( njob, 0 ); } } if( treeout ) { for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "k=%d i=%d, mindist[i]=%f\n", k, i, 
mindist[i] ); if( mindist[i] < minscore ) // muscle { im = i; minscore = mindist[i]; } } // printf( "minscore=%f\n", minscore ); jm = nearest[im]; // printf( "im=%d\n", im ); // printf( "jm=%d\n", jm ); if( jm < im ) { j=jm; jm=im; im=j; } if( partmtx[im] == NULL && howcompact != 2 ) numfilled++; if( partmtx[jm] != NULL ) numfilled--; prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; if( memsave ) intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave else intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } if( memsave ) { *intpt++ = *pt11; *intpt = -1; } else { reporterr( "This version supports memsave=1 only\n" ); // fukkatsu saseru tokiha pt22 wo dainyu. exit( 1 ); for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; if( memsave ) intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave else intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // memsave if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } if( memsave ) { *intpt++ = *pt11; *intpt = -1; } else { reporterr( "This version supports memsave=1 only\n" ); // fukkatsu saseru tokiha pt22 wo dainyu. 
exit( 1 ); for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } } minscore *= 0.5; // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindist[im] = 999.9; if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); singlettable2 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[im] ); makecompositiontable_global( singlettable2, pointt[jm] ); } newarr = calloc( nseq, sizeof( double ) ); for( acpti=ac,nactive=0; acpti!=NULL; acpti=acpti->next ) joblist[nactive++] = acpti->pos; // sukoshi muda... #ifdef enablemultithread if( nthreadpair > 0 ) { compactdistarrthread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; posshared = 0; // targ = calloc( nthreadpair, sizeof( compactdistarrthread_arg_t ) ); targ = distarrarg; handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); if( k % 100 == 0 ) reporterr( " (%d threads, nactive=%d, nfilled=%d) \r", nthreadpair, nactive, numfilled ); for( i=0; inext ) // antei sei no tame { i = acpti->pos; if( i != im && i != jm ) { // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. 
// if( newarr[i] < mindist[i] ) // { // mindist[i] = newarr[i]; // nearest[i] = im; // } if( newarr[i] < mindist[im] ) { mindist[im] = newarr[i]; nearest[im] = i; } // if( nearest[i] == jm ) // { // nearest[i] = im; // } } } #endif } else #endif { if( k % 100 == 0 ) reporterr( " (serial, nactive=%d, nfilled=%d) \r", nactive, numfilled ); compactdistarrthread_arg_t *targ; posshared = 0; // targ = calloc( 1, sizeof( compactdistarrthread_arg_t ) ); targ = distarrarg; for( i=0; i<1; i++ ) { targ[i].para = 0; targ[i].njob = nactive; // targ[i].thread_no = i; targ[i].im = im; targ[i].jm = jm; targ[i].tselfscore = tselfscore; targ[i].nlen = nlen; targ[i].seq = seq; targ[i].skiptable = skiptable; targ[i].pointt = pointt; targ[i].table1 = singlettable1; targ[i].table2 = singlettable2; targ[i].joblist = joblist; targ[i].posshared = &posshared; targ[i].mindist = mindist; targ[i].nearest = nearest; targ[i].newarr = newarr; targ[i].partmtx = partmtx; distarrfunc( targ+i ); // pthread_create( handle, NULL, distarrfunc, (void *)(targ) ); } // free( targ ); } for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // antei sei no tame { i = acpti->pos; if( i != im && i != jm ) { // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. // if( newarr[i] < mindist[i] ) // { // mindist[i] = newarr[i]; // nearest[i] = im; // } if( newarr[i] < mindist[im] ) { mindist[im] = newarr[i]; nearest[im] = i; } // if( nearest[i] == jm ) // { // nearest[i] = im; // } } } // printf( "im=%d, jm=%d\n", im, jm ); #if 0 printf( "matrix = \n" ); for( i=0; inext = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; #if 0 // muscle seems to miss this. // int nwork = 0; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "reset nearest? 
i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindist[i] ); if( nearest[i] == im ) { // printf( "reset nearest, i=%d, k=%d\n", i, k ); if( partmtx[im][i] > mindist[i] ) { // nwork++; // printf( "go\n" ); if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[i] ); } resetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, pointt, nlen, singlettable1 ); if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer if( pointt ) commonsextet_p( NULL, NULL ); } } } // reporterr( "nwork = %d\n", nwork ); #else if( howcompact == 2 ) continue; #if 0 if( 0 && nthreadpair > 0 ) { resetnearestthread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; Bchain *acshared; acshared = ac; // targ = calloc( nthreadpair, sizeof( resetnearestthread_arg_t ) ); targ = resetarg; handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); printf( "\n" ); #endif } if( treeout ) { fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); } for( im=0; im _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindisfrom[i] ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } // printf( "minscore=%f\n", minscore ); jm = nearest[im]; // printf( "im=%d\n", im ); // printf( "jm=%d\n", jm ); if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int 
*)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 // memsave *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 // memsave *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } minscore *= 0.5; // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = 
(clusterfuncpt[0])( eff0, eff1 ); // printf( "tmpdouble=%f, eff0=%f, eff1=%f\n", tmpdouble, eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // printf( "im=%d, jm=%d\n", im, jm ); #if 0 printf( "matrix = \n" ); for( i=0; ij ) { minijm=j; maxijm=i; } else { minijm=i; maxijm=j; } printf( "%f ", eff[minijm][maxijm-minijm] ); } printf( "\n" ); } #endif treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; // Ato de fukkatsu } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "reset nearest? 
i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindisfrom[i] ); if( nearest[i] == im ) { // printf( "reset nearest, i=%d, k=%d\n", i, k ); if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) { // printf( "go\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } } #else reporterr( "CHUUI!\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); if( treeout == 2 ) { int *mem = calloc( sizeof( int ), nseq ); fprintf( fp, "\nDensity:" ); for( k=0; k-1; i++ ) fprintf( fp, " %03d", topol[k][0][i]+1 ); fprintf( fp, "%d:", getdensest( mem, density )+1 ); for( i=0; mem[i]>-1; i++ ) fprintf( fp, " %d", mem[i]+1 ); fprintf( fp, "\n" ); topolorderz( mem, topol, dep, k, 1 ); // fprintf( fp, "len1 = %f\n", len[k][1] ); // for( i=0; topol[k][1][i]>-1; i++ ) fprintf( fp, " %03d", topol[k][1][i]+1 ); fprintf( fp, "%d:", getdensest( mem, density )+1 ); for( i=0; mem[i]>-1; i++ ) fprintf( fp, " %d", mem[i]+1 ); fprintf( fp, "\n" ); } free( mem ); } fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); if( treeout == 2 ) free( density ); } void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; double *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? 
int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( 
intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); 
tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #else reporterr( "chuui!\n" ); #endif #if 0 printf( "\nooSTEP-%03d:\n", k+1 ); printf( "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); printf( "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double 
(*clusterfuncpt[1])(double,double); char namec; sueff1 = 1.0 - sueff_global; sueff05 = sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? 
treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; nmemim = nmemar[im]; // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = 
jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpdouble = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim] > mindisfrom[i] ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\nvSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti, *acptj; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; int *testtopol, **inconsistent; int **inconsistentpairlist; int ninconsistentpairs; int maxinconsistentpairs; int *warned; int allinconsistent; int firsttime; increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq ); sueff1 = 1 - 
sueff_global; sueff05 = sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? 
treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); testtopol = AllocateIntVec( njob + 1 ); inconsistent = AllocateIntMtx( njob, njob ); // muda // inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda inconsistentpairlist = AllocateIntMtx( 1, 2 ); warned = AllocateIntVec( ngroup ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore ) { minscore = tmpdouble; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore ) { minscore = tmpdouble; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_d; } } } exitloop_d: if( allinconsistent ) { reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( 
intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // reporterr( "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); // reporterr( "\n" ); #endif for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); // reporterr( "\n" ); if( overlapmember( testtopol, groups[i] ) ) { if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; reporterr( "\n###################################################################\n" ); reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); reporterr( "###################################################################\n" ); } inconsistent[im][jm] = 1; if( maxinconsistentpairs < ninconsistentpairs+1 ) { inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); for( j=maxinconsistentpairs; j *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( len[k][0] < 0.0 ) len[k][0] 
= 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm] = 999.9; // eff[im][jm-im] = 999.9; // bug?? for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpdouble = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim] > mindisfrom[i] ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\ncSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); for( i=0; i local, 2012/02/25 int *hist = NULL; // static TLS -> local, 2012/02/25 Bchain *ac = NULL; // static TLS -> local, 2012/02/25 int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11; int *nmemar; // static TLS -> local, 2012/02/25 int nmemim, nmemjm; double minscore; int *nearest = NULL; // by Mathog, a guess double *mindisfrom = NULL; // by Mathog, a guess double (*clusterfuncpt[1])(double,double); sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * 
sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { 
if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpdouble = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; double *tmptmplen = NULL; // 
static TLS -> local, 2012/02/25 int *hist = NULL; // static TLS -> local, 2012/02/25 Bchain *ac = NULL; // static TLS -> local, 2012/02/25 int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; // static TLS -> local, 2012/02/25 int nmemim, nmemjm; double minscore; int *nearest = NULL; // by Mathog, a guess double *mindisfrom = NULL; // by Mathog, a guess double (*clusterfuncpt[1])(double,double); sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); 
exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpdouble = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double eff1, eff0; int *hist = NULL; Achain *ac = NULL; double minscore; char **tree; char *treetmp; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; FILE *fp; int node[2]; double lenfl[2]; char *nametmp, *nameptr, *tmpptr; //static? 
char namec; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { // treetmp = AllocateCharVec( njob*50 ); treetmp = NULL; // tree = AllocateCharMtx( njob, njob*50 ); tree = AllocateCharMtx( njob, 0 ); nametmp = AllocateCharVec( 1000 ); // nagasugi hist = AllocateIntVec( njob ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; i nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // reporterr( "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); if( lenfl[0] == -1.0 || lenfl[1] == -1.0 ) { reporterr( "\n\nWARNING: Branch length is not given.\n" ); exit( 1 ); } if( lenfl[0] < 0.0 ) lenfl[0] = 0.0; if( lenfl[1] < 0.0 ) lenfl[1] = 0.0; #endif // reporterr( "im=%d, jm=%d\n", im, jm ); intpt = topol[k][0]; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; #if 0 len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore 
- tmptmplen[jm]; #else len[k][0] = lenfl[0]; len[k][1] = lenfl[1]; #endif hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + ( eff0 + eff1 ) * 0.5 * sueff_global; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); // strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); // fprintf( fp, "by veryfastsupg_double_loadtree\n" ); fclose( fp ); #if 1 reporterr( "\n" ); free( hist ); free( (char *)ac ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif #if 0 // reporterr( "reconstructing eff[][]\n" ); // Tsune ni hat2 ha aru node koreha iranai. 
for( k=0; k-1; i++ ) { reporterr( " %03d", im ); } fprintf( stdout, "\n" ); for( i=0; (jm=topol[k][1][i])>-1; i++ ) { reporterr( " %03d", jm ); } for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ ) { eff[im][jm] += len[k][0] + len[k][1]; eff[jm][im] += len[k][0] + len[k][1]; } } #endif } #if 0 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + ( eff0 + eff1 ) * 0.5 * sueff_global; } } acjmprev = ac[jm].prev; 
acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 reporterr( "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } #endif void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; static char *nametmp; FILE *fpout; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; double (*clusterfuncpt[1])(double,double); sueff1 = 1 - sueff_global; sueff05 = sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); nametmp = AllocateCharVec( 31 ); } // for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == 
-1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fpout = fopen( "infile.tree", "w" ); fprintf( fpout, "%s;\n", treetmp ); // fprintf( fpout, "by veryfastsupg_double_outtree\n" ); fclose( fpout ); #if 1 reporterr( "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif } void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int tmpint; int eff1, eff0; static double *tmptmplen = NULL; static int **eff = NULL; static int *hist = NULL; 
static Achain *ac = NULL; int minscore; double minscoref; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateIntMtx( njob, njob ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } #else intpt = topol[k][0]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; #endif len[k][0] = minscoref - tmptmplen[im]; len[k][1] = minscoref - tmptmplen[jm]; tmptmplen[im] = minscoref; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int?? ( eff0 + eff1 ) * 0.5 * sueff_global; // int?? 
} } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } void fastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static double *tmplen; int *intpt; double tmpdouble; double eff1, eff0; static double **eff = NULL; static char **pair = NULL; static Achain *ac; double minscore; int im = -1, jm = -1; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); ac = (Achain *)calloc( njob, sizeof( Achain ) ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; minscore /= 2.0; len[k][0] = (double)minscore - tmplen[im]; len[k][1] = (double)minscore - tmplen[jm]; tmplen[im] = (double)minscore; for( i=0; i 0 ); for( i=0; i-1; i++ ) reporterr( " %03d", topol[k][0][i] ); reporterr( "\n" ); reporterr( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] ); reporterr( "\n" ); #endif } reporterr( "\n" ); // FreeFloatMtx( eff ); // FreeCharMtx( pair ); // FreeFloatVec( tmplen ); // free( ac ); } void supg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static double *tmplen; int *intpt; double **doubleptpt; double 
*doublept; double tmpdouble; double eff1, eff0; static double **eff = NULL; static char **pair = NULL; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } void spg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else double **eff = NULL; char **pair = NULL; if( !eff ) { eff = AllocateDoubleMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); } #endif for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } double ipower( double x, int n ) /* n > 0 */ { double r; r = 1; while( n != 0 ) { if( n & 1 ) r *= x; x *= x; n >>= 1; } return( r ); } void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { int i, j, k, s1, s2; static double rootnode[M]; if( nseq-2 < 0 ) { reporterr( "Too few sequence for countnode: nseq = %d\n", nseq ); exit( 1 ); } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; 
topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } } void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ { int i, j, k, s1, s2; int rootnode[M]; for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=localmem[1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } free( localmem[0] ); free( localmem[1] ); } free( localmem ); free( memhist[nseq-2] ); free( memhist ); for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; 
j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(unsigned char)seq2[++k]) == '-' ) tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } return( tmpscore ); } double score_calc1( char *seq1, char *seq2 ) /* method 1 */ { int k; double score = 0.0; int count = 0; int len = strlen( seq1 ); for( k=0; k 1 ) { if( utree == 0 ) { for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco1+i, "%c", ch+0x61 ); } sco1[len] = 0; for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); tmp = ( tmp - 400 * !scoremtx ) * 2; if( tmp < 0 ) tmp = 0; ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco2+i, "%c", ch+0x61 ); sco[i] = tmp; } sco2[len] = 0; for( i=WIN; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog while( str2 >= bk ) { *str2-- = *str1--; } } int isaligned( int nseq, char **seq ) { int i; int len = strlen( seq[0] ); for( i=1; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } score += 
(double)tmpscore / (double)c; #if DEBUG printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); printf( "tmpscore / c = %f\n", tmpscore/(double)c ); #endif } } reporterr( "raw score = %f\n", score ); score /= (double)nseq * ( nseq-1.0 ) / 2.0; score += 400.0; #if DEBUG printf( "score in mltaln9.c = %f\n", score ); #endif return( (double)score ); } void doublencpy( double *vec1, double *vec2, int len ) { while( len-- ) *vec1++ = *vec2++; } double score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ { int i, j, k; int gb1, gb2, gc1, gc2; int cob; int nglen; int len = strlen( seq[0] ); double score; score = 0; nglen = 0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore; } } return( score ); } #define SEGMENTSIZE 150 int searchAnchors( int nseq, char **seq, Segment *seg ) { int i, j, k, kcyc; int status; double score; int value = 0; int len; int length; double *stra = NULL; int alloclen = 0; double threshold; double cumscore; len = strlen( seq[0] ); threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; stra = AllocateDoubleVec( len ); for( i=0; iskipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; length = 0; /* modified at 01/09/11 */ for( j=0; j threshold ) reporterr( "YES\n" ); else reporterr( "NO\n" ); #endif if( score > threshold ) { if( !status ) { status = 1; seg->start = i; length = 0; cumscore = 0.0; } length++; cumscore += score; } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 
0; } length = 0; cumscore = 0.0; status = 0; value++; seg++; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } FreeDoubleVec( stra ); return( value ); } void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; i= j ) continue; for( ptr=localhom[i]+j-i; ptr; ptr=ptr->next ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 ptr->importance = ptr->opt / ptr->overlapaa; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, nseq1; LocalHom *ptr; #if 1 #else int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext 
) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt * 0.5; // tekitou // ptr->fimportance = (double)ptr->importance; // reporterr( "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt ); #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } #if 1 #else free( nogaplen ); #endif } void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr, int alloclen ) { int i, j, pos, len, ti, tj; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( alloclen ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif // for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! 
i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B } #endif // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( ti=0; tinext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%30.25f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // reporterr( "average?\n" ); // for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = tmpptr2->importance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", 
tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } for( ti=0; tinext ) { if( tmpptr1->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance ); // imp = 1.0 * ( tmpptr1->importance ); tmpptr1->importance = imp; // tmpptr1->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( ti=0; tinext ) { if( tmpptr->end1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } //exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom, int alloclen ) { int i, j, pos, len; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( alloclen ); // reporterr("alloclen=%d, nlenmax=%d\n", alloclen, nlenmax ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif for( i=0; inext ) { // reporterr( "pos=%d, alloclen=%d\n", pos, alloclen ); if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( 
"hit! i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } else { for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } } else { if( localhom[j][i-j].opt == -1.0 ) continue; for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->rimportance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } } // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( i=0; inext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } else { printf( "%d-%d\n", i, j ); for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / 
tmpptr->overlapaa, eff[i] * tmpptr->rimportance, tmpptr->opt ); } } } #endif #if 1 // reporterr( "average?\n" ); for( i=0; inext) { if( tmpptr1->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr1->rimportance ); tmpptr1->importance = tmpptr1->rimportance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( i=0; inext ) { if( tmpptr->end1 && tmpptr->start1 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } else for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->end2 && tmpptr->start2 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, pos, len; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( nlenmax ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif 
for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B } #endif // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( i=0; inext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // reporterr( "average?\n" ); for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = 
tmpptr2->importance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( i=0; inext ) { if( tmpptr->end1 && tmpptr->start1 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) { // dokka machigatteru if( pt != lh ) // susumeru { pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); pt = pt->next; pt->next = NULL; lh->last = pt; } else // sonomamatsukau { lh->last = pt; } lh->nokori++; // reporterr( "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); pt->start1 = sti; pt->start2 = stj; pt->end1 = eni; pt->end2 = enj; pt->opt = opt; pt->extended = interm; pt->overlapaa = overlp; #if 0 reporterr( "i: %d-%d\n", sti, eni ); reporterr( "j: %d-%d\n", stj, enj ); reporterr( "opt=%f\n", opt ); reporterr( "overlp=%d\n", overlp ); #endif } void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) { int overlp, plim; int i, j, k; int pi, pj, pk, len; int status, sti, stj; int *ipt; int co; static int *ini = NULL; static int *inj = NULL; LocalHom *pt; sti = 0; // by D.Mathog, a guess stj = 0; // by D.Mathog, a guess if( ini == NULL ) { ini = AllocateIntVec( nlenmax+1 ); inj = AllocateIntVec( nlenmax+1 ); } for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; ipt = ini; co = nlenmax+1; while( co-- ) *ipt++ = -1; 
ipt = inj; co = nlenmax+1; while( co-- ) *ipt++ = -1; overlp = 0; { for( pt=localhom[i]+k; pt; pt=pt->next ) { // reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); if( pt->opt == -1 ) { reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pi = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = ini + pk; while( len-- ) *ipt++ = pi++; } } { for( pt=localhom[j]+k; pt; pt=pt->next ) { if( pt->opt == -1 ) { reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pj = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = inj + pk; while( len-- ) *ipt++ = pj++; } } #if 0 reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; reporterr( " %d", inj[pk] ); } reporterr( "\n" ); reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; reporterr( " %d", ini[pk] ); } reporterr( "\n" ); #endif overlp = 0; plim = nlenmax+1; for( pk = 0; pk < plim; pk++ ) if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; status = 0; plim = nlenmax+1; for( pk=0; pknext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); pt = localhom[j][i].last; // reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); // reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); // reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); } } if( !status ) // else deha arimasenn. 
{ if( ini[pk] == -1 || inj[pk] == -1 ) continue; sti = ini[pk]; stj = inj[pk]; // reporterr( "start here!\n" ); status = 1; } } // if( status ) reporterr( "end here\n" ); // exit( 1 ); // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #if 0 for( pt=localhomtable[i]+j; pt; pt=pt->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #endif } } } int makelocal( char *s1, char *s2, int thr ) { int start, maxstart, maxend; char *pt1, *pt2; double score; double maxscore; pt1 = s1; pt2 = s2; maxend = 0; // by D.Mathog, a guess // reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); maxscore = 0.0; score = 0.0; start = 0; maxstart = 0; while( *pt1 ) { // reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); if( *pt1 == '-' || *pt2 == '-' ) { // reporterr( "penalty = %d\n", penalty ); score += penalty; while( *pt1 == '-' || *pt2 == '-' ) { pt1++; pt2++; } continue; } score += ( amino_dis[(unsigned char)*pt1++][(unsigned char)*pt2++] - thr ); // score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); if( score > maxscore ) { // reporterr( "score = %f\n", score ); maxscore = score; maxstart = start; // reporterr( "## max! 
maxstart = %d, start = %d\n", maxstart, start ); } if( score < 0.0 ) { // reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart ); if( start == maxstart ) { maxend = pt1 - s1; // reporterr( "maxend = %d\n", maxend ); } score = 0.0; start = pt1 - s1; } } if( start == maxstart ) maxend = pt1 - s1 - 1; // reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); s1[maxend+1] = 0; s2[maxend+1] = 0; return( maxstart ); } void resetlocalhom( int nseq, LocalHom **lh ) { int i, j; LocalHom *pt; for( i=0; inext ) pt->opt = 1.0; } } #if 0 void gapmatometeireru( int n, char **res, char **ori, char *gt ) // i loop no tame osoi { char g; int pr=0, po=0, i; while( (g = *gt++) ) { if( g == '-' ) { for( i=0; i-1&&st[j]==1; j-- ) st[j] = i-j; } } st[i] = -1; stbk = st; for( i=0; i 0 ) { // reporterr( "s=%d\n", s ); st += s-1; rp += s; // while( s-- ) // *rp++ = *newgapstr; } else *rp++ = *op++; } *rp = 0; } free( stbk ); } static void gapirerustatic( char *res, char *ori, char *gt, char gapchar ) { char g; while( (g = *gt++) ) { if( g == '-' ) { *res++ = gapchar; } else { *res++ = *ori++; } } *res = 0; } void gapmatometeireru( int n, char **res, char **ori, char *gt ) { int i; char gapchar = *newgapstr; for( i=0; i", i, gaplen, k, (*fpt)[k].freq ); (*fpt)[k].freq += feff; // reporterr( "%f\n", (*fpt)[k].freq ); gaplen = 0; } } fpt++; } } #if 1 for( j=0; jnext = ac; acori->pos = -1; ac[0].prev = acori; // for( i=0; i tmpmin ) { minscore = tmpmin; nearest = i; } } #else nearest = 0; minscore = 0.0; #endif nearesto = nearest; minscoreo = minscore; // for( i=0; i-1; j++ ) { reporterr( "%d ", topol[i][0][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][0] ); reporterr( "group1 = \n" ); for( j=0; topol[i][1][j]>-1; j++ ) { reporterr( "%d ", topol[i][1][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][1] ); reporterr( "\n\n\nminscore = %f ? 
%f\n", minscore, dep[i].distfromtip*2 ); reporterr( "i = %d\n", i ); if( leaf2node[nearest] == -1 ) { reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] ); } else { reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] ); } #endif nearestnode = leaf2node[nearest]; if( nearestnode == -1 ) reflen = nogaplen[nearest]; else reflen = alnleninnode[nearestnode]; // reflen = alnleninnode[i]; // BUG!! if( noalign ) seqlengthcondition = 1; else seqlengthcondition = ( nogaplentoadd <= reflen ); //seqlengthcondition = 1; // CHUUI //seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI // if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku. if( repnorg == -1 && dep[i].distfromtip * 2 >= minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau. { // reporterr( "INSERT HERE, %d-%d\n", nearest, norg ); // reporterr( "nearest = %d\n", nearest ); // reporterr( "\n\n\nminscore = %f\n", minscore ); // reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 ); // reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); if( nearestnode == -1 ) { // reporterr( "INSERTING to 0!!!\n" ); // reporterr( "lastlength = %d\n", nogaplen[norg] ); // reporterr( "reflength = %d\n", nogaplen[nearest] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][0][0] = nearest; topolc[posinnew][0][1] = -1; addedlen = lenc[posinnew][0] = minscore / 2; } else { // reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); // reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] ); // reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) 
+ 1 ) * sizeof( int ) ) ); // reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); intcpy( topolc[posinnew][0], topol[nearestnode][0] ); intcat( topolc[posinnew][0], topol[nearestnode][1] ); // addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!! addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10 // fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] ); } neighbor = lastmem( topolc[posinnew][0] ); if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " distance: %f\n", minscore ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "INSERTING to 1!!!\n" ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][1][0] = norg; topolc[posinnew][1][1] = -1; lenc[posinnew][1] = minscore / 2; // reporterr( "STEP %d (newnew)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 ); // reporterr( 
"\n len=%f\n", lenc[posinnew][1] ); repnorg = nearest; // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f\n", lenc[i][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[i][1] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); posinnew++; } // reporterr( "minscore = %f\n", minscore ); // reporterr( "distfromtip = %f\n", dep[i].distfromtip ); // reporterr( "Modify matrix, %d-%d\n", nearest, norg ); eff0 = iscorec[mem0][norg-mem0]; eff1 = iscorec[mem1][norg-mem1]; // iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 ); iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local; iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda acprev = ac[mem1].prev; acnext = ac[mem1].next; acprev->next = acnext; if( acnext != NULL ) acnext->prev = acprev; if( ( nearest == mem1 || nearest == mem0 ) ) { minscore = 9999.9; // for( j=0; j iscorec[j][norg-j] ) // { // minscore = iscorec[j][norg-j]; // nearest = j; // } // } // reporterr( "searching on modified ac " ); for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda { // reporterr( "." 
); j = acpt->pos; tmpmin = iscorec[j][norg-j]; if( minscore > tmpmin ) { minscore = tmpmin; nearest = j; } } // reporterr( "done\n" ); } // reporterr( "posinnew = %d\n", posinnew ); if( topol[i][0][0] == repnorg ) { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); intcat( topolc[posinnew][0], additionaltopol ); lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] ); addedlen = 0.0; } else { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); lenc[posinnew][0] = len[i][0]; } if( topol[i][1][0] == repnorg ) { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); intcat( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] ); addedlen = 0.0; repnorg = topolc[posinnew][0][0]; // juuyou } else { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); lenc[posinnew][1] = len[i][1]; } // reporterr( "\nSTEP %d (new)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", 
topolc[posinnew][1][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); // reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] ); for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i; // reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); // // reporterr( "%s\n", treetmp ); posinnew++; } if( nstep ) { i--; topolo0 = topol[i][0]; topolo1 = topol[i][1]; } else { // i = 0; // free( topol[i][0] );//? // free( topol[i][1] );//? // topol[i][0] = calloc( 2, sizeof( int ) ); // topol[i][1] = calloc( 1, sizeof( int ) ); // topol[i][0][0] = 0; // topol[i][0][1] = -1; // topol[i][1][0] = -1; topoldum0 = calloc( 2, sizeof( int ) ); topoldum1 = calloc( 1, sizeof( int ) ); topoldum0[0] = 0; topoldum0[1] = -1; topoldum1[0] = -1; topolo0 = topoldum0; topolo1 = topoldum1; } if( repnorg == -1 ) { // topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); // intcpy( topolc[posinnew][0], topol[i][0] ); // intcat( topolc[posinnew][0], topol[i][1] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topolo0 ); intcat( topolc[posinnew][0], topolo1 ); // lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 
2014/06/07 ni hakken if( nstep ) lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21 else lenc[posinnew][0] = minscore / 2; // reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip ); // reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); intcpy( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = minscore / 2; // neighbor = lastmem( topolc[posinnew][0] ); neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); } if( topoldum0 ) free( topoldum0 ); if( topoldum1 ) free( topoldum1 ); free( leaf2node ); free( additionaltopol ); free( ac ); free( acori ); if( treeout ) free( neighborlist ); #if 0 // create a newick tree for CHECK char **tree; char *treetmp; int im, jm; treetmp = AllocateCharVec( 
njob*150 ); tree = AllocateCharMtx( njob, njob*150 ); for( i=0; inext = ac; acori->pos = -1; ac[0].prev = acori; // for( i=0; i tmpmin ) { minscore = tmpmin; nearest = i; } } nearesto = nearest; minscoreo = minscore; // for( i=0; i-1; j++ ) { reporterr( "%d ", topol[i][0][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][0] ); reporterr( "group1 = \n" ); for( j=0; topol[i][1][j]>-1; j++ ) { reporterr( "%d ", topol[i][1][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][1] ); reporterr( "\n\n\nminscore = %f ? %f\n", minscore, dep[i].distfromtip*2 ); reporterr( "i = %d\n", i ); if( leaf2node[nearest] == -1 ) { reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] ); } else { reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] ); } #endif nearestnode = leaf2node[nearest]; if( nearestnode == -1 ) reflen = nogaplen[nearest]; else reflen = alnleninnode[nearestnode]; // reflen = alnleninnode[i]; // BUG!! if( noalign ) seqlengthcondition = 1; else seqlengthcondition = ( nogaplentoadd <= reflen ); //seqlengthcondition = 1; // CHUUI //seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku. // if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau. 
{ // reporterr( "INSERT HERE, %d-%d\n", nearest, norg ); // reporterr( "nearest = %d\n", nearest ); // reporterr( "\n\n\nminscore = %f\n", minscore ); // reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 ); // reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); if( nearestnode == -1 ) { // reporterr( "INSERTING to 0!!!\n" ); // reporterr( "lastlength = %d\n", nogaplen[norg] ); // reporterr( "reflength = %d\n", nogaplen[nearest] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][0][0] = nearest; topolc[posinnew][0][1] = -1; addedlen = lenc[posinnew][0] = minscore / 2; } else { // reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); // reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] ); // reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) ); // reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); intcpy( topolc[posinnew][0], topol[nearestnode][0] ); intcat( topolc[posinnew][0], topol[nearestnode][1] ); // addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!! addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10 // fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] ); } neighbor = lastmem( topolc[posinnew][0] ); if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! 
if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " distance: %f\n", minscore ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "INSERTING to 1!!!\n" ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][1][0] = norg; topolc[posinnew][1][1] = -1; lenc[posinnew][1] = minscore / 2; // reporterr( "STEP %d (newnew)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); repnorg = nearest; // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f\n", lenc[i][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[i][1] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); posinnew++; } // reporterr( "minscore = %f\n", minscore ); // reporterr( "distfromtip = %f\n", dep[i].distfromtip ); // reporterr( 
"Modify matrix, %d-%d\n", nearest, norg ); eff0 = iscorec[mem0][norg-mem0]; eff1 = iscorec[mem1][norg-mem1]; // iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 ); iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local; iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda acprev = ac[mem1].prev; acnext = ac[mem1].next; acprev->next = acnext; if( acnext != NULL ) acnext->prev = acprev; if( ( nearest == mem1 || nearest == mem0 ) ) { minscore = 9999.9; // for( j=0; j iscorec[j][norg-j] ) // { // minscore = iscorec[j][norg-j]; // nearest = j; // } // } // reporterr( "searching on modified ac " ); for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda { // reporterr( "." ); j = acpt->pos; tmpmin = iscorec[j][norg-j]; if( minscore > tmpmin ) { minscore = tmpmin; nearest = j; } } // reporterr( "done\n" ); } // reporterr( "posinnew = %d\n", posinnew ); if( topol[i][0][0] == repnorg ) { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); intcat( topolc[posinnew][0], additionaltopol ); lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] ); addedlen = 0.0; } else { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); lenc[posinnew][0] = len[i][0]; } if( topol[i][1][0] == repnorg ) { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); intcat( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, 
dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] ); addedlen = 0.0; repnorg = topolc[posinnew][0][0]; // juuyou } else { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); lenc[posinnew][1] = len[i][1]; } // reporterr( "\nSTEP %d (new)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); // reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] ); for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i; // reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); // // reporterr( "%s\n", treetmp ); posinnew++; } if( nstep ) { i--; topolo0 = topol[i][0]; topolo1 = topol[i][1]; } else { // i = 0; // free( topol[i][0] );//? // free( topol[i][1] );//? 
// topol[i][0] = calloc( 2, sizeof( int ) ); // topol[i][1] = calloc( 1, sizeof( int ) ); // topol[i][0][0] = 0; // topol[i][0][1] = -1; // topol[i][1][0] = -1; topoldum0 = calloc( 2, sizeof( int ) ); topoldum1 = calloc( 1, sizeof( int ) ); topoldum0[0] = 0; topoldum0[1] = -1; topoldum1[0] = -1; topolo0 = topoldum0; topolo1 = topoldum1; } if( repnorg == -1 ) { // topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); // intcpy( topolc[posinnew][0], topol[i][0] ); // intcat( topolc[posinnew][0], topol[i][1] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topolo0 ); intcat( topolc[posinnew][0], topolo1 ); // lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 2014/06/07 ni hakken if( nstep ) lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21 else lenc[posinnew][0] = minscore / 2; // reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip ); // reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); intcpy( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = minscore / 2; // neighbor = lastmem( topolc[posinnew][0] ); neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! 
if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); } if( topoldum0 ) free( topoldum0 ); if( topoldum1 ) free( topoldum1 ); free( leaf2node ); free( additionaltopol ); free( ac ); free( acori ); if( treeout ) free( neighborlist ); #if 0 // create a newick tree for CHECK char **tree; char *treetmp; int im, jm; treetmp = AllocateCharVec( njob*150 ); tree = AllocateCharMtx( njob, njob*150 ); for( i=0; i-1; ) { if( mem[i++] != cand[j++] ) return( 0 ); } if( cand[j] == -1 ) { return( 1 ); } else { return( 0 ); } } #else int samemember( int *mem, int *cand ) { int i, j; int nm, nc; nm = 0; for( i=0; mem[i]>-1; i++ ) nm++; nc = 0; for( i=0; cand[i]>-1; i++ ) nc++; if( nm != nc ) return( 0 ); for( i=0; mem[i]>-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } if( mem[i] == -1 ) { #if 0 reporterr( "mem = " ); for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] ); reporterr( "\n" ); reporterr( "cand = " ); for( i=0; cand[i]>-1; 
i++ ) reporterr( "%d ", cand[i] ); reporterr( "\n" ); #endif return( 1 ); } else { return( 0 ); } } #endif int samemembern( int *mem, int *cand, int nc ) { int i, j; int nm; nm = 0; for( i=0; mem[i]>-1; i++ ) { nm++; if( nm > nc ) return( 0 ); } if( nm != nc ) return( 0 ); for( i=0; mem[i]>-1; i++ ) { for( j=0; j-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } // reporterr( "INCLUDED! mem[0]=%d\n", mem[0] ); return( 1 ); } int overlapmember( int *mem1, int *mem2 ) { int i, j; for( i=0; mem1[i]>-1; i++ ) for( j=0; mem2[j]>-1; j++ ) if( mem1[i] == mem2[j] ) return( 1 ); return( 0 ); } void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; double fr; // for( i=0; i %f\n", i, freq[i] ); } // reporterr( "\n" ); return; } void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; double fr; // for( i=0; i 0.0 ) val = 0.0; return val; } void makedynamicmtx( double **out, double **in, double offset ) { int i, j, ii, jj; double av; offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2 // if( offset > 0.0 ) offset = 0.0; // reporterr( "dynamic offset = %f\n", offset ); for( i=0; i%f\n", rep0, distfromtip0, distfromtip[rep0] ); #if 0 for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%3d ", topol[i][1][j] ); reporterr( "\n" ); reporterr( "len=%f\n", len[i][1] ); #endif rep1 = topol[i][1][0]; distfromtip1 = distfromtip[rep1]; distfromtip[rep1] += len[i][1]; // reporterr( "distfromtip[%d] = %f->%f\n", rep1, distfromtip1, distfromtip[rep1] ); if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] ) { // reporterr( "HIT 0!\n" ); *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); for( j=0, nmem=0; (mem=topol[i][0][j])!=-1; j++ ) nmem++; // reporterr( "allocating %d\n", nmem+1 ); (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); (*tablept)[*nsubpt+1] = NULL; intcpy( (*tablept)[*nsubpt], topol[i][0] ); if( 
*maxmempt < nmem ) *maxmempt = nmem; *nsubpt += 1; } if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] ) { // reporterr( "HIT 1!\n" ); *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); for( j=0, nmem=0; (mem=topol[i][1][j])!=-1; j++ ) nmem++; // reporterr( "allocating %d\n", nmem+1 ); (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); (*tablept)[*nsubpt+1] = NULL; intcpy( (*tablept)[*nsubpt], topol[i][1] ); if( *maxmempt < nmem ) *maxmempt = nmem; *nsubpt += 1; } } if( distfromtip[0] <= threshold ) { free( distfromtip ); return( 1 ); } free( distfromtip ); return( 0 ); } double sumofpairsscore( int nseq, char **seq ) { double v = 0; int i, j; for( i=1; i 10 ) value = 10.0; // 2015/Mar/17 return( value ); } } double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 ) { double longer, shorter, lenfac, value; if( len1 > len2 ) { longer=(double)len1; shorter=(double)len2; } else { longer=(double)len2; shorter=(double)len1; } lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // reporterr( "lenfac=%f\n", lenfac ); // reporterr( "commonsextet_p()=%d\n", commonsextet_p( table1, point2 ) ); // reporterr( "ss1=%d, ss2=%d\n", ss1, ss2 ); // reporterr( "val=%f\n", (1.0-(double)commonsextet_p( table1, point2 )/ss1) ); if( ss1 == 0 || ss2 == 0 ) return( 2.0 ); value = ( 1.0 - (double)commonsextet_p( table1, point2 ) / MIN(ss1,ss2) ) * lenfac * 2.0; return( value ); // 2013/Oct/17 -> 2bai } static void movereg( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) { char *pt; int tmpint; pt = seq1; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } *start1pt = (int)( pt - seq1 ) - 1; if( tmpptr->start1 == tmpptr->end1 ) *end1pt = *start1pt; else { while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( 
*pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } *end1pt = (int)( pt - seq1 ) - 1; } pt = seq2; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } *start2pt = (int)( pt - seq2 ) - 1; if( tmpptr->start2 == tmpptr->end2 ) *end2pt = *start2pt; else { while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } *end2pt = (int)( pt - seq2 ) - 1; } } static void movereg_swap( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) { char *pt; int tmpint; pt = seq1; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } *start1pt = (int)( pt - seq1 ) - 1; if( tmpptr->start2 == tmpptr->end2 ) *end1pt = *start1pt; else { while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } *end1pt = (int)( pt - seq1 ) - 1; } pt = seq2; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } *start2pt = (int)( pt - seq2 ) - 1; if( tmpptr->start1 == tmpptr->end1 ) *end2pt = *start2pt; else { while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } *end2pt = (int)( pt - seq2 ) - 1; } } void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 ) { int i, j, k1, k2, start1, start2, end1, end2; double effij, effijx, effij_kozo; char *pt1, *pt2; LocalHom *tmpptr; void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); #if 0 fprintf( stderr, "eff1 in _init_strict = \n" ); for( i=0; iorinum2[j] ) movefunc = movereg_swap; else movefunc = movereg; } // effij = eff1[i] * eff2[j] * effijx; effij = eff1[i] * 
eff2[j] * effijx; effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; tmpptr = localhom[i][j]; while( tmpptr ) { // fprintf( stderr, "start1 = %d\n", tmpptr->start1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; // impmtx[k1][k2] += tmpptr->fimportance * effij; if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->importance * effij_kozo; else impmtx[k1][k2] += tmpptr->importance * effij; // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } #if 0 printf( "orinum1=%d, orinum2=%d\n", *orinum1, *orinum2 ); if( *orinum1 == 0 ) { fprintf( stdout, "impmtx = \n" ); for( 
k2=0; k2nokori++ > 0 ) { tmpptr1 = localhomtable->last; tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable->last = tmpptr1; } else { tmpptr1 = localhomtable; } tmpptr1->start1 = st1; tmpptr1->start2 = st2; tmpptr1->end1 = st1 + len; tmpptr1->end2 = st2 + len; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( (double)opt + 0.00 ) / 5.8 * 600; tmpptr1->importance = ( (double)opt + 0.00 ) / 5.8 * 600; // C0 to itchi shinai tmpptr1->overlapaa = len; // tsukau toki ha chuui tmpptr1->korh = 'h'; // fprintf( stderr, " %f %d-%d %d-%d \n", tmpptr1->opt, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2 ); } free( m ); fread( &c, sizeof( char ), 1, fp ); if( c != '\n' ) { reporterr( "\n\nError in binary hat3 \n" ); exit( 1 ); } } static int readlocalhomfromfile_autofid( LocalHom *lhpt, int nodeid, FILE *fp, int o1, int o2 ) // for hat3node { // pthread_mutex_t *filemutex = h3i->filemutex; // int fidcheck; int k1, k2; // int *fds = h3i->fds; int swap; // unsigned long long k1tri; lhpt->start1 = -1; lhpt->end1 = -1; lhpt->start2 = -1; lhpt->end2 = -1; lhpt->overlapaa = -1.0; lhpt->opt = -1.0; lhpt->importance = -1.0; lhpt->next = NULL; lhpt->nokori = 0; lhpt->extended = -1; lhpt->last = lhpt; lhpt->korh = 'h'; #if 0 // specific target ni taiousuru! 
if( h3i->specifictarget ) { int *targetmap = h3i->targetmap; if( targetmap[o1] == -1 && targetmap[o2] == -1 ) return( -1 ); if( targetmap[o1] == -1 ) { k1 = targetmap[o2]; k2 = o1; swap = 1; } else { k1 = targetmap[o1]; k2 = o2; swap = 0; } k1tri = 0; } else #endif { if( o2 > o1 ) { k1 = o1; k2 = o2-o1; swap = 0; } else { k1 = o2; k2 = o1-o2; swap = 1; } // k1tri = (unsigned long long)k1*(k1-1)/2; } if( fp ) { readlocalhomtable2_single_bin_noseek( fp, lhpt ); } return( swap ); } static int whichpair( int *ipt, int *jpt, FILE *fp ) { if( fread( ipt, sizeof( int ), 1, fp ) < 1 ) return( 1 ); if( fread( jpt, sizeof( int ), 1, fp ) < 1 ) return( 1 ); // <1 ha nai return( 0 ); } typedef struct _readloopthread_arg { // int thread_no; int nodeid; int nfiles; double **impmtx; char **seq1; char **seq2; int *orinum1; int *orinum2; double *eff1; double *eff2; unsigned long long *ndone; int *subidpt; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } readloopthread_arg_t; static void *readloopthread( void *arg ) { readloopthread_arg_t *targ = (readloopthread_arg_t *)arg; int nodeid = targ->nodeid; // int thread_no = targ->thread_no; double **impmtx = targ->impmtx; char **seq1 = targ->seq1; char **seq2 = targ->seq2; int *orinum1 = targ->orinum1; int *orinum2 = targ->orinum2; double *eff1 = targ->eff1; double *eff2 = targ->eff2; unsigned long long *ndone = targ->ndone; int *subidpt = targ->subidpt; int nfiles = targ->nfiles; int subid = -1; #ifdef enablemultithread pthread_mutex_t *mutex = targ->mutex; #endif int i, j, k1, k2, start1, start2, end1, end2; double effij, effijx; char *pt1, *pt2; LocalHom *tmpptr; FILE *fp = NULL; char *fn; LocalHom lhsingle; int res; void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); initlocalhom1( &lhsingle ); effijx = 1.0 * fastathreshold; // void *stbuf = NULL; #if 0 int block; if( nfiles > 10*nthreadreadlh ) block=10; else block=1; #endif while( 1 ) { if( subid == -1 || whichpair( &i, &j, fp ) ) { while( 1 ) { if( 
fp ) fclose( fp ); #if 0 if( (subid+1)%block==0 ) { if( mutex ) pthread_mutex_lock( mutex ); subid = (*subidpt); (*subidpt) += block; if( mutex ) pthread_mutex_unlock( mutex ); } else subid++; #else #ifdef enablemultithread if( mutex ) pthread_mutex_lock( mutex ); #endif subid = (*subidpt)++; #ifdef enablemultithread if( mutex ) pthread_mutex_unlock( mutex ); #endif #endif if( subid >= nfiles ) { // if( stbuf ) free( stbuf ); // stbuf = NULL; return( NULL ); } // if( !stbuf ) // stbuf = malloc( MYBUFSIZE ); // if( !stbuf ) // { // reporterr( "Cannot allocate stbuf, size=d\n", MYBUFSIZE ); // exit( 1 ); // } fn = calloc( 100, sizeof( char ) ); sprintf( fn, "hat3dir/%d-/hat3node-%d-%d", (int)(nodeid/HAT3NODEBLOCK)*HAT3NODEBLOCK, nodeid, subid ); // sprintf( fn, "hat3dir/%d/%d/hat3node-%d-%d", (int)(nodeid/h2)*h2, (int)(nodeid/HAT3NODEBLOCK)*HAT3NODEBLOCK, nodeid, subid ); // reporterr( "fopen %s by thread %d\n", fn, thread_no ); fp = fopen( fn, "rb" ); if( fp == NULL ) { reporterr( "Cannot open %s\n", fn ); exit( 1 ); } free( fn ); // if( setvbuf( fp, stbuf, _IOFBF, MYBUFSIZE ) ) // { // reporterr( "Error in setting buffer, size=%d\n", MYBUFSIZE ); // exit( 1 ); // } setvbuf( fp, NULL, _IOFBF, MYBUFSIZE ); if( !whichpair( &i, &j, fp ) ) break; } } (*ndone)++; { // effij = eff1[i] * eff2[j] * effijx; effij = eff1[i] * eff2[j] * effijx; // effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; res = readlocalhomfromfile_autofid( &lhsingle, nodeid, fp, orinum1[i], orinum2[j] ); if( res == -1 ) tmpptr = NULL; else if( res == 1 ) { movefunc = movereg_swap; // h3i ga arutoki swaplist ha mushi tmpptr = &lhsingle; } else { movefunc = movereg; // h3i ga arutoki swaplist ha mushi tmpptr = &lhsingle; } while( tmpptr ) { // fprintf( stderr, "start1 = %d\n", tmpptr->start1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); movefunc( seq1[i], seq2[j], tmpptr, 
&start1, &start2, &end1, &end2 ); // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); // if( end1 - start1 != end2 - start2 ) // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { impmtx[k1][k2] += tmpptr->importance * effij; k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } freelocalhom1( &lhsingle ); } } } void fillimp_file( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2, int *uselh, int *seedinlh1, int *seedinlh2, int nodeid, int nfiles ) { int i, j, k1, k2, start1, start2, end1, end2, m0, m1, m2; double effijx, effij_kozo; char *pt1, *pt2; LocalHom *tmpptr; unsigned long long npairs; // LocalHom lhsingle; // FILE *fp = NULL; // char *fn; // int subid, res; void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); readloopthread_arg_t *targ; #ifdef enablemultithread pthread_mutex_t mutex; pthread_t *handle; #endif double ***localimpmtx; int nth; unsigned long long *localndone; unsigned long long ndone; int subid; #if 0 fprintf( stderr, "eff1 in _init_strict = \n" ); for( i=0; i m2 
) { m0=m1; m1=m2; m2=m0; } if( m2 >= njob-nadd && ( uselh==NULL || uselh[m1] || uselh[m2] ) ) // saikentou { // reporterr( "%d x %d\n", m1, m2 ); npairs++; } } #if REPORTCOSTS // reporterr( "node %d, npairs = %d, nfiles = %d\n", nodeid, npairs, nfiles ); #endif } else if( uselh ) { // npairs = (unsigned long long)clus1 * clus2; npairs = 0; for( i=0; iorinum2[j] ) movefunc = movereg_swap; else movefunc = movereg; while( tmpptr ) { movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->importance * effij_kozo; else // naihazu { reporterr( "okashii\n" ); exit( 1 ); } // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } } #if 0 if( 0 || nfiles < 2 ) { unsigned long long nread; readloop_serial( nodeid, impmtx, seq1, seq2, orinum1, orinum2, eff1, eff2, &nread ); npairs -= nread; } else #endif { nth = MIN(nthreadreadlh,nfiles); subid = 0; // reporterr( "nthreadlh=%d, nth=%d\n", nthreadreadlh, nth ); if( nth > 1 ) { localndone = calloc( sizeof(unsigned long long), nth ); localimpmtx = calloc( sizeof(double **), nth ); for( i=0; i 1 ) { targ[i].ndone = localndone+i; targ[i].impmtx = localimpmtx[i]; #ifdef enablemultithread targ[i].mutex = &mutex; pthread_create( handle+i, NULL, readloopthread, (void *)(targ+i) 
); #else readloopthread( (void *)(targ+i) ); #endif } else { targ[i].ndone = &ndone; targ[i].impmtx = impmtx; #ifdef enablemultithread targ[i].mutex = NULL; #endif readloopthread( targ+i ); } } #ifdef enablemultithread if( nth > 1 ) { for( j=0; j 1 ) { for( i=0; ibestpos!=-1; pt++ ) { if( pt->bestpos > i ) fprintf( fp, "%d %d %f\n", i, pt->bestpos, pt->bestscore ); } } #if 1 static void readcontrafold( FILE *fp, RNApair **pairprob, int length ) { char gett[10000]; int *pairnum; char *pt; int i; int left, right; double prob; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'd': whereiscontrafold = *++argv; fprintf( stderr, "whereiscontrafold = %s\n", whereiscontrafold ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static char com[10000]; static int *nlen; int left, right; int res; static char **name, **seq, **nogap; static int **gapmap; static int *order; int i, j; FILE *infp; RNApair ***pairprob; RNApair **alnpairprob; RNApair *pairprobpt; RNApair *pt; int *alnpairnum; double prob; int adpos; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !whereiscontrafold ) whereiscontrafold = ""; getnumlen( infp ); rewind( infp ); if( dorp != 'd' ) { fprintf( stderr, "nuc only\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*2+1 ); nogap = AllocateCharMtx( njob, nlenmax*2+1 ); gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); pairprob = (RNApair ***)calloc( njob, 
sizeof( RNApair ** ) ); alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); alnpairnum = AllocateIntVec( nlenmax ); for( i=0; iin\n%s\n", nogap[i] ); fclose( infp ); #if 0 // contrafold v1 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 > _contrafoldout", whereiscontrafold ); #else // contrafold v2 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 _contrafoldout", whereiscontrafold ); #endif res = system( com ); if( res ) { fprintf( stderr, "error in contrafold\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "==\n" ); fprintf( stderr, "== This version of MAFFT supports CONTRAfold v2.02.\n" ); fprintf( stderr, "== If you have a lower version of CONTRAfold installed in the\n" ); fprintf( stderr, "== %s directory,\n", whereiscontrafold ); fprintf( stderr, "== please update it!\n" ); fprintf( stderr, "==\n" ); fprintf( stderr, "=================================================================\n" ); fprintf( stderr, "=================================================================\n" ); exit( 1 ); } infp = fopen( "_contrafoldout", "r" ); readcontrafold( infp, pairprob[i], nlenmax ); fclose( infp ); fprintf( stdout, ">%d\n", i ); outcontrafold( stdout, pairprob[i], nlenmax ); } for( i=0; ibestpos!=-1; pairprobpt++ ) { left = gapmap[i][j]; right = gapmap[i][pairprobpt->bestpos]; prob = pairprobpt->bestscore; for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) if( pt->bestpos == right ) break; if( pt->bestpos == -1 ) { alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); adpos = alnpairnum[left]; alnpairnum[left]++; alnpairprob[left][adpos].bestscore = 0.0; alnpairprob[left][adpos].bestpos = right; alnpairprob[left][adpos+1].bestscore = -1.0; alnpairprob[left][adpos+1].bestpos = -1; pt = 
alnpairprob[left]+adpos; } else adpos = pt-alnpairprob[left]; pt->bestscore += prob; if( pt->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); } } return( 0 ); #if 0 fprintf( stdout, "result=\n" ); for( i=0; ibestpos!=-1; pairprobpt++ ) { pairprobpt->bestscore /= (double)njob; left = i; right = pairprobpt->bestpos; prob = pairprobpt->bestscore; fprintf( stdout, "%d-%d, %f\n", left, right, prob ); } return( 0 ); #endif } mafft-7.505-without-extensions/core/mtxutl.c0000644000175000017500000002473014224501721020510 0ustar nileshnilesh#include #include #include #include #include "mtxutl.h" void MtxuntDouble( double **mtx, int n ) { int i, j; for( i=0; i 3 ) code1 = 36; code2 = amino_n[(int)s2[j][p2]]; if( code2 > 3 ) code2 = 36; // fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (double)ribosumdis[code1][code2] ); val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } static double pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 ) { double val; int i, j; int code1o, code1u, code2o, code2u, code1, code2; val = 0.0; for( i=0; i 3 ) code1 = code1o = 36; else if( code1u > 3 ) code1 = 36; else code1 = 4 + code1o * 4 + code1u; code2o = amino_n[(int)s2[j][p2]]; code2u = amino_n[(int)s2[j][c2]]; if( code2o > 3 ) code2 = code1o = 36; else if( code2u > 3 ) code2 = 36; else code2 = 4 + code2o * 4 + code2u; // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] ); if( code1 == 36 || code2 == 36 ) val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j]; else val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } static double pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 ) { double val; int i, j; int code1o, code1u, code2o, code2u, code1, 
code2; val = 0.0; for( i=0; i 3 ) code1 = code1o = 36; else if( code1u > 3 ) code1 = 36; else code1 = 4 + code1u * 4 + code1o; code2o = amino_n[(int)s2[j][p2]]; code2u = amino_n[(int)s2[j][c2]]; if( code2o > 3 ) code2 = code1o = 36; else if( code2u > 3 ) code2 = 36; else code2 = 4 + code2u * 4 + code2o; // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] ); if( code1 == 36 || code2 == 36 ) val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j]; else val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j]; } return( val ); } #endif static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff ) { int lgth; int nogaplgth; int i, j; int left, right, adpos; double prob; static TLS int *pairnum; RNApair *pt, *pt2; lgth = strlen( seq[0] ); pairnum = calloc( lgth, sizeof( int ) ); for( i=0; ibestpos!=-1; pt++ ) { left = sgapmap[i][j]; right = sgapmap[i][pt->bestpos]; prob = pt->bestscore; for( pt2=pairprob[left]; pt2->bestpos!=-1; pt2++ ) if( pt2->bestpos == right ) break; // fprintf( stderr, "i,j=%d,%d, left=%d, right=%d, pt=%d, pt2->bestpos = %d\n", i, j, left, right, pt-single[i][j], pt2->bestpos ); if( pt2->bestpos == -1 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); adpos = pairnum[left]; pairnum[left]++; pairprob[left][adpos].bestscore = 0.0; pairprob[left][adpos].bestpos = right; pairprob[left][adpos+1].bestscore = -1.0; pairprob[left][adpos+1].bestpos = -1; pt2 = pairprob[left]+adpos; } else adpos = pt2-pairprob[left]; pt2->bestscore += prob * eff[i]; if( pt2->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); // fprintf( stderr, "pairprob[0][0].bestpos=%d\n", pairprob[0][0].bestpos ); // fprintf( stderr, "pairprob[0][0].bestscore=%f\n", pairprob[0][0].bestscore ); } } // fprintf( stderr, "before 
taikakuka\n" ); for( i=0; i -1 ) { // pairprob[i][j].bestscore /= (double)nseq; // fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][j].bestpos, pairprob[i][j].bestscore, seq[0][i], seq[0][pairprob[i][j].bestpos] ); } } #if 0 for( i=0; i %d\n", i, j, right, i ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = pairprob[i][j].bestscore; pairprob[right][pairnum[right]].bestpos = i; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; } #endif free( pairnum ); } void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob ) { int lgth; int i; static TLS int *order = NULL; static TLS char **name = NULL; char gett[1000]; FILE *fp; int left, right, dumm; double prob; static TLS int pid; static TLS char fnamein[100]; static TLS char cmd[1000]; static TLS int *pairnum; lgth = strlen( seq[0] ); if( order == NULL ) { pid = (int)getpid(); sprintf( fnamein, "/tmp/_rnaalifoldin.%d", pid ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, 10 ); for( i=0; i 50.0 && prob > pairprob[left][0].bestscore ) { pairprob[left][0].bestscore = prob; pairprob[left][0].bestpos = right; #else if( prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob / 100.0; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob / 100.0; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; fprintf( stderr, "%d-%d, %f\n", left, 
right, prob ); #endif } } fclose( fp ); sprintf( cmd, "rm -f %s", fnamein ); system( cmd ); for( i=0; i -1 ) { pairprob[right][0].bestpos = i; pairprob[right][0].bestscore = pairprob[i][0].bestscore; } } #if 0 for( i=0; i -1 ) pairprob[i][0].bestscore = 1.0; // atode kesu #endif // fprintf( stderr, "after taikakuka in rnaalifoldcall\n" ); // for( i=0; iori\n%s\n", oseq1[0] ); fprintf( stdout, ">rev\n%s\n", oseq1r[0] ); } #endif /* similarity score */ Lalignmm_hmout( oseq1, oseq2, eff1, eff2, nseq1, nseq2, 10000, NULL, NULL, NULL, NULL, map ); if( 1 ) { if( RNAscoremtx == 'n' ) { for( i=0; ibestpos!=-1; pairpt1++ ) { // if( pairprob1[i] == NULL ) continue; for( j=0; jbestpos!=-1; pairpt2++ ) { // fprintf( stderr, "i=%d, j=%d, pn1=%d, pn2=%d\n", i, j, pairpt1-pairprob1[i], pairpt2-pairprob2[j] ); // if( pairprob2[j] == NULL ) continue; uido = pairpt1->bestpos; ujdo = pairpt2->bestpos; prob = pairpt1->bestscore * pairpt2->bestscore; // prob = 1.0; // fprintf( stderr, "i=%d->uido=%d, j=%d->ujdo=%d\n", i, uido, j, ujdo ); // fprintf( stderr, "impmtx2[%d][%d] = %f\n", i, j, impmtx2[i][j] ); // if( i < uido && j > ujdo ) continue; // if( i > uido && j < ujdo ) continue; // posdistj = abs( ujdo-j ); // if( uido > -1 && ujdo > -1 ) if( uido > -1 && ujdo > -1 && ( ( i > uido && j > ujdo ) || ( i < uido && j < ujdo ) ) ) { { impmtx2[i][j] += MAX( 0, map[uido][ujdo] ) * consweight_rna * 600 * prob; // osoi } } } } for( i=0; i 2bai static int allowlongadds; static int addtotop; static int keeplength; static int diffout; // Incomplete. 
Not used static int ndeleted; static int mapout; static int smoothing; static double hitout; static int tuplesize; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define D6LENFACA 0.01 #define D6LENFACB 2500 #define D6LENFACC 2500 #define D6LENFACD 0.1 #define D10LENFACA 0.01 #define D10LENFACB 1000000 #define D10LENFACC 1000000 #define D10LENFACD 0.0 typedef struct _thread_arg { int njob; int nadd; int *nlen; int *follows; char **name; char **seq; LocalHom **localhomtable; double **iscore; double **nscore; int *istherenewgap; int **newgaplist; RNApair ***singlerna; double *eff_kozo_mapped; int alloclen; Treedep *dep; int ***topol; double **len; Addtree *addtree; GapPos **deletelist; GapPos **difflist; #ifdef enablemultithread int *iaddshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } thread_arg_t; #ifdef enablemultithread typedef struct _gaplist2alnxthread_arg { // int thread_no; int ncycle; int *jobpospt; int tmpseqlen; int lenfull; char **seq; int *newgaplist; int *posmap; pthread_mutex_t *mutex; } gaplist2alnxthread_arg_t; typedef struct _distancematrixthread_arg { int thread_no; int njob; int norg; int *jobpospt; int **pointt; int *nogaplen; double **imtx; double **nmtx; double *selfscore; pthread_mutex_t *mutex; } distancematrixthread_arg_t; typedef struct _jobtable2d { int i; int j; } Jobtable2d; typedef struct _dndprethread_arg { int njob; int thread_no; double *selfscore; double **mtx; char **seq; Jobtable2d *jobpospt; pthread_mutex_t *mutex; } dndprethread_arg_t; #endif typedef struct _blocktorealign { int start; int end; int nnewres; } Blocktorealign; static void cnctintvec( int *res, int *o1, int *o2 ) { while( *o1 != -1 ) *res++ = *o1++; while( *o2 != -1 ) *res++ = *o2++; *res = -1; } static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist ) { int i, regstart, regend, len1; regstart = 0; len1 = len+1; for( i=0; i lenb ) return -1; else if( lena < lenb ) return 1; else 
return( 0 ); } static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows ) { int i, j, k, posinold, newlen, *nmem; int n0, n1, localloclen, nhit, hit1, hit2; int *pickhistory; int nprof1, nprof2, pos, zure; char **prof1, **prof2; int *iinf0, *iinf1; int *group, *nearest, *g2n, ngroup; char ***mem; static char **tmpaln0 = NULL; static char **tmpaln1 = NULL; static char **tmpseq; int ***topolpick; int *tmpint; int *intptr, *intptrx; char *tmpseq0, *cptr, **cptrptr; localloclen = 4 * ( block->end - block->start + 1 ); // ookisugi? tmpaln0 = AllocateCharMtx( njob, localloclen ); tmpaln1 = AllocateCharMtx( njob, localloclen ); tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 ); iinf0 = AllocateIntVec( njob ); iinf1 = AllocateIntVec( njob ); nearest = AllocateIntVec( njob ); // oosugi posinold = block->start; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; nearest[n0] = follows[i-norg]; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } mem = AllocateCharCub( n0, n0+1, 0 ); // oosugi nmem = AllocateIntVec( n0 ); // oosugi g2n = AllocateIntVec( n0 ); // oosugi group = AllocateIntVec( n0 ); // oosugi for( i=0; i %d -> group%d\n", i, nearest[i], group[i] ); // fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] ); } for( i=0; i newlen ) newlen = j; for( j=0; j<=i; j++ ) { for( k=0; mem[j][k]; k++ ) fillgap( mem[j][k], newlen ); } #endif } #if 0 fprintf( stderr, "After ingroupalignment (original order):\n" ); for( i=0; i-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][0][j] ); for( intptr=topol[i][1]; *intptr>-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][1][j] ); #if 0 fprintf( stderr, "\nHIT!!! 
\n" ); fprintf( stderr, "\nSTEP %d\n", i ); for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] ); fprintf( stderr, "\n" ); #endif } for( i=0; i-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] ); fprintf( stderr, "\n" ); #endif pos = 0; // for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr; for( intptr=topolpick[i][0]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof1[pos++] = cptr; nprof1 = pos; pos = 0; // for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr; for( intptr=topolpick[i][1]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof2[pos++] = cptr; nprof2 = pos; profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg ); #if 0 for( j=0; jend - block->start + 1 - newlen ); // fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen ); if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 ) { *fullseqlenpt = strlen( fullseq[0] ) * 2; fprintf( stderr, "reallocating..." 
); for( i=0; iend+1; for( i=0; istart, tmpseq0 ); } for( i=0; istart, tmpseq0 ); } FreeCharMtx( tmpaln0 ); FreeCharMtx( tmpaln1 ); FreeCharMtx( tmpseq ); for( i=0; istart; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); // if( strlen( tmpseq[0] ) > 0 ) if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } for( i=1; istart, tmpaln0[i], newlen ); for( i=0; istart, tmpaln1[i], newlen ); } posinold = block->end+1; posinnew = block->start + newlen; zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) ); for( i=0; i 0 && l[i] > 0 ) { if( pg < l[i] ) { c[i] = l[i] - pg; } else { c[i] = 0; } } else { c[i] = l[i]; } prep = p[i]; } } void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit ) { int gaplen; int pos, pi, posl; int prevp = -1; int reslen = 0; char *sp; // char *abk = a; #if 0 int i; char *abk = a; fprintf( stderr, "s = %s\n", s ); fprintf( stderr, "posmap = " ); for( i=0; i lenlimit ) { fprintf( stderr, "Length over. Please recompile!\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } if( reslen++ > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } *a++ = *sp; prevp = pi; } gaplen = *l; pi = *p; if( (reslen+=gaplen) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. 
Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } *a = 0; // fprintf( stderr, "reslen = %d, strlen(a) = %d\n", reslen, strlen( abk ) ); // fprintf( stderr, "a = %s\n", abk ); } static void makenewgaplist( int *l, char *a ) { while( 1 ) { while( *a == '=' ) { a++; (*l)++; // fprintf( stderr, "a[] (i) = %s, *l=%d\n", a, *(l) ); } *++l = 0; if( *a == 0 ) break; a++; } *l = -1; } void arguments( int argc, char *argv[] ) { int c; nthread = 1; codonpos = 0; codonscore = 0; outnumber = 0; scoreout = 0; treein = 0; topin = 0; rnaprediction = 'm'; rnakozo = 0; nevermemsave = 0; inputfile = NULL; addfile = NULL; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; // chuui force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; sueff_global = 0.1; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; penalty_shift_factor = 1000.0; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; nadd = 0; multidist = 0; tuplesize = -1; legacygapcost = 0; allowlongadds = 0; addtotop = 0; keeplength = 0; diffout = 0; mapout = 0; smoothing = 0; distout = 0; hitout = 0.0; nwildcard = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) 
* 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'R': codonpos = 1; break; case 'S': codonscore = 1; break; #if 0 case 'R': rnaprediction = 'r'; break; case 's': RNAscoremtx = 'r'; break; #endif #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case '^': hitout = atof( *++argv ); --argc; goto nextoption; case 't': treeout = 1; break; case 'T': noalign = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif // case 'S': // 
scoreout = 1; // break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; case 's': treemethod = 's'; break; #endif case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': treemethod = 'E'; break; case 'q': treemethod = 'q'; break; case 'n' : outnumber = 1; break; #if 0 case 'a': alg = 'a'; break; case 'Q': alg = 'Q'; break; #endif case 'H': alg = 'H'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'N': nevermemsave = 1; break; case 'B': // hitsuyou! memopt -M -B no tame break; case 'F': use_fft = 1; break; case 'G': force_fft = 1; use_fft = 1; break; case 'U': treein = 1; break; case 'x': addtotop = 1; break; case 'V': allowlongadds = 1; break; case 'p': smoothing = 1; break; #if 0 case 'V': topin = 1; break; #endif case 'u': tbrweight = 0; weight = 0; break; case 'v': tbrweight = 3; break; case 'd': multidist = 1; break; case 'W': tuplesize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'd': disp = 1; break; #endif /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; #endif case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'Z': checkC = 1; break; #endif case 'L': legacygapcost = 1; break; case 'Y': keeplength = 1; break; case 'z': mapout = 2; break; case 'Z': mapout = 1; break; case '%': diffout = 1; break; case ':': nwildcard = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static double treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo ) { int i, l, m; int len1nocommongap, len2nocommongap; int len1, len2; int clus1, clus2; double pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; int *fftlog; int m1, m2; int *gaplen; int *gapmap; int *alreadyaligned; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; RNApair ***grouprna1, ***grouprna2; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); alreadyaligned = AllocateIntVec( nseq ); if( constraint ) { localhomshrink = (LocalHom ***)calloc( nseq, sizeof( 
LocalHom ** ) ); #if SMALLMEMORY if( multidist ) { for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // for( i=0; i 50000 || len2 > 50000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; // 2013/Jul17 commonAlloc1 = 0; commonAlloc2 = 0; } if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode { if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( codonpos ) { reporterr( "\n\nThe --codonpos option is supported only for --6merpair --addfragments at this point. 
Add the --6merpair flag, for now.\n\n\n" ); exit( 1 ); } if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, NULL, 1, topol[l][0], topol[l][1], NULL, NULL, NULL, -1, 0 ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxchild0 tsukaeru?? } else if( alg == 'Q' ) { fprintf( stderr, "Q has been disabled.\n" ); exit( 1 ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, NULL, NULL, NULL, 0.0, 0.0 ); break; case( 'A' ): pscore = A__align( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxchild0 tsukaeru?? 
break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; #if 0 // New gaps = '=' fprintf( stderr, "Original msa\n" ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( mergeoralign[l] == '2' ) { // if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, deleterecord ); adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); // for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } #if 0 free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); #endif } //for( i=0; ithread_no; int *iaddshare = targ->iaddshare; #endif int njob = targ->njob; int *follows = targ->follows; int nadd = targ->nadd; int *nlen = targ->nlen; char **name = targ->name; char **seq = targ->seq; LocalHom **localhomtable = targ->localhomtable; double **iscore = targ->iscore; double **nscore = targ->nscore; int *istherenewgap = targ->istherenewgap; int **newgaplist = targ->newgaplist; RNApair ***singlerna = targ->singlerna; double *eff_kozo_mapped = targ->eff_kozo_mapped; int alloclen = targ->alloclen; Treedep *dep = targ->dep; int ***topol = targ->topol; double **len = targ->len; Addtree *addtree = targ->addtree; GapPos **deletelist = targ->deletelist; GapPos **difflist = targ->difflist; double pscore; int *alnleninnode = NULL; char *targetseq; // fprintf( stderr, "\nPreparing thread %d\n", thread_no ); norg = njob - nadd; njobc = norg+1; #if 0 alnleninnode = AllocateIntVec( norg ); addmem = AllocateIntVec( nadd+1 ); depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = 
AllocateCharMtx( njob, 0 ); bseq = AllocateCharMtx( njobc, alloclen ); namec = AllocateCharMtx( njob, 0 ); nlenc = AllocateIntVec( njob ); mergeoralign = AllocateCharVec( njob ); nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc ); tmpseq = calloc( alloclen, sizeof( char ) ); #else alnleninnode = AllocateIntVec( norg ); addmem = AllocateIntVec( nadd+1 ); depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); mseq1 = AllocateCharMtx( njobc, 0 ); mseq2 = AllocateCharMtx( njobc, 0 ); bseq = AllocateCharMtx( njobc, alloclen ); namec = AllocateCharMtx( njobc, 0 ); nlenc = AllocateIntVec( njobc ); mergeoralign = AllocateCharVec( njobc ); nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc ); tmpseq = calloc( alloclen, sizeof( char ) ); #endif if( allowlongadds ) // hontou ha iranai. { for( i=0; i=0; i-- ) // for( i=norg-2; i; i-- ) // BUG!!!! { // reporterr( "\nstep %d\n", i ); k = 0; for( j=0; (m=topol[i][0][j])!=-1; j++ ) { mseq1[k++] = bseq[m]; // reporterr( "%d ", m ); } for( j=0; (m=topol[i][1][j])!=-1; j++ ) { mseq1[k++] = bseq[m]; // reporterr( "%d ", m ); } // reporterr( "\n" ); commongappick( k, mseq1 ); alnleninnode[i] = strlen( mseq1[0] ); // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] ); } } // for( i=0; imutex_counter ); iadd = *iaddshare; if( iadd == nadd ) { pthread_mutex_unlock( targ->mutex_counter ); break; } if( iadd < 500 ) fprintf( stderr, "\rSTEP %d / %d (thread %d) \r", iadd, nadd, thread_no ); else if( iadd % 100 == 0 ) fprintf( stderr, "\nSTEP %d / %d (thread %d) \n", iadd, nadd, thread_no ); ++(*iaddshare); targetseq = seq[norg+iadd]; pthread_mutex_unlock( targ->mutex_counter ); } else #endif { iadd++; if( iadd == nadd ) break; targetseq = seq[norg+iadd]; if( iadd < 500 ) fprintf( stderr, "\rSTEP %d / %d \r", iadd, nadd ); else if( iadd % 100 == 0 ) fprintf( stderr, "\nSTEP %d / %d \n", iadd, nadd ); } for( i=0; i 0 ) { for( i=0; imutex_counter ); fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd ); for( i=0; 
imutex_counter ); #endif singlerna = NULL; pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped ); #if 0 pthread_mutex_lock( targ->mutex_counter ); // fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore ); // fprintf( stdout, "effc (iadd=%d) = ", iadd ); // for( i=0; imutex_counter ); #endif #if 0 fprintf( trap_g, "done.\n" ); fclose( trap_g ); #endif // fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] ); // fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] ); // strcpy( seq[norg+iadd], bseq[norg] ); if( diffout ) { reporterr( "Not yet written\n" ); exit( 1 ); // deletenewinsertions_difflist( norg, 1, bseq, bseq+norg, difflist+iadd ); // strcpy( targetseq, bseq[norg] ); // i = norg; // no new gap!! } else if( keeplength ) { // reporterr( "deletelist = %p\n", deletelist ); // reporterr( "deletelist+iadd = %p\n", deletelist+iadd ); ndeleted += deletenewinsertions_whole_eq( norg, 1, bseq, bseq+norg, deletelist+iadd ); // for( i=0; i\n%s\n", bseq[i] ); strcpy( targetseq, bseq[norg] ); i = norg; // no new gap!! 
} else { strcpy( targetseq, bseq[norg] ); rep = -1; for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); else { fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown ); for( i=0; i 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i nogaplen[j] ) { longer=(double)nogaplen[i]; shorter=(double)nogaplen[j]; } else { longer=(double)nogaplen[j]; shorter=(double)nogaplen[i]; } lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); bunbo = MIN( selfscore[i], selfscore[j] ); if( j < norg ) { if( bunbo == 0.0 ) imtx[i][j-i] = maxdist; else imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; } else { if( bunbo == 0.0 ) nmtx[i][j-norg] = maxdist; else nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; } } free( table1 ); } } fprintf( stderr, "\ndone.\n\n" ); fflush( stderr ); free( grpseq ); free( tmpseq ); FreeIntMtx( pointt ); free( nogaplen ); free( selfscore ); if( hitout<0.0 ) { fprintf( stdout, "Threshold=%f\n\n", -hitout ); for( i=0; i0.0 ) { fprintf( stdout, "Threshold=%f\n\n", hitout ); for( i=norg; i 0 ) { dndprethread_arg_t *targ; Jobtable2d jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( dndprethread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( 
&mutex, NULL ); for( i=0; i 9.9 ) { fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv ); mtxv = 9.9; // exit( 1 ); // 2016/Aug/3 } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j-i] = mtxv; } } } #if TEST for( i=0; i %d\n", postoshiftfrom, postoshiftto ); for( j=0; j %d\n", postoshiftfrom, postoshiftto ); for( j=0; j 1000 || nadd > 1000 ) use_fft = 0; // if( norg > 1000 ) use_fft = 0; fullseqlen = alloclen = nlenmax*4+1; //chuui! seq = AllocateCharMtx( njob, alloclen ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); ndeleted = 0; if( multidist || tuplesize > 0 ) { iscore = AllocateFloatHalfMtx( norg ); nscore = AllocateFloatMtx( norg, nadd ); } else { iscore = AllocateFloatHalfMtx( njob ); nscore = NULL; } kozoarivec = AllocateCharVec( njob ); ordertable = AllocateIntVec( norg+1 ); if( constraint ) { #if SMALLMEMORY if( multidist ) { localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) ); for( i=0; i 0 ) // if mtx is internally computed { if( multidist == 1 ) { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore ha muda. // hat2p = fopen( "hat2-1", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore ); // fclose( hat2p ); dndpre( norg, seq, iscore ); // fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); // prep = fopen( "hat2i", "r" ); // if( prep == NULL ) ErrorExit( "Make hat2i." ); // readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore ); // fclose( prep ); // fprintf( stderr, "done.\n" ); // hat2p = fopen( "hat2-2", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); // fclose( hat2p ); } else { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); } } else { if( multidist == 1 ) { fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... 
" ); prep = fopen( "hat2n", "r" ); if( prep == NULL ) ErrorExit( "Make hat2n." ); readhat2_doublehalf_part_pointer( prep, njob, nadd, name, nscore ); fclose( prep ); fprintf( stderr, "done.\n" ); fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); prep = fopen( "hat2i", "r" ); if( prep == NULL ) ErrorExit( "Make hat2i." ); readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } else { fprintf( stderr, "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_doublehalf_pointer( prep, njob, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } } #if 1 if( distout ) { fprintf( stderr, "Writing distances between new sequences and existing msa.\n" ); hat2p = fopen( "hat2", "w" ); if( multidist || tuplesize > 0 ) { for( iadd=0; iadd 0.03 ) { fprintf( stderr, "################################################################################\n" ); fprintf( stderr, "# \n" ); fprintf( stderr, "# The reference MSA has >3%% ambiguous columns.\n" ); fprintf( stderr, "# Please prepare a better reference.\n" ); fprintf( stderr, "# \n" ); fprintf( stderr, "################################################################################\n" ); exit( 1 ); } if( keeplength && mapout ) { addbk = (char **)calloc( nadd+1, sizeof( char * ) ); for( i=0; i 1 ) cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] ); else { ordertable[0] = 0; ordertable[1] = -1; } FreeFloatHalfMtx( iscoreo, norg ); #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex_counter; thread_arg_t *targ; int *iaddsharept; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex_counter, NULL ); iaddsharept = calloc( 1, sizeof(int) ); *iaddsharept = 0; for( i=0; i 0 ) { FreeFloatHalfMtx( iscore, norg ); FreeFloatMtx( nscore ); } else { FreeFloatHalfMtx( iscore, njob ); } // for( i=0; i%s (%d) 
\n%s\n", name[norg+i], norg+i, seq[norg+i] ); if( treeout ) { fp = fopen( "infile.tree", "a" ); if( fp == 0 ) { fprintf( stderr, "File error!\n" ); exit( 1 ); } for( i=0; i %d\n", follower[i][j]+norg, i ); } fclose( orderfp ); posmap = AllocateIntVec( lenfull+2 ); realign = calloc( lenfull+2, sizeof( Blocktorealign ) ); for( i=0; i= fullseqlen ) { fullseqlen = tmplen * 2+1; // fprintf( stderr, "Length over!\n" ); // fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) ); fprintf( stderr, "reallocating..." ); // fprintf( stderr, "alloclen=%d\n", alloclen ); // fprintf( stderr, "Please recompile!\n" ); // exit( 1 ); for( i=0; i 0 && ien > 500 ) { gaplist2alnxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread ); targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); jobpos = 1; for( i=0; i%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 ); strcpy( seq[i], tmpseq1 ); } } } tmpseq1 = tmpseq[0]; // insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); // fprintf( stderr, "%d = %s\n", iadd, tmpseq1 ); eq2dash( tmpseq1 ); strcpy( seq[norg+iadd], tmpseq1 ); // adjustposmap( lenfull, posmap, newgaplist_o[iadd] ); adjustposmap( lenfull, posmap, newgaplist_compact ); countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda? // countnewres( lenfull, realign, posmap, newgaplist_compact ); // muda? } fprintf( stderr, "\r done. 
\n\n" ); #if 0 for( i=0; i%s\n", name[i] ); fprintf( stderr, "%s\n", seq[i] ); } #endif #if 0 fprintf( stderr, "realign[].nnewres = " ); for( i=0; i 1 ) { // fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end ); fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull ); // zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg ); // zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows ); zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows ); #if 0 gappick0( check1, seq[0] ); fprintf( stderr, "check1 = %s\n", check1 ); if( strcmp( check1, check2 ) ) { fprintf( stderr, "CHANGED!!!!!\n" ); exit( 1 ); } #endif for( j=i+1; j 0 ) { reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted ); if( mapout ) reporterr( "The deleted letters are shown in the (filename).map file.\n" ); else reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" ); } return( 0 ); } mafft-7.505-without-extensions/core/replaceu.c0000644000175000017500000000533014224501721020746 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 static int seedoffset; static void replace_unusual( int n, char **seq, char *usual, char unknown, int (*uporlow)( int ) ) { int i; char *pt; for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'o': seedoffset = myatoi( *++argv ); fprintf( stderr, "seedoffset = %d\n", seedoffset ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; int nlenmin; char **name; char **seq; int *nlen; int i; char *usual; arguments( argc, argv ); 
if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; // dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); fclose( infp ); // for( i=0; i%s\n", name[i]+1 ); if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; fprintf( origfp, "%s\n", seq[i] ); } fclose( origfp ); #endif if( dorp == 'p' ) { usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-."; replace_unusual( njob, seq, usual, 'X', toupper ); } else { usual = "ATGCUatgcuBDHKMNRSVWYXbdhkmnrsvwyx-"; replace_unusual( njob, seq, usual, 'n', tolower ); } for( i=0; i_os_%d_oe_%s\n", i+seedoffset, name[i]+1 ); fprintf( stdout, "%s\n", seq[i] ); } free( nlen ); FreeCharMtx( seq ); FreeCharMtx( name ); return( 0 ); } mafft-7.505-without-extensions/core/mafft.bat.win100000644000175000017500000000027114224501721021523 0ustar nileshnilesh@echo off setlocal set ROOTDIR="%~d0%~p0\ms" set PATH=/bin/:%PATH% set MAFFT_BINARIES=/lib/mafft set TMPDIR=%~d0%~p0/ms/tmp %ROOTDIR%\bin\sh %ROOTDIR%\bin\mafft %* :EOF mafft-7.505-without-extensions/core/mafft.10000644000175000017500000003466314224501721020174 0ustar nileshnilesh.\" Title: MAFFT .\" Author: Kazutaka Katoh .\" Generator: DocBook XSL Stylesheets v1.72.0 .\" Date: 2007-08-14 .\" Manual: Mafft Manual .\" Source: mafft 6.240 .\" .TH "MAFFT" "1" "2007\-06\-09" "mafft 6.240" "Mafft Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "THIS MANUAL IS FOR V6.2XX (2007)" Recent versions (v7.1xx; 2013 Jan.) have more features than those described below. 
See also the tips page at http://mafft.cbrc.jp/alignment/software/tips0.html .SH "NAME" .RS 0 .sp mafft \- Multiple alignment program for amino acid or nucleotide sequences .RE .SH "SYNOPSIS" .RS 0 .HP 6 \fBmafft\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] .HP 5 \fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] .HP 14 \fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] .HP .sp \fIinput\fR, \fIgroup1\fR and \fIgroup2\fR must be in FASTA format. .RE .SH "DESCRIPTION" .RS 0 \fBMAFFT\fR is a multiple sequence alignment program for unix\-like operating systems. It offers a range of multiple alignment methods. .SS "Accuracy\-oriented methods:" .sp .RS 4 \h'-04'\(bu\h'+03'L\-INS\-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): .HP 6 \fBmafft\fR \fB\-\-localpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'G\-INS\-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): .HP 6 \fBmafft\fR \fB\-\-globalpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBginsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'E\-INS\-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): .HP 6 \fBmafft\fR \fB\-\-ep\fR\ \fI0\fR \fB\-\-genafpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR] .br For E\-INS\-i, the \fB\-\-ep\fR \fI0\fR option is recommended to 
allow large gaps. .RE .SS "Speed\-oriented methods:" .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; two cycles only): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; max. 1000 iterations): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-2 (fast; progressive method): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBfftns\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'FFT\-NS\-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-i (iterative refinement method without FFT approximation; two cycles only): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] .HP 7 \fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-2 (fast; progressive method without the FFT approximation): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR] .HP 6 \fBnwns\fR \fIinput\fR [>\ \fIoutput\fR] .RE .sp .RS 4 \h'-04'\(bu\h'+03'NW\-NS\-PartTree\-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): .HP 6 \fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fB\-\-parttree\fR \fIinput\fR [>\ \fIoutput\fR] .RE .SS "Group\-to\-group alignments" .HP 6 .RS 4 \fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR] .sp or: .sp \fBmafft\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fB\-\-seed\fR\ \fIgroup1\fR 
\fB\-\-seed\fR\ \fIgroup2\fR /dev/null [>\ \fIoutput\fR] .RE .RE .RE .SH "OPTIONS" .SS "Algorithm" .RS 0 .PP \fB\-\-auto\fR .RS 4 Automatically selects an appropriate strategy from L\-INS\-i, FFT\-NS\-i and FFT\-NS\-2, according to data size. Default: off (always FFT\-NS\-2) .RE .PP \fB\-\-6merpair\fR .RS 4 Distance is calculated based on the number of shared 6mers. Default: on .RE .PP \fB\-\-globalpair\fR .RS 4 All pairwise alignments are computed with the Needleman\-Wunsch algorithm. More accurate but slower than \-\-6merpair. Suitable for a set of globally alignable sequences. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (G\-INS\-i). Default: off (6mer distance is used) .RE .PP \fB\-\-localpair\fR .RS 4 All pairwise alignments are computed with the Smith\-Waterman algorithm. More accurate but slower than \-\-6merpair. Suitable for a set of locally alignable sequences. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (L\-INS\-i). Default: off (6mer distance is used) .RE .PP \fB\-\-genafpair\fR .RS 4 All pairwise alignments are computed with a local algorithm with the generalized affine gap cost (Altschul 1998). More accurate but slower than \-\-6merpair. Suitable when large internal gaps are expected. Applicable to up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (E\-INS\-i). Default: off (6mer distance is used) .RE .\".PP .\"\fB\-\-fastswpair\fR .\".RS 4 .\"Distance is calculated based on a FASTA alignment. .\"FASTA is required. Default: off (6mer distance is used) .\".RE .PP \fB\-\-fastapair\fR .RS 4 All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). FASTA is required. Default: off (6mer distance is used) .RE .\".PP .\"\fB\-\-blastpair\fR .\".RS 4 .\"Distance is calculated based on a BLAST alignment. BLAST is .\"required. 
Default: off (6mer distance is used) .\".RE .PP \fB\-\-weighti\fR \fInumber\fR .RS 4 Weighting factor for the consistency term calculated from pairwise alignments. Valid when any of \-\-globalpair, \-\-localpair, \-\-genafpair, \-\-fastapair or \-\-blastpair is selected. Default: 2.7 .RE .PP \fB\-\-retree\fR \fInumber\fR .RS 4 Guide tree is built \fInumber\fR times in the progressive stage. Valid with 6mer distance. Default: 2 .RE .PP \fB\-\-maxiterate\fR \fInumber\fR .RS 4 \fInumber\fR cycles of iterative refinement are performed. Default: 0 .RE .PP \fB\-\-fft\fR .RS 4 Use FFT approximation in group\-to\-group alignment. Default: on .RE .PP \fB\-\-nofft\fR .RS 4 Do not use FFT approximation in group\-to\-group alignment. Default: off .RE .PP \fB\-\-noscore\fR .RS 4 Alignment score is not checked in the iterative refinement stage. Default: off (score is checked) .RE .PP \fB\-\-memsave\fR .RS 4 Use the Myers\-Miller (1988) algorithm. Default: automatically turned on when the alignment length exceeds 10,000 (aa/nt). .RE .PP \fB\-\-parttree\fR .RS 4 Use a fast tree\-building method (PartTree, Katoh and Toh 2007) with the 6mer distance. Recommended when a large number (> ~10,000) of sequences are input. Default: off .RE .PP \fB\-\-dpparttree\fR .RS 4 The PartTree algorithm is used with distances based on DP. Slightly more accurate and slower than \-\-parttree. Recommended when a large number (> ~10,000) of sequences are input. Default: off .RE .PP \fB\-\-fastaparttree\fR .RS 4 The PartTree algorithm is used with distances based on FASTA. Slightly more accurate and slower than \-\-parttree. Recommended when a large number (> ~10,000) of sequences are input. FASTA is required. Default: off .RE .PP \fB\-\-partsize\fR \fInumber\fR .RS 4 The number of partitions in the PartTree algorithm. Default: 50 .RE .PP \fB\-\-groupsize\fR \fInumber\fR .RS 4 Do not make alignments larger than \fInumber\fR sequences. Valid only with the \-\-*parttree options. 
Default: the number of input sequences .RE .RE .SS "Parameter" .RS 0 .PP \fB\-\-op\fR \fInumber\fR .RS 4 Gap opening penalty at group\-to\-group alignment. Default: 1.53 .RE .PP \fB\-\-ep\fR \fInumber\fR .RS 4 Offset value, which works like gap extension penalty, for group\-to\-group alignment. Default: 0.123 .RE .PP \fB\-\-lop\fR \fInumber\fR .RS 4 Gap opening penalty at local pairwise alignment. Valid when the \-\-localpair or \-\-genafpair option is selected. Default: \-2.00 .RE .PP \fB\-\-lep\fR \fInumber\fR .RS 4 Offset value at local pairwise alignment. Valid when the \-\-localpair or \-\-genafpair option is selected. Default: 0.1 .RE .PP \fB\-\-lexp\fR \fInumber\fR .RS 4 Gap extension penalty at local pairwise alignment. Valid when the \-\-localpair or \-\-genafpair option is selected. Default: \-0.1 .RE .PP \fB\-\-LOP\fR \fInumber\fR .RS 4 Gap opening penalty to skip the alignment. Valid when the \-\-genafpair option is selected. Default: \-6.00 .RE .PP \fB\-\-LEXP\fR \fInumber\fR .RS 4 Gap extension penalty to skip the alignment. Valid when the \-\-genafpair option is selected. Default: 0.00 .RE .PP \fB\-\-bl\fR \fInumber\fR .RS 4 BLOSUM \fInumber\fR matrix (Henikoff and Henikoff 1992) is used. \fInumber\fR=30, 45, 62 or 80. Default: 62 .RE .PP \fB\-\-jtt\fR \fInumber\fR .RS 4 JTT PAM \fInumber\fR (Jones et al. 1992) matrix is used. \fInumber\fR>0. Default: BLOSUM62 .RE .PP \fB\-\-tm\fR \fInumber\fR .RS 4 Transmembrane PAM \fInumber\fR (Jones et al. 1994) matrix is used. \fInumber\fR>0. Default: BLOSUM62 .RE .PP \fB\-\-aamatrix\fR \fImatrixfile\fR .RS 4 Use a user\-defined AA scoring matrix. The format of \fImatrixfile\fR is the same as that of BLAST. Ignored when nucleotide sequences are input. Default: BLOSUM62 .RE .PP \fB\-\-fmodel\fR .RS 4 Incorporate the AA/nuc composition information into the scoring matrix. Default: off .RE .RE .SS "Output" .RS 0 .PP \fB\-\-clustalout\fR .RS 4 Output format: clustal format. 
Default: off (fasta format) .RE .PP \fB\-\-inputorder\fR .RS 4 Output order: same as input. Default: on .RE .PP \fB\-\-reorder\fR .RS 4 Output order: aligned. Default: off (inputorder) .RE .PP \fB\-\-treeout\fR .RS 4 Guide tree is output to the \fIinput\fR.tree file. Default: off .RE .PP \fB\-\-quiet\fR .RS 4 Do not report progress. Default: off .RE .RE .SS "Input" .RS 0 .PP \fB\-\-nuc\fR .RS 4 Assume the sequences are nucleotide. Default: auto .RE .PP \fB\-\-amino\fR .RS 4 Assume the sequences are amino acid. Default: auto .RE .PP \fB\-\-seed\fR \fIalignment1\fR [\fB\-\-seed\fR \fIalignment2\fR \fB\-\-seed\fR \fIalignment3\fR ...] .RS 4 Seed alignments given in \fIalignment_n\fR (fasta format) are aligned with sequences in \fIinput\fR. The alignment within every seed is preserved. .RE .RE .SH "FILES" .RS 0 .PP Mafft stores the input sequences and other files in a temporary directory, which by default is located in \fI/tmp\fR. .RE .SH "ENVIRONMENT" .RS 0 .PP \fBMAFFT_BINARIES\fR .RS 4 Indicates the location of the binary files used by mafft. By default, they are searched for in \fI/usr/local/lib/mafft\fR, but on Debian systems, they are searched for in \fI/usr/lib/mafft\fR. .RE .PP \fBFASTA_4_MAFFT\fR .RS 4 This variable can be set to indicate to mafft the location of the fasta34 program if it is not in the PATH. .RE .RE .SH "SEE ALSO" .RS 0 .PP \fBmafft\-homologs\fR(1) .RE .SH "REFERENCES" .RS 0 .SS "In English" .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Toh (Bioinformatics 23:372\-374, 2007) PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences (describes the PartTree algorithm). .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment (describes [ancestral versions of] the G\-INS\-i, L\-INS\-i and E\-INS\-i strategies) .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh, Misawa, Kuma and Miyata (Nucleic Acids Res. 
30:3059\-3066, 2002) MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform (describes the FFT\-NS\-1, FFT\-NS\-2 and FFT\-NS\-i strategies) .RE .SS "In Japanese" .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Misawa (Seibutsubutsuri 46:312\-317, 2006) Multiple Sequence Alignments: the Next Generation .RE .sp .RS 4 \h'-04'\(bu\h'+03'Katoh and Kuma (Kagaku to Seibutsu 44:102\-108, 2006) Jissen\-teki Multiple Alignment .RE .RE .SH "AUTHORS" .RS 0 .PP \fBKazutaka Katoh\fR <\&kazutaka.katoh_at_aist.go.jp\&> .sp -1n .IP "" 4 Wrote Mafft. .PP \fBCharles Plessy\fR <\&charles\-debian\-nospam_at_plessy.org\&> .sp -1n .IP "" 4 Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. .RE .SH "COPYRIGHT" .RS 0 Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) .br Copyright \(co 2007 Charles Plessy (this manpage) .br .PP Mafft and its manpage are offered under the following conditions: .PP Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: .sp .RS 4 \h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. .RE .sp .RS 4 \h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. .RE .sp .RS 4 \h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. .RE .PP THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .br .RE mafft-7.505-without-extensions/core/mafft-distance.c0000644000175000017500000002061314224501721022034 0ustar nileshnilesh#include "mltaln.h" #include "mtxutl.h" #define DEBUG 0 #define TEST 0 #define END_OF_VEC -1 static char outputformat; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define DLENFACA 0.01 #define DLENFACB 2500 #define DLENFACC 2500 #define DLENFACD 0.1 void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; outputformat = 's'; scoremtx = 1; nblosum = 62; dorp = NOTSPECIFIED; nadd = 0; alg = 'X'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi(*++argv); if( nadd == 0 ) { fprintf( stderr, "nadd = %d?\n", nadd ); exit( 1 ); } --argc; goto nextoption; case 'p': outputformat = 'p'; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( inputfile == NULL ) { argc--; inputfile = *argv; fprintf( stderr, "inputfile = %s\n", inputfile ); } if( argc != 0 ) { fprintf( stderr, "Usage: mafft-distance [-PD] [-i inputfile] inputfile > outputfile\n" ); exit( 1 ); } } void seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown 
character %c\r", *(seq-1) ); } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } static int localcommonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( *pointt == -1 ) return( 0 ); if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize ); } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; return; } p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; return; } p = n; point = 
*n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } int main( int argc, char **argv ) { int i, j, initj; FILE *infp; char **seq; int *grpseq; char *tmpseq; int **pointt; static char **name; static int *nlen; double *mtxself; double score; static short *table1; double longer, shorter; double lenfac; double bunbo; int norg; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } tmpseq = AllocateCharVec( nlenmax+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); mtxself = AllocateDoubleVec( njob ); pamN = NOTSPECIFIED; name = AllocateCharMtx( njob, B ); nlen = AllocateIntVec( njob ); #if 0 FRead( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); if( nadd ) outputformat = 's'; norg = njob - nadd; if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); if( dorp == 'd' ) { lenfaca = DLENFACA; lenfacb = DLENFACB; lenfacc = DLENFACC; lenfacd = DLENFACD; } else { lenfaca = PLENFACA; lenfacb = PLENFACB; lenfacc = PLENFACC; lenfacd = PLENFACD; } maxl = 0; for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } fprintf( stderr, "\nCalculating i-i scores ... 
" ); for( i=0; i nlen[j] ) { longer=(double)nlen[i]; shorter=(double)nlen[j]; } else { longer=(double)nlen[j]; shorter=(double)nlen[i]; } // lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD ); lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0; // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter ); score = localcommonsextet_p( table1, pointt[j] ); bunbo = MIN( mtxself[i], mtxself[j] ); if( outputformat == 'p' ) { if( bunbo == 0.0 ) fprintf( stdout, " %8.6f", 1.0 ); else fprintf( stdout, " %8.6f", ( 1.0 - score / bunbo ) * lenfac ); if( j % 7 == 6 ) fprintf( stdout, "\n" ); } else { if( bunbo == 0.0 ) fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, 1.0, nlen[i], nlen[j] ); else fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] ); } // fprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo ); // score = (double)localcommonsextet_p( table1, pointt[j] ); // fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] ); } free( table1 ); } fprintf( stderr, "\n" ); if( outputformat == 'p' ) fprintf( stdout, "\n" ); SHOWVERSION; exit( 0 ); } mafft-7.505-without-extensions/core/seq2regtable.c0000644000175000017500000000302114224501721021521 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 char *weboutfile = NULL; void arguments( int argc, char *argv[] ) { int c; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'w': weboutfile = *++argv; fprintf( stderr, "weboutfile = %s\n", weboutfile ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { 
fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *weboutfp; int nlenmin; int isaligned = 0; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( weboutfile ) { weboutfp = fopen( weboutfile, "w" ); if( !weboutfp ) { fprintf( stderr, "Cannot open %s\n", weboutfile ); exit( 1 ); } } dorp = NOTSPECIFIED; if( weboutfile ) { getnumlen_nogap_outallreg_web( infp, weboutfp, &nlenmin, &isaligned ); if( isaligned ) fprintf( stdout, "Aligned\n" ); else fprintf( stdout, "Not aligned\n" ); } else getnumlen_nogap_outallreg( infp, &nlenmin ); return( 0 ); } mafft-7.505-without-extensions/core/dvtditr.c0000644000175000017500000007456114224501721020642 0ustar nileshnilesh /* Tree-dependent-iteration */ /* Devide to segments */ #include "mltaln.h" extern char **seq_g; extern char **res_g; static int subalignment; static int subalignmentoffset; static int intop; static int intree; static double autosubalignment; static void calcmaxdistclass( void ) { int c; double rep; for( c=0; c 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'K': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", niter ); --argc; goto nextoption; case 'I': niter = myatoi( *++argv ); fprintf( stderr, "niter = %d\n", niter ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); if( penalty_shift_factor < 100.0 && penalty_shift_factor != 2.0 ) { fprintf( stderr, 
"%f, penalty_shift is fixed to penalty x 2 in the iterative refinement phase.\n", penalty_shift_factor ); penalty_shift_factor = 2.0; } --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': bunkatsu = 0; fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; case 't': randomseed = myatoi( *++argv ); fprintf( stderr, "randomseed = %d\n", randomseed ); --argc; goto nextoption; case 'p': argkey = *++argv; if( !strcmp( argkey, "BESTFIRST" ) ) parallelizationstrategy = BESTFIRST; else if( !strcmp( argkey, "BAATARI0" ) ) parallelizationstrategy = BAATARI0; else if( !strcmp( argkey, "BAATARI1" ) ) parallelizationstrategy = BAATARI1; else if( !strcmp( argkey, "BAATARI2" ) ) parallelizationstrategy = BAATARI2; else { fprintf( stderr, "Unknown parallelization strategy, %s\n", argkey ); exit( 1 ); } // exit( 1 ); --argc; goto nextoption; case 's': 
specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; #if 0 case 'S' : scoreout = 1; // for checking parallel calculation break; #else case 'S' : spscoreout = 1; // 2014/Dec/30, sp score break; #endif #if 0 case 's' : RNAscoremtx = 'r'; break; #endif #if 1 case 'a': fmodel = 1; break; #endif case 'N': nevermemsave = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'Q': alg = 'Q'; break; #endif case 'R': rnaprediction = 'r'; break; case 'O': fftNoAnchStop = 1; break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; #endif case 'T': kobetsubunkatsu = 0; break; case 'B': bunkatsu = 0; break; #if 0 case 'c': cooling = 1; break; case 'a': alg = 'a'; break; case 's' : treemethod = 's'; break; case 'H': alg = 'H'; break; #endif case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case '@': alg = 'd'; break; case 'F': use_fft = 1; break; #if 0 case 't': weight = 4; break; #endif case 'u': weight = 0; break; case 'U': intree = 1; break; case 'V': intop = 1; break; case 'J': utree = 0; break; #if 0 case 'd': disp = 1; break; #endif case 'Z': score_check = 0; break; case 'Y': score_check = 2; break; case 'L': legacygapcost = 1; break; #if 0 case 'n' : treemethod = 'n'; break; #endif case 'n' : outnumber = 1; break; case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; #if 0 case 'E' : treemethod = 'E'; break; case 'q' : treemethod = 'q'; break; #endif case 'E': autosubalignment = atof( *++argv ); fprintf( stderr, "autosubalignment = %f\n", autosubalignment ); --argc; goto nextoption; case 'W': minimumweight = atof( *++argv ); fprintf( stderr, "minimumweight = %f\n", minimumweight ); --argc; goto nextoption; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = 
myatoi( *++argv ); --argc; goto nextoption; case '=': specifictarget = 1; break; case ':': nwildcard = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options : Check source file!\n" ); exit( 1 ); } #if 0 if( alg == 'A' && weight == 0 ) ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); #endif } int main( int argc, char *argv[] ) { int identity; static int nlen[M]; static char **name, **seq, **aseq, **bseq; static Segment *segment = NULL; static int anchors[MAXSEG]; int i, j; int iseg, nseg; int ***topol; double **len; double **eff; FILE *prep; FILE *infp; FILE *orderfp; int alloclen; int returnvalue; char c; int ocut; char **seq_g_bk; LocalHom **localhomtable = NULL; // by D.Mathog RNApair ***singlerna; int nogaplen; static char **nogap1seq; static char *kozoarivec; int nkozo; int alignmentlength; int **skipthisbranch; int foundthebranch; int *reftable; int nsubalignments, maxmem; int **subtable; int *insubtable; int *preservegaps; char ***subalnpt; int ntarget, *targetmap, *targetmapr; int ilim; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( fastathreshold < 0.0001 ) constraint = 0; if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); nkozo = 0; if( njob < 2 ) { seq = AllocateCharMtx( 2, nlenmax*1+1 ); name = AllocateCharMtx( 2, B+1 ); // nlen = AllocateIntVec( 2 ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); initFiles(); gappick0( seq[1], seq[0] ); // writeData_pointer( prep_g, njob, name, nlen, seq+1 ); writeData_pointer( prep_g, njob, name, nlen, seq+1 ); reporterr( "Warning: Only %d sequence found.\n", njob ); FreeCharMtx( seq ); FreeCharMtx( name ); // free( nlen ); closeFiles(); exit( 0 ); } 
if( nlenmax < 1 ) { seq = AllocateCharMtx( njob, nlenmax*1+1 ); name = AllocateCharMtx( njob, B+1 ); // nlen = AllocateIntVec( 2 ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); initFiles(); for( i=0; i 30000 ) if( nlenmax > 50000 ) // version >= 6.823 { #if 0 if( constraint ) { fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax ); exit( 1 ); } if( nevermemsave ) { fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax ); exit( 1 ); } #endif if( !constraint && !nevermemsave && alg != 'M' ) { fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax ); alg = 'M'; } } if( specificityconsideration ) calcmaxdistclass(); for( i=0; i 0.0 && subalignment == 0 ) { // reporterr( "Computing skipthisbranch..\n" ); insubtable = AllocateIntVec( njob ); preservegaps = AllocateIntVec( njob ); subtable = calloc( 1, sizeof( char * ) ); subtable[0] = NULL; // for FreeIntMtx for( i=0; i 0 ) { reftable = calloc( sizeof( int ), njob ); for( j=0; j= njob ) { fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); fprintf( stderr, "#\n" ); fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); fprintf( stderr, "# %d. 
%-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); fprintf( stderr, "#\n" ); fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "\n" ); exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou fprintf( stderr, "\n" ); fprintf( stderr, "###############################################################################\n" ); fprintf( stderr, "# ERROR!\n" ); fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { fprintf( stderr, "#\n" ); fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); fprintf( stderr, "# In this case, the rule of numbering is:\n" ); fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. 
%d\n", subalignmentoffset+1, subalignmentoffset+njob ); } fprintf( stderr, "############################################################################### \n" ); fprintf( stderr, "\n" ); exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %d\n\n", skipthisbranch[i][0] ); fprintf( stderr, "group2 = " ); for( j=0; topol[i][1][j] != -1; j++ ) fprintf( stderr, "%d ", topol[i][1][j]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][1] ); } #endif for( i=0; i closeFiles() freeconstants(); devide = 0; writePre( njob, name, nlen, res_g, 1 ); #if 0 writeData( stdout, njob, name, nlen, res_g, 1 ); #endif // 2021/Sep FreeCharMtx( nogap1seq ); FreeCharMtx( seq ); FreeCharMtx( name ); free( seq_g ); // seq_g_bk de free sareteirunode FreeCharMtx( res_g ); FreeCharMtx( aseq ); FreeCharMtx( bseq ); free( segment ); closeFiles(); // 2021/Sep if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, res_g ) ); SHOWVERSION; return( 0 ); } #if 0 signed int main( int argc, char *argv[] ) { int i, nlen[M]; char b[B]; char a[] = "="; int value; gets( b ); njob = atoi( b ); /* scoremtx = 0; if( strstr( b, "ayhoff" ) ) scoremtx = 1; else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1; else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2; else scoremtx = 0; */ if( strstr( b, "constraint" ) ) cnst = 1; nlenmax = 0; i = 0; while( i nlenmax ) nlenmax = nlen[i]; i++; } } if( nlenmax > N || njob > M ) { fprintf( stderr, "ERROR in main\n" ); exit( 1 ); } /* nlenmax = Na; */ rewind( stdin ); value = main1( nlen, argc, argv ); exit( 0 ); } #endif mafft-7.505-without-extensions/core/Lalignmm.c0000644000175000017500000015367014224501721020721 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define MEMSAVE 1 #define DEBUG 0 #define USE_PENALTY_EX 0 #define STOREWM 1 #define DPTANNI 10 #define LOCAL 0 static int reccycle = 0; static double localthr; 
static void match_ribosum( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { int j, k, l; double scarr[38]; double **cpmxpd = doublework; int **cpmxpdn = intwork; int count = 0; double *matchpt; double **cpmxpdpt; int **cpmxpdnpt; int cpkd; if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif } static void match_calc( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; int count = 0; double *matchpt; double **cpmxpdpt; int **cpmxpdnpt; int cpkd; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { *matchpt = 0.0; for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif free( scarr ); } #if 0 static void match_add( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { int j, k, l; double scarr[nalphabets]; double **cpmxpd = doublework; int **cpmxpdn = intwork; int count = 0; double *matchpt; double **cpmxpdpt; int **cpmxpdnpt; int cpkd; if( initialize ) { for( j=0; j -1 ) *fpt2 += scarr[*ipt++] * *fpt++; fpt2++,iptpt++,fptpt++; } } for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k]; } #else 
matchpt = match; cpmxpdnpt = cpmxpdn; cpmxpdpt = cpmxpd; while( lgth2-- ) { // *matchpt = 0.0; // add dakara for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ ) *matchpt += scarr[cpkd] * (*cpmxpdpt)[k]; matchpt++; cpmxpdnpt++; cpmxpdpt++; } #endif } #endif #if 0 static double Atracking( char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int ist, int ien, int jst, int jen ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, klim; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = ien-ist+1; lgth2 = jen-jst+1; gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i", wm ); #endif g = mi + fgcp2[j-1]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, 
"jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } // for( j=0; j=imid; i-- ) firstm = -9999999.9; firstmp = lgth1-1; for( i=lgth1-2; i>-1; i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2]; // mi = previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! 
mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? } #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j] += *curpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM // WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! 
= %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if 0 fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif #if STOREWM // break; #else break; #endif } } #if 0 jumpi=0; jumpj=0; imid=lgth1-1; jmid=lgth2-1; } #endif // fprintf( stderr, "imid = %d, but jumpi = %d\n", imid, jumpi ); // fprintf( stderr, "jmid = %d, but jumpj = %d\n", jmid, jumpj ); // for( j=0; j amino_dis['a']['g'] -1 ) fprintf( stdout, "%d %d %8.1f", i, j, WMMTX[i][j] ); if( WMMTX[i][j] == maxwm ) fprintf( stdout, "selected \n" ); else fprintf( stdout, "\n" ); } fprintf( stdout, "\n" ); } #endif #if 0 fprintf( stderr, "jumpbacki = \n" ); for( j=0; j N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( midw ); FreeFloatVec( midm ); FreeFloatVec( midn ); FreeIntVec( 
jumpbacki ); FreeIntVec( jumpbackj ); FreeIntVec( jumpforwi ); FreeIntVec( jumpforwj ); FreeIntVec( jumpdummi ); FreeIntVec( jumpdummj ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); #if STOREWM FreeFloatMtx( WMMTX ); FreeFloatMtx( WMMTX2 ); #endif return( value ); } static double MSalignmm_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, double **cpmx1, double **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, double **gapinfo, double **map ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { double value = 0.0; register int i, j; char **aseq1, **aseq2; int ll1, ll2; int lasti, lastj, imid, jmid=0; double wm = 0.0; /* int ?????? */ double g; double *currentw, *previousw; #if USE_PENALTY_EX double fpenalty_ex = (double)RNApenalty_ex; #endif // double fpenalty = (double)penalty; double *wtmp; // short *ijppt; int *mpjpt; // short **ijp; int *mp; int mpi; double *mjpt, *prept, *curpt; double mi; double *m; double *w1, *w2; // double *match; double *initverticalw; /* kufuu sureba iranai */ double *lastverticalw; /* kufuu sureba iranai */ int **intwork; double **doublework; // short **shortmtx; #if STOREWM double **WMMTX; double **WMMTX2; #endif double *midw; double *midm; double *midn; int lgth1, lgth2; double maxwm = 0.0; int *jumpforwi; int *jumpforwj; int *jumpbacki; int *jumpbackj; int *jumpdummi; //muda int *jumpdummj; //muda int jumpi, jumpj = 0; char *gaps; int ijpi, ijpj; double *ogcp1; double *fgcp1; double *ogcp2; double *fgcp2; double firstm; int firstmp; #if 0 static char ttt1[50000]; static char ttt2[50000]; #endif localthr = -offset + 500; // 0? 
ogcp1 = gapinfo[0] + ist; fgcp1 = gapinfo[1] + ist; ogcp2 = gapinfo[2] + jst; fgcp2 = gapinfo[3] + jst; depth++; reccycle++; lgth1 = ien-ist+1; lgth2 = jen-jst+1; // if( lgth1 < 5 ) // fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 ); // if( lgth2 < 5 ) // fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 ); // #if 0 fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen ); strncpy( ttt1, seq1[0]+ist, lgth1 ); strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt1[lgth1] = 0; ttt2[lgth2] = 0; fprintf( stderr, "seq1 = %s\n", ttt1 ); fprintf( stderr, "seq2 = %s\n", ttt2 ); #endif if( lgth2 <= 0 ) // lgth1 <= 0 ha? { // fprintf( stderr, "\n\n==== jimei\n\n" ); // exit( 1 ); for( i=0; i", wm ); #endif g = mi + fgcp2[j-1]; // g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = -( j - mpi ); } g = *prept + ogcp2[j]; // g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fgcp1[i-1]; // g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijppt = +( i - *mpjpt ); } g = *prept + ogcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; #if STOREWM WMMTX[i][j] = *curpt; WMMTX2[i][j] = *mjpt; #endif if( i == imid ) //muda { jumpbackj[j] = *mpjpt; // muda atode matomeru jumpbacki[j] = mpi; // muda atode matomeru // fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt ); // fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi ); midw[j] = *curpt; midm[j] = *mjpt; midn[j] = mi; } // fprintf( stderr, "m[%d] = %f\n", j, m[j] ); mjpt++; prept++; mpjpt++; curpt++; } lastverticalw[i] = currentw[lgth2-1]; #if STOREWM 
WMMTX2[i][lgth2] = m[lgth2-1]; #endif #if 0 // ue if( i == imid ) { for( j=0; j0; --j ) { m[j-1] = currentw[j] + fgcp2[lgth2-2]; // m[j-1] = currentw[j]; mp[j] = lgth1-1; } // for( j=0; j=imid; i-- ) firstm = -9999999.9; firstmp = lgth1-1; for( i=lgth1-2; i>-1; i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; // match_calc( currentw, seq1, seq2, i, lgth2 ); match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 ); currentw[lgth2-1] = initverticalw[i]; // m[lgth2] = fgcp1[i]; // WMMTX2[i][lgth2] += m[lgth2]; // fprintf( stderr, "m[] = %f\n", m[lgth2] ); mi = previousw[lgth2-1] + fgcp2[lgth2-2]; // mi = previousw[lgth2-1]; mpi = lgth2 - 1; mjpt = m + lgth2 - 2; prept = previousw + lgth2 - 1; curpt = currentw + lgth2 - 2; mpjpt = mp + lgth2 - 2; for( j=lgth2-2; j>-1; j-- ) { wm = *prept; ijpi = i+1; ijpj = j+1; g = mi + ogcp2[j+1]; // g = mi + fpenalty; if( g > wm ) { wm = g; ijpj = mpi; ijpi = i+1; } g = *prept + fgcp2[j]; // g = *prept; if( g >= mi ) { // fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 ); mi = g; mpi = j + 1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt ); g = *mjpt + ogcp1[i+1]; // g = *mjpt + fpenalty; if( g > wm ) { wm = g; ijpi = *mpjpt; ijpj = j+1; } // if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j ); g = *prept + fgcp1[i]; // g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i + 1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( i == jumpi || i == imid - 1 ) { jumpforwi[j] = ijpi; //muda jumpforwj[j] = ijpj; //muda // fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi ); // fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj ); } if( i == imid ) // muda { midw[j] += wm; // midm[j+1] += *mjpt + fpenalty; //?????? midm[j+1] += *mjpt; //?????? } if( i == imid - 1 ) { // midn[j] += mi + fpenalty; //???? midn[j] += mi; //???? 
} #if LOCAL if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); wm = 0; } #endif #if STOREWM WMMTX[i][j] += wm; // WMMTX2[i][j+1] += *mjpt + fpenalty; WMMTX2[i][j] += *curpt; #endif *curpt += wm; mjpt--; prept--; mpjpt--; curpt--; } // fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 ); g = *prept + fgcp1[i]; if( firstm < g ) { firstm = g; firstmp = i + 1; } #if STOREWM // WMMTX2[i][j+1] += firstm; #endif if( i == imid ) midm[j+1] += firstm; if( i == imid - 1 ) { maxwm = midw[1]; jmid = 0; // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); for( j=2; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } for( j=0; j maxwm ) { jmid = j; maxwm = wm; } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); } // if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm ); // fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid ); wm = midw[jmid]; jumpi = imid-1; jumpj = jmid-1; if( jmid > 0 && midn[jmid-1] > wm ) //060413 { jumpi = imid-1; jumpj = jumpbacki[jmid]; wm = midn[jmid-1]; // fprintf( stderr, "rejump (n)\n" ); } if( midm[jmid] > wm ) { jumpi = jumpbackj[jmid]; jumpj = jmid-1; wm = midm[jmid]; // fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi ); } // fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid ); // fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj ); #if 0 fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "midn = \n" ); for( j=0; j= lgth2 ) { // fprintf( stderr, "CHUI1!\n" ); jumpi=imid-1; jmid=lgth2; jumpj = lgth2-1; } #endif else { imid = jumpforwi[jumpj]; jmid = jumpforwj[jumpj]; } #if 0 fprintf( stderr, "jumpi -> %d\n", jumpi ); fprintf( stderr, "jumpj -> %d\n", jumpj ); fprintf( stderr, "imid -> %d\n", imid ); fprintf( stderr, "jmid -> %d\n", jmid ); #endif #if STOREWM // break; #else break; #endif } } #if 0 jumpi=0; jumpj=0; imid=lgth1-1; jmid=lgth2-1; } #endif // fprintf( stderr, "imid = %d, but 
jumpi = %d\n", imid, jumpi ); // fprintf( stderr, "jmid = %d, but jumpj = %d\n", jmid, jumpj ); // for( j=0; j amino_dis['a']['g'] -1 ) fprintf( stdout, "%d %d %8.1f", i, j, WMMTX[i][j] ); if( WMMTX[i][j] == maxwm ) fprintf( stdout, "selected \n" ); else fprintf( stdout, "\n" ); } fprintf( stdout, "\n" ); } exit( 1 ); #endif #if 0 fprintf( stderr, "jumpbacki = \n" ); for( j=0; j N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } #endif #if 0 fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid ); fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid ); fprintf( stderr, "imid = %d\n", imid ); fprintf( stderr, "jmid = %d\n", jmid ); #endif FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( midw ); FreeFloatVec( midm ); FreeFloatVec( midn ); FreeIntVec( jumpbacki ); FreeIntVec( jumpbackj ); FreeIntVec( jumpforwi ); FreeIntVec( jumpforwj ); FreeIntVec( jumpdummi ); FreeIntVec( jumpdummj ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); #if STOREWM FreeFloatMtx( WMMTX ); FreeFloatMtx( WMMTX2 ); #endif free( gaps ); #if MEMSAVE free( aseq1 ); free( aseq2 ); #else FreeCharMtx( aseq1 ); FreeCharMtx( aseq2 ); #endif return( value ); } double Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, double **map ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; int i, j; int ll1, ll2; int lgth1, lgth2; double wm = 0.0; /* int ?????? 
*/ char **mseq1; char **mseq2; // char **mseq; double *ogcp1; double *ogcp2; double *fgcp1; double *fgcp2; double **cpmx1; double **cpmx2; double **gapinfo; // double fpenalty; double fpenalty = (double)RNApenalty; int nglen1, nglen2; #if 0 fprintf( stderr, "eff in SA+++align\n" ); for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, mseq1, mseq2, 0, gapinfo, map ); #if DEBUG fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); #endif // fprintf( stderr, "wm = %f\n", wm ); #if 0 for( i=0; i%d of GROUP1\n", i ); fprintf( stdout, "%s\n", seq1[i] ); } for( i=0; i%d of GROUP2\n", i ); fprintf( stdout, "%s\n", seq2[i] ); } fflush( stdout ); #endif wm = MSalign2m2m_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, mseq1, mseq2, 0, gapinfo, map ); #if DEBUG fprintf( stderr, " seq1[0] = %s\n", seq1[0] ); fprintf( stderr, " seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] ); fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] ); #endif // fprintf( stderr, "wm = %f\n", wm ); #if 0 for( i=0; i all positive double ribosum4[4][4] = { // a g c t { 2.22, -1.46, -1.86, -1.39, }, // a { -1.46, 1.03, -2.48, -1.74, }, // g { -1.86, -2.48, 1.16, -1.05, }, // c { -1.39, -1.74, -1.05, 1.65, }, // t }; double ribosum16[16][16] = { // aa ag ac at ga gg gc gt ca cg cc ct ta tg tc tt { -2.49, -8.24, -7.04, -4.32, -6.86, -8.39, -5.03, -5.84, -8.84, -4.68, -14.37, -12.64, -4.01, -6.16, -11.32, -9.05, }, // aa { -8.24, -0.80, -8.89, -5.13, -8.61, -5.38, -5.77, -6.60, -10.41, -4.57, -14.53, -10.14, -5.43, -5.94, -8.87, -11.07, }, // ag { -7.04, -8.89, -2.11, -2.04, -9.73, -11.05, -3.81, 
-4.72, -9.37, -5.86, -9.08, -10.45, -5.33, -6.93, -8.67, -7.83, }, // ac { -4.32, -5.13, -2.04, 4.49, -5.33, -5.61, 2.70, 0.59, -5.56, 1.67, -6.71, -5.17, 1.61, -0.51, -4.81, -2.98, }, // at { -6.86, -8.61, -9.73, -5.33, -1.05, -8.67, -4.88, -6.10, -7.98, -6.00, -12.43, -7.71, -5.85, -7.55, -6.63, -11.54, }, // ga { -8.39, -5.38, -11.05, -5.61, -8.67, -1.98, -4.13, -5.77, -11.36, -4.66, -12.58, -13.69, -5.75, -4.27, -12.01, -10.79, }, // gg { -5.03, -5.77, -3.81, 2.70, -4.88, -4.13, 5.62, 1.21, -5.95, 2.11, -3.70, -5.84, 1.60, -0.08, -4.49, -3.90, }, // gc { -5.84, -6.60, -4.72, 0.59, -6.10, -5.77, 1.21, 3.47, -7.93, -0.27, -7.88, -5.61, -0.57, -2.09, -5.30, -4.45, }, // gt { -8.84, -10.41, -9.37, -5.56, -7.98, -11.36, -5.95, -7.93, -5.13, -3.57, -10.45, -8.49, -2.42, -5.63, -7.08, -8.39, }, // ca { -4.68, -4.57, -5.86, 1.67, -6.00, -4.66, 2.11, -0.27, -3.57, 5.36, -5.71, -4.96, 2.75, 1.32, -4.91, -3.67, }, // cg { -14.37, -14.53, -9.08, -6.71, -12.43, -12.58, -3.70, -7.88, -10.45, -5.71, -3.59, -5.77, -6.88, -8.41, -7.40, -5.41, }, // cc { -12.64, -10.14, -10.45, -5.17, -7.71, -13.69, -5.84, -5.61, -8.49, -4.96, -5.77, -2.28, -4.72, -7.36, -3.83, -5.21, }, // ct { -4.01, -5.43, -5.33, 1.61, -5.85, -5.75, 1.60, -0.57, -2.42, 2.75, -6.88, -4.72, 4.97, 1.14, -2.98, -3.39, }, // ta { -6.16, -5.94, -6.93, -0.51, -7.55, -4.27, -0.08, -2.09, -5.63, 1.32, -8.41, -7.36, 1.14, 3.36, -4.76, -4.28, }, // tg { -11.32, -8.87, -8.67, -4.81, -6.63, -12.01, -4.49, -5.30, -7.08, -4.91, -7.40, -3.83, -2.98, -4.76, -3.21, -5.97, }, // tc { -9.05, -11.07, -7.83, -2.98, -11.54, -10.79, -3.90, -4.45, -8.39, -3.67, -5.41, -5.21, -3.39, -4.28, -5.97, -0.02, }, // tt }; int locpenaltyn = -1750; char locaminon[] = "agctuAGCTUnNbdhkmnrsvwyx-O"; char locgrpn[] = { 0, 1, 2, 3, 3, 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; int exgpn = +00; int locn_disn[26][26] = /* u ha constants.c no nakade shori */ /* 0 - 4 dake yomareru. 
*/ { { 1000, 600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 600, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 1000, 600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 600, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 500, 500, 0, 0, 0, 500, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -500, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, -500, 0, 500, }, }; mafft-7.505-without-extensions/core/miyata5.h0000644000175000017500000001103314224501721020521 0ustar nileshnileshint locpenaltym = -1440; int exgpm = +0; /* != 0 nisuruto kowareru. exgp ha constants.c de kurikomu */ char locaminom[] = "ARNDCQEGHILKMFPSTWYVBZX.-J"; char locgrpm[] = { 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, 6, 6, 6, 1, }; int locn_dism[26][26] = { { 600, -235, 91, -78, 202, 51, -103, 340, -21, -169, -189, -246, -92, -323, 582, 454, 342, -400, -309, 71, 7, -26, -15, -400, 0,-1400, }, { -235, 600, 17, -69, -275, 277, 185, -400, 365, -112, -149, 485, -55, -106, -229, -183, 20, -178, 22, -95, -26, 231, -15, -400, 0,-1400, }, { 91, 17, 600, 414, -209, 317, 357, 39, 231, -363, -398, 74, -280, -400, 85, 225, 200, -400, -378, -189, 507, 337, -15, -400, 0,-1400, }, { -78, -69, 414, 600, -395, 179, 342, -78, 108, -400, -400, 14, -400, -400, -86, 65, 14, -400, -400, -372, 507, 261, -15, -400, 0,-1400, }, { 202, -275, -209, -395, 600, -109, -332, -35, -132, 134, 128, -335, 182, -40, 220, 74, 185, -355, -81, 354, -302, -220, -15, -400, 0,-1400, }, { 51, 277, 317, 179, -109, 600, 360, -109, 508, -135, -172, 297, -58, -203, 51, 128, 280, -378, -109, -9, 248, 480, -15, -400, 0,-1400, }, { -103, 185, 357, 342, -332, 360, 600, -195, 325, -369, -400, 274, -295, -400, -109, 11, 77, -400, -321, -249, 350, 480, -15, -400, 0,-1400, }, { 340, -400, 39, -78, -35, -109, -195, 600, -195, -400, -400, -400, -355, -400, 322, 357, 114, -400, -400, -189, -19, -152, -15, -400, 0,-1400, }, { -21, 365, 231, 108, -132, 508, 325, -195, 600, -100, -141, 374, -26, -152, -15, 45, 222, -303, -49, -3, 169, 417, -15, -400, 0,-1400, 
}, { -169, -112, -363, -400, 134, -135, -369, -400, -100, 600, 560, -212, 517, 425, -149, -243, -12, 108, 354, 357, -400, -252, -15, -400, 0,-1400, }, { -189, -149, -398, -400, 128, -172, -400, -400, -141, 560, 600, -252, 482, 420, -172, -269, -43, 105, 331, 340, -400, -290, -15, -400, 0,-1400, }, { -246, 485, 74, 14, -335, 297, 274, -400, 374, -212, -252, 600, -152, -215, -240, -175, -1, -289, -92, -172, 44, 285, -15, -400, 0,-1400, }, { -92, -55, -280, -400, 182, -58, -295, -355, -26, 517, 482, -152, 600, 365, -75, -163, 68, 59, 334, 422, -368, -176, -15, -400, 0,-1400, }, { -323, -106, -400, -400, -40, -203, -400, -400, -152, 425, 420, -215, 365, 600, -306, -386, -143, 282, 462, 191, -400, -315, -15, -400, 0,-1400, }, { 582, -229, 85, -86, 220, 51, -109, 322, -15, -149, -172, -240, -75, -306, 600, 440, 351, -400, -292, 88, 0, -29, -15, -400, 0,-1400, }, { 454, -183, 225, 65, 74, 128, 11, 357, 45, -243, -269, -175, -163, -386, 440, 600, 345, -400, -352, -15, 145, 70, -15, -400, 0,-1400, }, { 342, 20, 200, 14, 185, 280, 77, 114, 222, -12, -43, -1, 68, -143, 351, 345, 600, -400, -100, 194, 107, 178, -15, -400, 0,-1400, }, { -400, -178, -400, -400, -355, -378, -400, -400, -303, 108, 105, -289, 59, 282, -400, -400, -400, 600, 297, -118, -400, -400, -15, -400, 0,-1400, }, { -309, 22, -378, -400, -81, -109, -321, -400, -49, 354, 331, -92, 334, 462, -292, -352, -100, 297, 600, 165, -400, -215, -15, -400, 0,-1400, }, { 71, -95, -189, -372, 354, -9, -249, -189, -3, 357, 340, -172, 422, 191, 88, -15, 194, -118, 165, 600, -280, -129, -15, -400, 0,-1400, }, { 7, -26, 507, 507, -302, 248, 350, -19, 169, -400, -400, 44, -368, -400, 0, 145, 107, -400, -400, -280, 507, 299, -400, -400, 0,-1400, }, { -26, 231, 337, 261, -220, 480, 480, -152, 417, -252, -290, 285, -176, -315, -29, 70, 178, -400, -215, -129, 299, 480, -400, -400, 0,-1400, }, { -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -400, -400, -400, -400, 0,-1400, }, { 
-400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, 0,-1400, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { -1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400, -1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400, -1400,-1400,-1400,-1400, 0, 1600, }, }; mafft-7.505-without-extensions/core/addfunctions.c0000644000175000017500000015435114224501721021637 0ustar nileshnilesh#include "mltaln.h" static void strncpy0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } #if 0 static void strncpy0x( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0b0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0b1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0b2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0n0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0n1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0n2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0a0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0a1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0a2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0o0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0o1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static void strncpy0o2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } #endif static void eqpick( char *aseq, char *seq ) { for( ; *seq != 0; seq++ ) { if( *seq != '=' ) *aseq++ = *seq; } *aseq = 0; } void 
profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap { int i, newlen; double *effarr0, *effarr2; int *allgap0, *allgap2; double dumdb; int alcount0, alcount2; if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // --allowshift no tokiha... // reporterr( "profilealignment!\n" ); commongappick( n0, aln0 ); commongappick( n2, aln2 ); effarr0 = AllocateDoubleVec( n0 ); effarr2 = AllocateDoubleVec( n2 ); allgap0 = AllocateIntVec( n0 ); allgap2 = AllocateIntVec( n2 ); #if 1 // new weight 2015/Jun alcount0 = 0; for( i=0; ialn0[%d] = \n%s\n", i, aln0[i] ); for( i=0; ialn1[%d] = \n%s\n", i, aln1[i] ); for( i=0; ialn2[%d] = \n%s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); free( allgap0 ); free( allgap2 ); } static void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap { int i, j, newlen; double *effarr0 = NULL, *effarr2 = NULL; int *allgap0 = NULL, *allgap2 = NULL; double dumdb; int alcount0, alcount2; char *cptr; // effarr0 = AllocateDoubleVec( n0 ); // effarr2 = AllocateDoubleVec( n2 ); // allgap0 = AllocateIntVec( n0 ); // allgap2 = AllocateIntVec( n2 ); // if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // --allowshift no tokiha... 
// reporterr( "In profilealignment(), strlen( aln0[0] ) %d\n", strlen( aln0[0] ) ); // reporterr( "In profilealignment(), strlen( aln2[0] ) %d\n", strlen( aln2[0] ) ); commongappick( n0, aln0 ); commongappick( n2, aln2 ); // reporterr( "after commongappick, strlen( aln0[0] ) %d\n", strlen( aln0[0] ) ); // reporterr( "after commongappick, strlen( aln2[0] ) %d\n", strlen( aln2[0] ) ); // reporterr( "\n\n\n" ); if( aln2[0][0] == 0 ) { newlen = j = strlen( aln0[0] ); cptr = aln2[0]; while( j-- ) *cptr++ = '-'; *cptr = 0; cptr = aln2[0]; for( i=1; ialn0[%d] = %s\n", i, aln0[i] ); for( i=0; ialn1[%d] = %s\n", i, aln1[i] ); for( i=0; ialn2[%d] = %s\n", i, aln2[i] ); #endif #if 0 fprintf( stderr, "in profilealignment, after commongappick\n" ); for( i=0; ialn0[%d] = %s\n", i, aln0[i] ); for( i=0; ialn1[%d] = %s\n", i, aln1[i] ); for( i=0; ialn2[%d] = %s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); free( allgap0 ); free( allgap2 ); } void eq2dashmatomete( char **s, int n ) { int i, j; char sj; for( j=0; (sj=s[0][j]); j++ ) { if( sj == '=' ) { for( i=0; i0; i-- ) // break ari no baai, migihajiha saigo { if( ref[i-1] == '+' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '=' ) { // reporterr( "hit! i=%d, len=%d\n", i, len ); hit = realloc( hit, (nhit+1) * sizeof( int ) ); hit[nhit] = i; nhit += 1; // break; } } if( nhit == 0 ) return( 0 ); for( k=0; k 1 ) exit( 1 ); return( val ); } static int smoothing1leftmulti( int len, char *ref ) // osoi! { int i, j, k; int shiftfrom = -1; int shiftto = -1; int *hit; int val = 0, nhit = 0; hit = NULL; // reporterr( "ref (1leftmulti) = %s\n", ref ); for( i=1, nhit=0; i0; i-- ) // break ari no baai, migihajiha saigo { if( ref[i-1] == '=' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '+' ) { // reporterr( "hit! 
i=%d, len=%d\n", i, len ); hit = realloc( hit, (nhit+1) * sizeof( int ) ); hit[nhit] = i; nhit += 1; // break; } } if( nhit == 0 ) return( 0 ); for( k=0; k-1; j-- ) { if( ref[j] != '=' ) { shiftto = j+1; break; } } if( j == -1 && ref[0] == '=' ) { reporterr( "hit[i].end = %d, j = -1, skip!\n" ); continue; } if( shiftto > 0 && ref[shiftto-1] == '+' ) continue; // muda dakara val += 1; shiftfrom = hit[k]; if( ref[shiftto] != '=' ) // atode sakujo { reporterr( "Error in smoothing1left!\n" ); exit( 1 ); } ref[shiftto] = ref[shiftfrom]; ref[shiftfrom] = '='; } free( hit ); // reporterr( "ref (1leftmulti) = %s\n", ref ); reporterr( " %d out of %d have been smoothed (left).\n", val, nhit ); // if( nhit > 1 ) exit( 1 ); return( val ); } void restorecommongapssmoothly( int njob, int n0, char **seq, int *ex1, int *ex2, int *gapmap, int alloclen, char gapchar ) { int *mem; char *tmpseq; char *cptr; int *iptr; int *tmpgapmap; int i, j, k, len, rep1, rep2, len1, klim, leninserted; int totalres; if( n0 == 0 ) return; mem = calloc( njob+1, sizeof( int ) ); // +1 ha iranai. 
intcpy( mem, ex1 ); intcat( mem, ex2 ); // tmpseq = calloc( alloclen+2, sizeof( char ) ); // tmpgapmap = calloc( alloclen+2, sizeof( int ) ); #if 0 // iranai for( i=0; (k=mem[i])!=-1; i++ ) // iranai reporterr( "mem[%d] = %d\n", i, k ); // iranai if( i == njob ) // iranai { fprintf( stderr, "Error in restorecommongaps()\n" ); free( mem ); exit( 1 ); } #endif rep1 = ex1[0]; rep2 = ex2[0]; len = strlen( seq[rep1] ); len1 = len+1; tmpseq = calloc( alloclen, sizeof( char ) ); tmpgapmap = calloc( alloclen, sizeof( int ) ); #if 0 reporterr( "\n" ); reporterr( "seq[rep1] = %s\n", seq[rep1] ); reporterr( "seq[rep2] = %s\n", seq[rep2] ); #endif for( k=0; (i=mem[k])!=-1; k++ ) { cptr = tmpseq; for( j=0; j\n" ); reporterr( "seq[rep1] = \n%s\n", seq[rep1] ); reporterr( "seq[rep2] = \n%s\n", seq[rep2] ); #endif leninserted = strlen( seq[rep1] ); #if 0 reporterr( "gapmap =\n" ); for(j=0; j0; i-- ) reporterr( "-" ); } reporterr( "\n" ); #endif #if 0 resprev = 10000; // tekitou while( 1 ) { res = 0; // reporterr( "\nsmoothing1right..\n" ); res = (0= resprev ) break; // if( res == 0 ) break; resprev = res; } #else totalres = 0; totalres += smoothing1rightmulti( leninserted, seq[rep1] ); totalres += smoothing1leftmulti( leninserted, seq[rep1] ); if( totalres ) reflectsmoothing( seq[rep1], ex1, seq, leninserted ); totalres = 0; totalres += smoothing1rightmulti( leninserted, seq[rep2] ); totalres += smoothing1leftmulti( leninserted, seq[rep2] ); if( totalres ) reflectsmoothing( seq[rep2], ex2, seq, leninserted ); #endif for( k=0; (i=mem[k])!=-1; k++ ) plus2gapchar( seq[i], gapchar ); #if 0 reporterr( "->\n" ); reporterr( "seq[rep1] = \n%s\n", seq[rep1] ); reporterr( "seq[rep2] = \n%s\n", seq[rep2] ); reporterr( "gapmap =\n" ); for(j=0; j0; i-- ) reporterr( "-" ); } reporterr( "\n" ); #endif iptr = tmpgapmap; for( j=0; j _ no tame fprintf( fp, ">%s\n", nameptr ); fprintf( fp, "# letter, position in the original sequence, position in the reference alignment\n" ); #if 0 // reporterr( 
"addbk[%d] = %s\n", i, addbk[i] ); for( j=0; (p=deletelist[i][j])!=-1; j++ ) { // reporterr( "deleting %d, %c\n", p, addbk[i][p] ); gapped[p] = '-'; } #else // reporterr( "addbk[%d] = %s\n", i, addbk[i] ); for( j=0; (p=deletelist[i][j].pos)!=-1; j++ ) { // reporterr( "deleting %d, %c\n", p, addbk[i][p] ); gaplen = deletelist[i][j].len; while( gaplen-- ) gapped[p++] = '-'; } #endif // reporterr( "addbk = %s\n", addbk[i] ); // reporterr( "gapped = %s\n", gapped ); for( j=0,p=0; j Position in reference\n" ); for( i=0; i _ no tame status = 0; #if 0 // reporterr( "addbk[%d] = %s\n", i, addbk[i] ); for( j=0; (p=deletelist[i][j])!=-1; j++ ) { // reporterr( "deleting %d, %c\n", p, addbk[i][p] ); gapped[p] = '-'; status = 1; } #else // reporterr( "addbk[%d] = %s\n", i, addbk[i] ); for( j=0; (p=deletelist[i][j].pos)!=-1; j++ ) { // reporterr( "deleting %d-%d, %c\n", p, p+deletelist[i][j].len, addbk[i][p] ); gaplen = deletelist[i][j].len; while( gaplen-- ) gapped[p++] = '-'; // origin??????????? 2022/Jan status = 1; } #endif // reporterr( "addbk = %s\n", addbk[i] ); // reporterr( "gapped = %s\n", gapped ); if( status == 0 ) { free( gapped ); continue; } fprintf( fp, ">%s\n", nameptr ); status = -1; #if MODIFYNAME insstr[0] = 0; nins = 0; #endif for( j=0,p=0; j %dv%d\n", j, addbk[i][j-1], p, p+1 ); // 1origin #if MODIFYNAME if( nins == 1 ) sprintf( insstr+strlen(insstr), "%d%c,", j, addbk[i][j-1] ); #endif } status = 0; // fprintf( fp, "%c, %d, %d\n", addbk[i][j], j+1, p+1 ); // 1origin p++; } } if( status == 1 ) { fprintf( fp, "%d%c > %dv%d\n", j, addbk[i][j-1], p, p+1 ); // 1origin #if MODIFYNAME if( nins == 1 ) sprintf( insstr+strlen(insstr), "%d%c,", j, addbk[i][j-1] ); #endif } free( gapped ); #if MODIFYNAME insstr[strlen(insstr)-1] = 0; strcpy( newname, name[i] ); sprintf( newname+(nameptr-name[i]), "%dins:%s|%s", nins, insstr, nameptr ); newname[B] = 0; strcpy( name[i], newname ); #endif } #if MODIFYNAME free( newname ); free( insstr ); #endif } 
mafft-7.505-without-extensions/core/univscript.tmpl0000644000175000017500000000243014224501721022104 0ustar nileshnileshprogs="_PROGS" progs="$progs dash_client" for prog in $progs; do printf $prog" " done make clean make CC="/opt/homebrew-x86_64/bin/gcc-11" CFLAGS="-O3 -arch x86_64 -mmacosx-version-min=10.9 -DMACOSX_DEPLOYMENT_TARGET=10.9" LIBS="-static-libgcc -lm -lpthread" ENABLE_MULTITHREAD="-Denablemultithread" rm -f dash_client env CC='/opt/homebrew-x86_64/bin/gcc-11 -static-libgcc -O3 -arch x86_64' GOARCH=amd64 GOOS=darwin CGO_ENABLED=0 /opt/homebrew-x86_64/bin/go build --ldflags '-extldflags "-static"' dash_client.go # uwagaki for prog in $progs; do mv $prog $prog.x86_64 done make clean make CC="/opt/homebrew/bin/gcc-11" CFLAGS="-static-libgcc -O3 -arch arm64 -mmacosx-version-min=11.1 -DMACOSX_DEPLOYMENT_TARGET=11.1" LIBS="-lm -lpthread" ENABLE_MULTITHREAD="-Denablemultithread" rm -f dash_client env CC='/opt/homebrew/bin/gcc-11 -static-libgcc -O3 -arch arm64' GOARCH=arm64 GOOS=darwin CGO_ENABLED=0 /opt/homebrew/bin/go build --ldflags '-extldflags "-static"' dash_client.go # uwagaki for prog in $progs; do mv $prog $prog.arm64 done set $progs for prog in $progs; do # lipo -create $prog.icc $prog.ppc32 $prog.ppc64 -output $prog # lipo -create $prog.intel64 $prog.intel32 $prog.ppc32 $prog.ppc64 -output $prog lipo -create $prog.x86_64 $prog.arm64 -output $prog cp $prog ../binaries done mafft-7.505-without-extensions/core/mafft-sparsecore.tmpl0000644000175000017500000001714114224501721023144 0ustar nileshnilesh#! 
/usr/bin/env ruby require 'optparse' mafftpath = "_BINDIR/mafft" def cleartempfiles( filenames ) for f in filenames system( "rm -rf #{f}" ) end end seed = 0 scand = "50%" npick = 500 infn = "" reorderoption = "--reorder" pickoptions = " --retree 1 " coreoptions = " --globalpair --maxiterate 100 " corelastarg = " " addoptions = " " directionoptions = " --retree 0 --pileup " markcore = "" randompickup = true outnum = false begin params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:') rescue => e STDERR.puts e STDERR.puts "See #{$0} -h" exit 1 end #p params mafftpath = params["m"] if params["m"] seed = params["s"].to_i if params["s"] scand = params["n"].to_s if params["n"] npick = params["p"].to_i if params["p"] infn = params["i"] if params["i"] #pickoptions += params["P"] if params["P"] coreoptions += params["C"] if params["C"] # tsuikagaki! corelastarg += params["L"] if params["L"] # tsuikagaki! addoptions += params["A"] if params["A"] directionoptions += params["D"] if params["D"] # tsuikagaki markcore = "*" if params["M"] #randompickup = false if params["S"] reorderoption = "" if params["o"] =~ /^i/ outnum = true if params["u"] if params["h"] then STDERR.puts "Usage: #{$0} -i inputfile [options]" STDERR.puts "Options:" STDERR.puts " -i string Input file." STDERR.puts " -m string Mafft command. Default: mafft" STDERR.puts " -s int Seed. Default:0" STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length" STDERR.puts " -p int Number of core sequences. Default: 500" # STDERR.puts " -P \"string\" Mafft options for the PICKUP stage." # STDERR.puts " Default: \"--retree 1\"" # STDERR.puts " -S Tree-based pickup. Default: off" STDERR.puts " -C \"string\" Mafft options for the CORE stage." STDERR.puts " Default: \"--globalpair --maxiterate 100\"" STDERR.puts " -A \"string\" Mafft options for the ADD stage." STDERR.puts " Default: \"\"" STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences." 
STDERR.puts " Default: \"\"" STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default" STDERR.puts " i: Same as input." exit 1 end if infn == "" then STDERR.puts "Give input file with -i." exit 1 end pid = $$.to_s tmpdir = ENV["TMPDIR"] tmpdir = "/tmp" if tmpdir == nil tempfiles = [] tempfiles.push( temp_pf = tmpdir + "/_pf" + pid ) tempfiles.push( temp_nf = tmpdir + "/_nf" + pid ) tempfiles.push( temp_cf = tmpdir + "/_cf" + pid ) tempfiles.push( temp_of = tmpdir + "/_of" + pid ) Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1} at_exit{ cleartempfiles( tempfiles )} system "#{mafftpath} --version > #{temp_of} 2>&1" fp = File.open( temp_of, "r" ) line = fp.gets fp.close versionnum = line.split(' ')[0].sub(/v/,"").to_f if versionnum < 7.210 then STDERR.puts "\n" STDERR.puts "Please use mafft version >= 7.210\n" STDERR.puts "\n" exit end srand( seed ) def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").chop ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end begin infp = File.open( infn, "r" ) rescue => e STDERR.puts e exit 1 end infp.close if directionoptions =~ /--adjustdirection/ then system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" ) else system( "cp #{infn} #{temp_of}" ) end tname = [] tseq = [] infp = File.open( temp_of, "r" ) tin = readfasta( infp, tname, tseq ) infp.close lenhash = {} if outnum then for i in 1..(tin) tname[i-1] = "_numo_s_0#{i}_numo_e_" + tname[i-1] end end npick = 0 if npick == 1 npick = tin if npick > tin if scand =~ /%$/ then ncand = (tin * scand.to_f * 0.01 ).to_i else ncand = scand.to_i end if ncand < 0 || ncand > tin then STDERR.puts "Error. -n #{scand}?" 
exit 1 end ncand = npick if ncand < npick ncand = tin if ncand > tin STDERR.puts "ncand = #{ncand}, npick = #{npick}" sai = [] for i in 0..(tin-1) lenhash[i] = tseq[i].gsub(/-/,"").length end i = 0 sorted = lenhash.sort_by{|key, value| [-value, i+=1]} #for i in 0..(ncand-1) # sai[sorted[i][0]] = 1 #end #for i in ncand..(tin-1) # sai[sorted[i][0]] = 0 #end ncandres = 0 ntsukau = 0 for i in 0..(tin-1) cand = sorted[i][0] if tname[cand] =~ /^_focus_/ then sai[cand] = 0 ntsukau += 1 elsif ncandres < ncand then unless tname[cand] =~ /^_tsukawanai_/ then sai[cand] = 1 ncandres += 1 else sai[cand] = 0 end else sai[cand] = 0 end end if ncandres+ntsukau < npick STDERR.puts "ncandres = #{ncandres}" STDERR.puts "ncand = #{ncand}" STDERR.puts "ntsukau = #{ntsukau}" STDERR.puts "npick = #{npick}" STDERR.puts "Too many _tsukawanai_ sequences." exit 1 end if ntsukau > npick STDERR.puts "ntsukau = #{ntsukau}" STDERR.puts "npick = #{npick}" STDERR.puts "Too many _focus_ sequences." exit 1 end #p sai #for i in 0..(tin-1) # puts sai[i].to_s + " " + tname[i] #end npickrand = npick - ntsukau if randompickup pick = [] for i in 0..(npickrand-1) pick[i] = 1 end for i in npickrand..(ncandres-1) pick[i] = 0 end pick2 = pick.sort_by{rand} pick = pick2 # p pick # p sai ipick = 0 for i in 0..(tin-1) if sai[i] == 1 then if pick[ipick] == 0 then sai[i] = 0 end ipick += 1 end end # p sai for i in 0..(tin-1) if tname[i] =~ /^_focus_/ then sai[i] = 1 end end # p sai pfp = File.open( temp_pf, 'w' ) nfp = File.open( temp_nf, 'w' ) i = 0 while i < tin if sai[i] == 1 then pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i] pfp.puts tseq[i] else nfp.puts ">" + i.to_s + " " + ">" + tname[i] nfp.puts tseq[i] end i += 1 end nfp.close pfp.close else # yamerukamo STDERR.puts "Not supported in this version" exit 1 end if npick > 1 then if npick < tin then system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort else system( mafftpath + " #{coreoptions} #{reorderoption} 
#{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort end res = ( File::stat(temp_cf).size == 0 ) else system( "cat /dev/null > #{temp_cf}" ) res = false end if res == true then STDERR.puts "\n\nError in the core alignment stage.\n\n" exit 1 end if npick < tin system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" ) res = ( File::stat(temp_of).size == 0 ) else system( "cp #{temp_cf} #{temp_of}" ) res = false end if res == true then STDERR.puts "\n\nError in the add stage.\n\n" exit 1 end resname = [] resseq = [] resfp = File.open( temp_of, "r" ) nres = readfasta( resfp, resname, resseq ) resfp.close if reorderoption =~ /--reorder/ then for i in 0..(nres-1) puts ">" + resname[i].sub(/^[0-9]* >/,"") puts resseq[i] end else seqhash = {} namehash = {} seqlast = [] namelast = [] nlast = 0 for i in 0..(nres-1) if resname[i] =~ /^[0-9]* >/ key = resname[i].split(' ')[0] seqhash[key] = resseq[i] namehash[key] = resname[i] else seqlast.push( resseq[i] ) namelast.push( resname[i] ) nlast += 1 end end for i in 0..(nlast-1) puts ">" + namelast[i] puts seqlast[i] end for i in 0..(nres-nlast-1) key = i.to_s puts ">" + namehash[key].sub(/^[0-9]* >/,"") puts seqhash[key] end end mafft-7.505-without-extensions/core/countlen.c0000644000175000017500000000214714224501721021000 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 void arguments( int argc, char *argv[] ) { int c; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; int nlenmin; double nfreq; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", 
inputfile ); exit( 1 ); } } else infp = stdin; dorp = NOTSPECIFIED; getnumlen_nogap_countn( infp, &nlenmin, &nfreq ); fprintf( stdout, "%d x %d - %d %c nfreq=%f\n", njob, nlenmax, nlenmin, dorp, nfreq ); fclose( infp ); return( 0 ); } mafft-7.505-without-extensions/core/multi2hat3s.c0000644000175000017500000002104714224501721021330 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 1 #define TSUYOSAFACTOR 100 static int nhomologs; static int seedoffset; void strip( char *s ) { char *pt = s; while( *++pt ) if( *pt == '\n' ) *pt = 0; } void arguments( int argc, char *argv[] ) { int c; seedoffset = 0; nhomologs = 1; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "seed = %s\n", inputfile ); --argc; goto nextoption; case 't': nhomologs = myatoi( *++argv ); fprintf( stderr, "nhomologs = %d\n", nhomologs ); --argc; goto nextoption; case 'o': seedoffset = myatoi( *++argv ); fprintf( stderr, "seedoffset = %d\n", seedoffset ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; 
} if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen ) { int i, j; FILE *hat3p; double pscore = 0.0; // by D.Mathog static double *effarr1 = NULL; static double *effarr2 = NULL; char *aseq; static char **pseq; LocalHom **localhomtable, *tmpptr; double tsuyosa; if( nhomologs < 1 ) nhomologs = 1; // tsuyosa=0.0 wo sakeru tsuyosa = (double)nhomologs * nhomologs * TSUYOSAFACTOR; fprintf( stderr, "tsuyosa = %f\n", tsuyosa ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; if( tmpptr->start1 == -1 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d k\n", i+seedoffset, j+seedoffset, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); } } } fprintf( stderr, "\n" ); fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. 
FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; 
static char **bseq; static double *eff; int i; char c; int alloclen; FILE *infp; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i_seed_%s\n%s\n", name[i]+1, bseq[i] ); // CHUUI!! for( i=0; i_seed_%s\n%s\n", name[i]+1, seq[i] ); pairalign( name, nlen, seq, eff, alloclen ); fprintf( trap_g, "done.\n" ); #if DEBUG fprintf( stderr, "closing trap_g\n" ); #endif fclose( trap_g ); #if IODEBUG fprintf( stderr, "OSHIMAI\n" ); #endif SHOWVERSION; return( 0 ); } mafft-7.505-without-extensions/core/JTT.c0000644000175000017500000002046414224501721017614 0ustar nileshnilesh#if 0 #include "mltaln.h" #endif #define DEFAULTGOP_J -1530 #define DEFAULTGEP_J -00 #define DEFAULTOFS_J -123 /* +10 -- -50 teido ka ? 
*/ #define DEFAULTPAMN 200 void JTTmtx( double **rsr, double *freq, unsigned char locamino[26], char locgrp[26], int isTM ) { int i, j; double r[20][20]; // char locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-U"; char locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-J"; char locgrp0[] = { 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, 6, 6, 6, 1, }; double freq0[20] = { 0.077, 0.051, 0.043, 0.052, 0.020, 0.041, 0.062, 0.074, 0.023, 0.052, 0.091, 0.059, 0.024, 0.040, 0.051, 0.069, 0.059, 0.014, 0.032, 0.066, }; double freq0_TM[20] = { 0.1051, 0.0157, 0.0185, 0.0089, 0.0219, 0.0141, 0.0097, 0.0758, 0.0168, 0.1188, 0.1635, 0.0112, 0.0333, 0.0777, 0.0260, 0.0568, 0.0523, 0.0223, 0.0324, 0.1195, }; /* Lower triangular is JTT's Accepted point mutations */ r[ 1][ 0]= 247; r[ 2][ 0]= 216; r[ 2][ 1]= 116; r[ 3][ 0]= 386; r[ 3][ 1]= 48; r[ 3][ 2]= 1433; r[ 4][ 0]= 106; r[ 4][ 1]= 125; r[ 4][ 2]= 32; r[ 4][ 3]= 13; r[ 5][ 0]= 208; r[ 5][ 1]= 750; r[ 5][ 2]= 159; r[ 5][ 3]= 130; r[ 5][ 4]= 9; r[ 6][ 0]= 600; r[ 6][ 1]= 119; r[ 6][ 2]= 180; r[ 6][ 3]= 2914; r[ 6][ 4]= 8; r[ 6][ 5]= 1027; r[ 7][ 0]= 1183; r[ 7][ 1]= 614; r[ 7][ 2]= 291; r[ 7][ 3]= 577; r[ 7][ 4]= 98; r[ 7][ 5]= 84; r[ 7][ 6]= 610; r[ 8][ 0]= 46; r[ 8][ 1]= 446; r[ 8][ 2]= 466; r[ 8][ 3]= 144; r[ 8][ 4]= 40; r[ 8][ 5]= 635; r[ 8][ 6]= 41; r[ 8][ 7]= 41; r[ 9][ 0]= 173; r[ 9][ 1]= 76; r[ 9][ 2]= 130; r[ 9][ 3]= 37; r[ 9][ 4]= 19; r[ 9][ 5]= 20; r[ 9][ 6]= 43; r[ 9][ 7]= 25; r[ 9][ 8]= 26; r[10][ 0]= 257; r[10][ 1]= 205; r[10][ 2]= 63; r[10][ 3]= 34; r[10][ 4]= 36; r[10][ 5]= 314; r[10][ 6]= 65; r[10][ 7]= 56; r[10][ 8]= 134; r[10][ 9]= 1324; r[11][ 0]= 200; r[11][ 1]= 2348; r[11][ 2]= 758; r[11][ 3]= 102; r[11][ 4]= 7; r[11][ 5]= 858; r[11][ 6]= 754; r[11][ 7]= 142; r[11][ 8]= 85; r[11][ 9]= 75; r[11][10]= 94; r[12][ 0]= 100; r[12][ 1]= 61; r[12][ 2]= 39; r[12][ 3]= 27; r[12][ 4]= 23; r[12][ 5]= 52; r[12][ 6]= 30; r[12][ 7]= 27; r[12][ 8]= 21; r[12][ 9]= 704; r[12][10]= 974; r[12][11]= 103; r[13][ 0]= 51; 
r[13][ 1]= 16; r[13][ 2]= 15; r[13][ 3]= 8; r[13][ 4]= 66; r[13][ 5]= 9; r[13][ 6]= 13; r[13][ 7]= 18; r[13][ 8]= 50; r[13][ 9]= 196; r[13][10]= 1093; r[13][11]= 7; r[13][12]= 49; r[14][ 0]= 901; r[14][ 1]= 217; r[14][ 2]= 31; r[14][ 3]= 39; r[14][ 4]= 15; r[14][ 5]= 395; r[14][ 6]= 71; r[14][ 7]= 93; r[14][ 8]= 157; r[14][ 9]= 31; r[14][10]= 578; r[14][11]= 77; r[14][12]= 23; r[14][13]= 36; r[15][ 0]= 2413; r[15][ 1]= 413; r[15][ 2]= 1738; r[15][ 3]= 244; r[15][ 4]= 353; r[15][ 5]= 182; r[15][ 6]= 156; r[15][ 7]= 1131; r[15][ 8]= 138; r[15][ 9]= 172; r[15][10]= 436; r[15][11]= 228; r[15][12]= 54; r[15][13]= 309; r[15][14]= 1138; r[16][ 0]= 2440; r[16][ 1]= 230; r[16][ 2]= 693; r[16][ 3]= 151; r[16][ 4]= 66; r[16][ 5]= 149; r[16][ 6]= 142; r[16][ 7]= 164; r[16][ 8]= 76; r[16][ 9]= 930; r[16][10]= 172; r[16][11]= 398; r[16][12]= 343; r[16][13]= 39; r[16][14]= 412; r[16][15]= 2258; r[17][ 0]= 11; r[17][ 1]= 109; r[17][ 2]= 2; r[17][ 3]= 5; r[17][ 4]= 38; r[17][ 5]= 12; r[17][ 6]= 12; r[17][ 7]= 69; r[17][ 8]= 5; r[17][ 9]= 12; r[17][10]= 82; r[17][11]= 9; r[17][12]= 8; r[17][13]= 37; r[17][14]= 6; r[17][15]= 36; r[17][16]= 8; r[18][ 0]= 41; r[18][ 1]= 46; r[18][ 2]= 114; r[18][ 3]= 89; r[18][ 4]= 164; r[18][ 5]= 40; r[18][ 6]= 15; r[18][ 7]= 15; r[18][ 8]= 514; r[18][ 9]= 61; r[18][10]= 84; r[18][11]= 20; r[18][12]= 17; r[18][13]= 850; r[18][14]= 22; r[18][15]= 164; r[18][16]= 45; r[18][17]= 41; r[19][ 0]= 1766; r[19][ 1]= 69; r[19][ 2]= 55; r[19][ 3]= 127; r[19][ 4]= 99; r[19][ 5]= 58; r[19][ 6]= 226; r[19][ 7]= 276; r[19][ 8]= 22; r[19][ 9]= 3938; r[19][10]= 1261; r[19][11]= 58; r[19][12]= 559; r[19][13]= 189; r[19][14]= 84; r[19][15]= 219; r[19][16]= 526; r[19][17]= 27; r[19][18]= 42; /* Upper triangular is JTT's Accepted point mutations for transmembrane */ r[ 0][ 1]= 21; r[ 0][ 2]= 2; r[ 0][ 3]= 7; r[ 0][ 4]= 13; r[ 0][ 5]= 4; r[ 0][ 6]= 6; r[ 0][ 7]= 160; r[ 0][ 8]= 6; r[ 0][ 9]= 44; r[ 0][10]= 43; r[ 0][11]= 5; r[ 0][12]= 10; r[ 0][13]= 21; r[ 0][14]= 34; r[ 
0][15]= 198; r[ 0][16]= 202; r[ 0][17]= 0; r[ 0][18]= 1; r[ 0][19]= 292; r[ 1][ 2]= 0; r[ 1][ 3]= 1; r[ 1][ 4]= 2; r[ 1][ 5]= 21; r[ 1][ 6]= 3; r[ 1][ 7]= 22; r[ 1][ 8]= 21; r[ 1][ 9]= 4; r[ 1][10]= 8; r[ 1][11]= 53; r[ 1][12]= 19; r[ 1][13]= 0; r[ 1][14]= 1; r[ 1][15]= 5; r[ 1][16]= 5; r[ 1][17]= 28; r[ 1][18]= 0; r[ 1][19]= 0; r[ 2][ 3]= 14; r[ 2][ 4]= 1; r[ 2][ 5]= 7; r[ 2][ 6]= 0; r[ 2][ 7]= 0; r[ 2][ 8]= 8; r[ 2][ 9]= 4; r[ 2][10]= 5; r[ 2][11]= 11; r[ 2][12]= 3; r[ 2][13]= 1; r[ 2][14]= 2; r[ 2][15]= 32; r[ 2][16]= 19; r[ 2][17]= 1; r[ 2][18]= 1; r[ 2][19]= 2; r[ 3][ 4]= 0; r[ 3][ 5]= 0; r[ 3][ 6]= 12; r[ 3][ 7]= 15; r[ 3][ 8]= 4; r[ 3][ 9]= 1; r[ 3][10]= 0; r[ 3][11]= 2; r[ 3][12]= 1; r[ 3][13]= 0; r[ 3][14]= 1; r[ 3][15]= 0; r[ 3][16]= 6; r[ 3][17]= 0; r[ 3][18]= 1; r[ 3][19]= 4; r[ 4][ 5]= 0; r[ 4][ 6]= 0; r[ 4][ 7]= 13; r[ 4][ 8]= 2; r[ 4][ 9]= 4; r[ 4][10]= 11; r[ 4][11]= 0; r[ 4][12]= 1; r[ 4][13]= 34; r[ 4][14]= 0; r[ 4][15]= 48; r[ 4][16]= 13; r[ 4][17]= 8; r[ 4][18]= 23; r[ 4][19]= 47; r[ 5][ 6]= 16; r[ 5][ 7]= 1; r[ 5][ 8]= 26; r[ 5][ 9]= 1; r[ 5][10]= 16; r[ 5][11]= 6; r[ 5][12]= 3; r[ 5][13]= 0; r[ 5][14]= 5; r[ 5][15]= 7; r[ 5][16]= 2; r[ 5][17]= 0; r[ 5][18]= 0; r[ 5][19]= 0; r[ 6][ 7]= 21; r[ 6][ 8]= 0; r[ 6][ 9]= 0; r[ 6][10]= 0; r[ 6][11]= 0; r[ 6][12]= 0; r[ 6][13]= 0; r[ 6][14]= 0; r[ 6][15]= 4; r[ 6][16]= 2; r[ 6][17]= 0; r[ 6][18]= 0; r[ 6][19]= 7; r[ 7][ 8]= 1; r[ 7][ 9]= 10; r[ 7][10]= 0; r[ 7][11]= 0; r[ 7][12]= 3; r[ 7][13]= 4; r[ 7][14]= 7; r[ 7][15]= 64; r[ 7][16]= 12; r[ 7][17]= 5; r[ 7][18]= 0; r[ 7][19]= 53; r[ 8][ 9]= 3; r[ 8][10]= 2; r[ 8][11]= 0; r[ 8][12]= 1; r[ 8][13]= 0; r[ 8][14]= 0; r[ 8][15]= 0; r[ 8][16]= 4; r[ 8][17]= 0; r[ 8][18]= 29; r[ 8][19]= 2; r[ 9][10]= 273; r[ 9][11]= 0; r[ 9][12]= 161; r[ 9][13]= 66; r[ 9][14]= 4; r[ 9][15]= 22; r[ 9][16]= 150; r[ 9][17]= 1; r[ 9][18]= 4; r[ 9][19]= 883; r[10][11]= 1; r[10][12]= 153; r[10][13]= 251; r[10][14]= 37; r[10][15]= 43; r[10][16]= 26; r[10][17]= 20; r[10][18]= 6; 
r[10][19]= 255; r[11][12]= 4; r[11][13]= 0; r[11][14]= 0; r[11][15]= 1; r[11][16]= 2; r[11][17]= 0; r[11][18]= 5; r[11][19]= 1; r[12][13]= 8; r[12][14]= 0; r[12][15]= 1; r[12][16]= 32; r[12][17]= 1; r[12][18]= 5; r[12][19]= 89; r[13][14]= 0; r[13][15]= 32; r[13][16]= 9; r[13][17]= 2; r[13][18]= 54; r[13][19]= 37; r[14][15]= 9; r[14][16]= 10; r[14][17]= 0; r[14][18]= 1; r[14][19]= 1; r[15][16]= 134; r[15][17]= 1; r[15][18]= 22; r[15][19]= 13; r[16][17]= 1; r[16][18]= 3; r[16][19]= 48; r[17][18]= 2; r[17][19]= 18; r[18][19]= 2; for (i = 0; i < 20; i++) r[i][i] = 0.0; if( isTM ) { for (i = 1; i < 20; i++) for (j = 0; j < i; j++) { r[j][i] /= 400.0 * freq0_TM[i] * freq0_TM[j]; r[i][j] = r[j][i]; } for( i=0; i<20; i++ ) freq[i] = freq0_TM[i]; } else { for (i = 1; i < 20; i++) for (j = 0; j < i; j++) { r[i][j] /= 400.0 * freq0[i] * freq0[j]; r[j][i] = r[i][j]; } for( i=0; i<20; i++ ) freq[i] = freq0[i]; } for( i=0; i<26; i++ ) locamino[i] = locamino0[i]; for( i=0; i<26; i++ ) locgrp[(int)locamino[i]] = locgrp0[i]; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) rsr[i][j] = r[i][j]; } mafft-7.505-without-extensions/core/tditeration.c0000644000175000017500000022336514224501721021506 0ustar nileshnilesh /* tree-dependent iteration algorithm A+ when group-to-group, C when group-to-singleSeqence OR algorithm A+ */ #include "mltaln.h" #define FULLSCORE 0 #define DEBUG 0 #define RECORD 0 extern char **seq_g; extern char **res_g; static int nwa; #ifdef enablemultithread typedef struct _threadarg { int thread_no; int *jobposintpt; int *ndonept; int *ntrypt; int *collectingpt; int njob; int nbranch; int maxiter; int nkozo; int *subgenerationpt; double *basegainpt; double *gainlist; double *tscorelist; int *generationofinput; char *kozoarivec; char **mastercopy; char ***candidates; int *generationofmastercopypt; int *branchtable; RNApair ***singlerna; LocalHom **localhomtable; int alloclen; Node *stopol; int ***topol; double **len; double **tscorehistory_detail; int *finishpt; int 
**skipthisbranch; double **distmtx; int ntarget; int *targetmap; pthread_mutex_t *mutex; pthread_cond_t *collection_end; pthread_cond_t *collection_start; } threadarg_t; #endif #if 1 static void shuffle( int *arr, int n ) { int i; int x; int b; for( i=1; i= ndistclass ) c = ndistclass-1; if( c >= maxdistclass ) c = maxdistclass-1; fprintf( stderr, "pair %d-%d (%f), dist=%f -> c=%d\n", i, j, eff1[i] * eff2[j], smalldistmtx[i][j], c ); eff1s[c][i] += 1.0; eff2s[c][j] += 1.0; matnum[i][j] = c; } for( c=0; c= ndistclass ) c = ndistclass-1; if( c >= maxdistclass ) c = maxdistclass-1; // fprintf( stderr, "pair %d-%d (%f), dist=%f -> c=%d\n", i, j, eff1[i] * eff2[j], smalldistmtx[i][j], c ); eff1s[c][i] = eff1[i]; eff2s[c][j] = eff2[j]; matnum[i][j] = c; } #endif #if 0 double totaleff; for( i=0; ithread_no; int njob = targ->njob; int nbranch = targ->nbranch; int maxiter = targ->maxiter; int *ndonept = targ->ndonept; int *ntrypt = targ->ntrypt; int *collectingpt = targ->collectingpt; int *jobposintpt = targ->jobposintpt; int nkozo = targ->nkozo; double *gainlist = targ->gainlist; double *tscorelist = targ->tscorelist; int *generationofinput = targ->generationofinput; int *subgenerationpt = targ->subgenerationpt; double *basegainpt = targ->basegainpt; char *kozoarivec = targ->kozoarivec; char **mastercopy = targ->mastercopy; char ***candidates = targ->candidates; int *generationofmastercopypt = targ->generationofmastercopypt; int *branchtable = targ->branchtable; RNApair ***singlerna = targ->singlerna; LocalHom **localhomtable = targ->localhomtable; int alloclen = targ->alloclen; Node * stopol = targ->stopol; int ***topol = targ->topol; double **len = targ->len; double **tscorehistory_detail = targ->tscorehistory_detail; int *finishpt = targ->finishpt; int **skipthisbranch = targ->skipthisbranch; double **distmtx = targ->distmtx; int ntarget = targ->ntarget; int *targetmap = targ->targetmap; int i, k, l, ii; double gain; int iterate; int **memlist; char *pairbuf; int 
locnjob; int s1, s2; int clus1, clus2; char **localcopy; char **mseq1, **mseq2; double *distarr; // re-calc double *effarr, *effarr_kozo; // re-calc double *effarr1, *effarr2, *effarr1_kozo, *effarr2_kozo; char *indication1, *indication2; int length; RNApair ***grouprna1, ***grouprna2; RNApair *rnapairboth; LocalHom ***localhomshrink; char *swaplist; int *gapmap1, *gapmap2; double tscore, mscore; double oimpmatchdouble; double impmatchdouble; int identity; double tmpdouble; // double naivescore0 = 0, naivescore1; double *effarrforlocalhom; double *tscorehistory; int intdum; #if 0 int oscillating; int lin, ldf; #endif double maxgain; int bestthread; int branchpos; int subgenerationatfirst; double unweightedspscore; int myjob; int converged2 = 0; int chudanres; double **smalldistmtx; double ***scoringmatrices; double **eff1s, **eff2s; int **whichmtx; locnjob = njob; if( utree == 0 ) { fprintf( stderr, "Dynamic tree is not supported in the multithread version.\n" ); exit( 1 ); } if( score_check == 2 ) { fprintf( stderr, "Score_check 2 is not supported in the multithread version.\n" ); exit( 1 ); } if( weight == 2 ) { fprintf( stderr, "Weight 2 is not supported in the multithread version.\n" ); exit( 1 ); } if( cooling && cut > 0.0 ) { fprintf( stderr, "Cooling is not supported in the multithread version.\n" ); exit( 1 ); } tscorehistory = calloc( maxiter, sizeof( double ) ); if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); distarr = AllocateDoubleVec( locnjob ); effarr = AllocateDoubleVec( locnjob ); effarrforlocalhom = AllocateDoubleVec( locnjob ); effarr1 = AllocateDoubleVec( locnjob ); effarr2 = AllocateDoubleVec( locnjob ); mseq1 = AllocateCharMtx( locnjob, 0 ); mseq2 = AllocateCharMtx( locnjob, 0 ); localcopy = AllocateCharMtx( 
locnjob, alloclen ); gapmap1 = AllocateIntVec( alloclen ); gapmap2 = AllocateIntVec( alloclen ); if( specificityconsideration != 0 ) { smalldistmtx = AllocateDoubleMtx( locnjob, locnjob ); // ookii? scoringmatrices = AllocateDoubleCub( maxdistclass, nalphabets, nalphabets ); makescoringmatrices( scoringmatrices, n_dis_consweight_multi ); eff1s = AllocateDoubleMtx( maxdistclass, locnjob ); eff2s = AllocateDoubleMtx( maxdistclass, locnjob ); whichmtx = AllocateIntMtx( locnjob, locnjob ); } else { smalldistmtx = NULL; scoringmatrices = NULL; eff1s = eff2s = NULL; whichmtx = NULL; } effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr_kozo = AllocateDoubleVec( locnjob ); for( i=0; imutex ); if( *collectingpt == 1 ) { *collectingpt = 0; *generationofmastercopypt = iterate; *subgenerationpt = 0; *basegainpt = 0.0; *ndonept = 0; *jobposintpt = 0; for( i=0; icollection_end ); pthread_mutex_unlock( targ->mutex ); } else { pthread_cond_broadcast( targ->collection_end ); pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, distarr, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink, swaplist, smalldistmtx, scoringmatrices, eff1s, eff2s, whichmtx ); // return( NULL ); pthread_exit( NULL ); } pthread_mutex_lock( targ->mutex ); while( *ndonept < nbranch ) pthread_cond_wait( targ->collection_start, targ->mutex ); pthread_mutex_unlock( targ->mutex ); // fprintf( stderr, "Thread 0 got a signal, *collectionpt = %d\n", *collectingpt ); /* Hoka no thread ga keisan */ pthread_mutex_lock( targ->mutex ); *collectingpt = 1; // chofuku #if 0 for( i=0; i maxgain ) { maxgain = gainlist[i]; bestthread = i; } } if( maxgain > 0.0 ) { // fprintf( stderr, "\nGain = %f\n", maxgain ); // fprintf( stderr, "best gain = 
%f by thread %d\n", gainlist[bestthread], bestthread ); // fprintf( stderr, "tscorelist[best] = %f by thread %d\n", tscorelist[bestthread], bestthread ); if( parallelizationstrategy == BESTFIRST ) { for( i=0; i0; i-- ) { // if( iterate-i < 15 ) fprintf( stderr, "hist[%d] = %f\n", i, tscorehistory[i] ); if( tscorehistory[i] == tscorelist[bestthread] ) { fprintf( stderr, "\nOscillating? %f == %f\n", tscorehistory[i], tscorelist[bestthread] ); *collectingpt = -1; break; } } tscorehistory[iterate] = tscorelist[bestthread]; #endif } else { fprintf( stderr, "\nConverged.\n" ); *collectingpt = -1; // pthread_cond_broadcast( targ->collection_end ); // pthread_mutex_unlock( targ->mutex ); // freelocalarrays(); // return( NULL ); // pthread_exit( NULL ); } #if 1 if( *finishpt ) { fprintf( stderr, "\nConverged2.\n" ); *collectingpt = -1; } #endif pthread_mutex_unlock( targ->mutex ); } pthread_mutex_lock( targ->mutex ); fprintf( stderr, "\nReached %d\n", maxiter ); *collectingpt = -1; pthread_cond_broadcast( targ->collection_end ); pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, distarr, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink, swaplist, smalldistmtx, scoringmatrices, eff1s, eff2s, whichmtx ); return( NULL ); pthread_exit( NULL ); } else { while( 1 ) { #if 0 if( iterate % 2 == 0 ) { lin = 0; ldf = +1; } else { lin = locnjob - 2; ldf = -1; } for( l=lin; l < locnjob-1 && l >= 0 ; l+=ldf ) for( k=0; k<2; k++ ) #endif pthread_mutex_lock( targ->mutex ); while( *collectingpt > 0 ) pthread_cond_wait( targ->collection_end, targ->mutex ); if( *collectingpt == -1 ) { pthread_mutex_unlock( targ->mutex ); freelocalarrays ( tscorehistory, grouprna1, grouprna2, rnapairboth, indication1, indication2, distarr, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, localcopy, gapmap1, 
gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink, swaplist, smalldistmtx, scoringmatrices, eff1s, eff2s, whichmtx ); return( NULL ); pthread_exit( NULL ); } // pthread_mutex_unlock( targ->mutex ); // pthread_mutex_lock( targ->mutex ); if( *jobposintpt == nbranch ) { if( *collectingpt != -1 ) *collectingpt = 1; // chofuku pthread_mutex_unlock( targ->mutex ); continue; } // fprintf( stderr, "JOB jobposintpt=%d\n", *jobposintpt ); myjob = branchtable[*jobposintpt]; l = myjob / 2; if( l == locnjob-2 ) k = 1; else k = myjob - l * 2; // fprintf( stderr, "JOB l=%d, k=%d\n", l, k ); branchpos = myjob; (*jobposintpt)++; iterate = *generationofmastercopypt; (*ntrypt)++; pthread_mutex_unlock( targ->mutex ); // fprintf( stderr, "\n IRANAI IRANAI *jobposintpt=%d, nbranch = %d\n", *jobposintpt, nbranch ); // fprintf( stderr, "branchpos = %d (thread %d)\n", branchpos, thread_no ); // fprintf( stderr, "iterate=%d, l=%d, k=%d (thread %d)\n", iterate, l, k, thread_no ); #if 0 fprintf( stderr, "STEP %03d-%03d-%d (Thread %d) ", iterate+1, l+1, k, thread_no ); fprintf( stderr, "STEP %03d-%03d-%d (thread %d) %s ", iterate+1, l+1, k, thread_no, use_fft?"\n":"\n" ); #endif // for( i=0; i<2; i++ ) for( j=0; jmutex ); for( i=0; imutex ); length = strlen( localcopy[0] ); if( nkozo ) { // double tmptmptmp; // tmptmptmp = 0.0; // clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair[0], s1, localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); clus1 = fastconjuction_noname_kozo( memlist[0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); for( i=0; i=0; i-- ) { oimpmatchdouble += (double)imp_match_out_scD( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], 
memlist[1] ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatchdouble += (double)part_imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } else { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); fprintf( stderr, "not supported\n" ); exit( 1 ); for( i=length-1; i>=0; i-- ) { oimpmatchdouble += (double)imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } // fprintf( stderr, "#### initial impmatch = %f\n", oimpmatch ); } else { if( RNAscoremtx == 'r' ) intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame else { if( smalldistmtx ) #if 1 intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score_dynmtx( smalldistmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame #endif else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame } oimpmatchdouble = 0.0; } // fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble ); mscore = oimpmatchdouble + tmpdouble; } else { fprintf( stderr, "score_check = %d\n", score_check ); fprintf( stderr, "Not supported. 
Please add --threadit 0 to disable the multithreading in the iterative refinement calculation.\n" ); exit( 1 ); } // if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth ); // if( !use_fft && !rnakozo ) // if( !use_fft ) if( !use_fft ) { commongappick_record( clus1, mseq1, gapmap1 ); commongappick_record( clus2, mseq2, gapmap2 ); } #if 0 fprintf( stderr, "##### mscore = %f\n", mscore ); #endif #if DEBUG if( !devide ) { fprintf( trap_g, "\n%d-%d-%d\n", iterate+1, l+1, k ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fflush( trap_g ); } #endif #if 0 printf( "STEP %d-%d-%d\n", iterate, l, k ); for( i=0; i %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore ); } else { if( smalldistmtx ) #if 1 intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score_dynmtx( smalldistmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #endif else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); tscore = tmpdouble; } // fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore ); #if 0 for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq1[i] ); fprintf( stderr, "+++++++\n" ); for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq2[i] ); #endif } else { tscore = mscore + 1.0; // tscore = 0.0; // fprintf( stderr, "in line 705, tscore=%f\n", tscore ); // for( i=0; i 0 ) { if( parallelizationstrategy == BESTFIRST ) { if( gain > gainlist[thread_no] ) { gainlist[thread_no] = gain; for( i=0; imutex ); for( i=0; imutex ); tscorelist[thread_no] = tscore; } #if 0 fprintf( stderr, "tscore = %f mscore = %f accepted.\n", tscore, mscore ); fprintf( stderr, "\nbetter! 
gain = %f (thread %d)\r", gain, thread_no ); #else fprintf( stderr, "%03d-%04d-%d (thread %4d) better \r", iterate+1, *ndonept, k, thread_no ); #endif } else { #if 0 fprintf( stderr, "tscore = %f mscore = %f rejected.\r", tscore, mscore ); fprintf( stderr, "worse! gain = %f", gain ); #else fprintf( stderr, "%03d-%04d-%d (thread %4d) worse \r", iterate+1, *ndonept, k, thread_no ); #endif tscore = mscore; } } #if FULLSCORE { int j; double fullscore = 0.0; for( i=1; i=0; ii-=1 ) { // fprintf( stderr, "Checking tscorehistory %f ?= %f\n", tscore, tscorehistory_detail[ii][branchpos] ); if( tscore == tscorehistory_detail[ii][branchpos] ) { converged2 = 1; break; } } if( parallelizationstrategy != BESTFIRST && converged2 ) { // fprintf( stderr, "\nFINISH!\n" ); pthread_mutex_lock( targ->mutex ); *finishpt = 1; pthread_mutex_unlock( targ->mutex ); } tscorehistory_detail[iterate][branchpos] = tscore; fprintf( stderr, "\r" ); pthread_mutex_lock( targ->mutex ); (*ndonept)++; // fprintf( stderr, "*ndonept = %d, nbranch = %d (thread %d) iterate=%d\n", *ndonept, nbranch, thread_no, iterate ); generationofinput[branchpos] = iterate; if( *ndonept == nbranch ) { if( *collectingpt != -1 ) *collectingpt = 1; // chofuku // fprintf( stderr, "Thread %d sends a signal, *ndonept = %d\n", thread_no, *ndonept ); pthread_cond_signal( targ->collection_start ); } pthread_mutex_unlock( targ->mutex ); } /* while( 1 ) */ } /* for( iterate ) */ // return( NULL ); } #endif int TreeDependentIteration( int locnjob, char **name, int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **distmtx, int **skipthisbranch, int alloclen, LocalHom **localhomtable, RNApair ***singlerna, int nkozo, char *kozoarivec, int ntarget, int *targetmap, int *targetmapr ) { int i, j, k, l, iterate, ii, iu, ju; int lin, ldf, length; int clus1, clus2; int s1, s2; static double **imanoten; static Node *stopol; static double *distarr = NULL; static double *effarrforlocalhom = NULL; static double *effarr = 
NULL; static double *effarr1 = NULL; static double *effarr2 = NULL; static double *effarr_kozo = NULL; static double *effarr1_kozo = NULL; static double *effarr2_kozo = NULL; static double **mtx = NULL; static int **node = NULL; static int *branchnode = NULL; static double **branchWeight = NULL; static char **mseq1, **mseq2; static double ***history; FILE *trap; double tscore, mscore; int identity; int converged; int oscillating; // double naivescore0 = 0.0; // by D.Mathog, a guess // double naivescore1; #if 0 char pair[njob][njob]; #else static int **memlist; static char *pairbuf; #endif #if DEBUG + RECORD double score_for_check0, score_for_check1; static double **effmtx = NULL; extern double score_calc0(); #endif static char *indication1, *indication2; static LocalHom ***localhomshrink = NULL; static char *swaplist = NULL; double impmatchdouble = 0.0; double oimpmatchdouble = 0.0; static int *gapmap1; static int *gapmap2; double tmpdouble; int intdum; static RNApair *rnapairboth; RNApair ***grouprna1, ***grouprna2; double unweightedspscore; static double **smalldistmtx; static double ***scoringmatrices; static double **eff1s, **eff2s; static int **whichmtx; int value; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } Writeoptions( trap_g ); fflush( trap_g ); if( 1 || effarr == NULL ) /* locnjob == njob ni kagiru */ { indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr = AllocateDoubleVec( locnjob ); distarr = AllocateDoubleVec( locnjob ); effarrforlocalhom = AllocateDoubleVec( locnjob ); effarr1 = AllocateDoubleVec( locnjob ); effarr2 = AllocateDoubleVec( locnjob ); mseq1 = AllocateCharMtx( locnjob, 0 ); mseq2 = AllocateCharMtx( locnjob, 0 ); mtx = AllocateDoubleMtx( locnjob, locnjob ); node = AllocateIntMtx( locnjob, locnjob ); branchnode = AllocateIntVec( locnjob ); 
branchWeight = AllocateDoubleMtx( locnjob, 2 ); history = AllocateFloatCub( niter, locnjob, 2 ); stopol = (Node *)calloc( locnjob * 2, sizeof( Node ) ); gapmap1 = AllocateIntVec( alloclen ); gapmap2 = AllocateIntVec( alloclen ); if( score_check == 2 ) imanoten = AllocateDoubleMtx( njob, njob ); if( specificityconsideration != 0 ) { smalldistmtx = AllocateDoubleMtx( locnjob, locnjob ); // ookii? scoringmatrices = AllocateDoubleCub( maxdistclass, nalphabets, nalphabets ); makescoringmatrices( scoringmatrices, n_dis_consweight_multi ); eff1s = AllocateDoubleMtx( maxdistclass, locnjob ); eff2s = AllocateDoubleMtx( maxdistclass, locnjob ); whichmtx = AllocateIntMtx( locnjob, locnjob ); } else { smalldistmtx = NULL; scoringmatrices = NULL; eff1s = eff2s = NULL; whichmtx = NULL; } effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru. effarr_kozo = AllocateDoubleVec( locnjob ); for( i=0; i 2 && ( weight == 4 || weight == 0 ) ) { treeCnv( stopol, locnjob, topol, len, branchWeight ); calcBranchWeight( branchWeight, locnjob, stopol, topol, len ); // IRU!!! 
} } #ifdef enablemultithread if( nthread > 0 ) { threadarg_t *targ; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t collection_end; pthread_cond_t collection_start; int jobposint; int generationofmastercopy; int subgeneration; double basegain; int *generationofinput; double *gainlist; double *tscorelist; int ndone; int ntry; int collecting; int nbranch; int maxiter; char ***candidates; int *branchtable; double **tscorehistory_detail; int finish; nwa = nthread + 1; nbranch = (njob-1) * 2 - 1; maxiter = niter; targ = calloc( nwa, sizeof( threadarg_t ) ); handle = calloc( nwa, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &collection_end, NULL ); pthread_cond_init( &collection_start, NULL ); gainlist = calloc( nwa, sizeof( double ) ); tscorelist = calloc( nwa, sizeof( double ) ); branchtable = calloc( nbranch, sizeof( int ) ); generationofinput = calloc( nbranch, sizeof( int ) ); if( parallelizationstrategy == BESTFIRST ) candidates = AllocateCharCub( nwa, locnjob, alloclen ); for( i=0; i 2 && ( weight == 4 || weight == 0 ) ) { treeCnv( stopol, locnjob, topol, len, branchWeight ); calcBranchWeight( branchWeight, locnjob, stopol, topol, len ); // IRU!!! } trap = fopen( "hat2", "w" ); if( !trap ) ErrorExit( "Cannot open hat2." 
); WriteHat2_pointer( trap, locnjob, name, mtx ); fclose( trap ); if( constraint ) { counteff_simple( locnjob, topol, len, effarrforlocalhom ); if( ntarget < locnjob ) calcimportance_target( locnjob, ntarget, effarrforlocalhom, aseq, localhomtable, targetmap, targetmapr, alloclen ); else calcimportance_half( locnjob, effarrforlocalhom, aseq, localhomtable, alloclen ); } } if( iterate % 2 == 0 ) { lin = 0; ldf = +1; } else { lin = locnjob - 2; ldf = -1; } if( score_check == 2 ) { effarr1[0] = 1.0; effarr2[0] = 1.0; length = strlen( bseq[0] ); for( i=0; i= 0 ; l+=ldf ) { for( k=0; k<2; k++ ) { if( l == locnjob-2 ) k = 1; #else for( jobpos=0; jobpos=0; i-- ) oimpmatchdouble += (double)part_imp_match_out_sc( i, i ); } } else { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); fprintf( stderr, "not supported\n" ); exit( 1 ); } } // fprintf( stderr, "### oimpmatch = %f\n", oimpmatch ); } else { oimpmatchdouble = 0.0; } #if 0 tmpdouble = 0.0; iu=0; for( i=s1; i>> oimpmatchdouble = 0.0; if( use_fft ) { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else if( alg == 'd' ) { imp_match_init_strictD( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); for( i=length-1; i>=0; i-- ) { oimpmatchdouble += (double)imp_match_out_scD( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] ); if( rnakozo ) part_imp_rna( clus1, clus2, 
mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); for( i=length-1; i>=0; i-- ) { oimpmatchdouble += (double)part_imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } else { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); fprintf( stderr, "not supported\n" ); exit( 1 ); for( i=length-1; i>=0; i-- ) { oimpmatchdouble += (double)imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } // fprintf( stderr, "otmpmatch = %f\n", oimpmatch ); } // fprintf( stderr, "#### initial impmatch = %f\n", oimpmatch ); } else { if( RNAscoremtx == 'r' ) intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame else { if( smalldistmtx ) #if 1 intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score_dynmtx( offsetmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // n_dis ha machigai #endif else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame } oimpmatchdouble = 0.0; } // fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble ); mscore = oimpmatchdouble + tmpdouble; } else { // fprintf( stderr, "score_check = %d\n" ); #if 1 /* Oscilation check no tame hitsuyou! 
atode kousokuka */ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); mscore = tmpdouble; /* atode kousokuka */ #else mscore = 0.0; #endif if( constraint ) { oimpmatchdouble = 0.0; // shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink ); if( ntarget < locnjob ) msshrinklocalhom_fast_target( memlist[0], memlist[1], localhomtable, localhomshrink, swaplist, targetmap ); else msshrinklocalhom_fast_half( memlist[0], memlist[1], localhomtable, localhomshrink ); if( use_fft ) { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else if( alg == 'd' ) { imp_match_init_strictD( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); } else { part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] ); if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL ); } } else { if( alg == 'Q' ) { fprintf( stderr, "'Q' is no longer supported\n" ); exit( 1 ); } else { imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1], NULL, NULL, NULL, -1, 0 ); fprintf( stderr, "Not supported\n" ); exit( 1 ); } } } } // oimpmatch = 0.0; if( constraint ) { #if 0 // iranai if( alg == 'Q' ) { imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_scQ( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } else 
{ imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); for( i=length-1; i>=0; i-- ) { oimpmatch += imp_match_out_sc( i, i ); // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] ); } } #endif } #if 0 if( alg == 'H' ) naivescore0 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; else if( alg == 'Q' ) naivescore0 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; else if( alg == 'R' ) naivescore0 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + oimpmatch; #endif // if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth ); // if( !use_fft && !rnakozo ) // if( !use_fft ) if( !use_fft ) { commongappick_record( clus1, mseq1, gapmap1 ); commongappick_record( clus2, mseq2, gapmap2 ); } #if 0 fprintf( stderr, "##### mscore = %f\n", mscore ); #endif #if DEBUG if( !devide ) { fprintf( trap_g, "\nSTEP%d-%d-%d\n", iterate+1, l+1, k ); fprintf( trap_g, "group1 = %s\n", indication1 ); fprintf( trap_g, "group2 = %s\n", indication2 ); fflush( trap_g ); } #endif #if 0 printf( "STEP %d-%d-%d\n", iterate, l, k ); for( i=0; i fullscore ) { for( i=0; igroup1\n%s\n", mseq1[i] ); for( i=0; igroup2\n%s\n", mseq2[i] ); for( i=0; ibetter alignment\n%s\n", bseq[i] ); exit( 1 ); } } #endif length = strlen( mseq1[0] ); if( identity ) { tscore = mscore; if( !devide ) fprintf( trap_g, "tscore = %f identical.\n", tscore ); fprintf( stderr, " identical. 
" ); converged++; } else { if( score_check ) { if( constraint == 2 ) { #if 1 if( RNAscoremtx == 'r' ) intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #endif tscore = impmatchdouble + tmpdouble; // fprintf( stderr, "tmpdouble=%f, impmatch = %f -> %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore ); } else { if( smalldistmtx ) #if 1 intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); #else intergroup_score_dynmtx( offsetmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // n_dis ha machigai #endif else intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); tscore = tmpdouble; } // fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore ); #if 0 for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq1[i] ); fprintf( stderr, "+++++++\n" ); for( i=0; i<1; i++ ) fprintf( stderr, "%s\n", mseq2[i] ); #endif } else { tscore = mscore + 1.0; // tscore = 0.0; // fprintf( stderr, "in line 705, tscore=%f\n", tscore ); // for( i=0; i mscore - cut/100.0*mscore ) { writePre( locnjob, name, nlen, aseq, 0 ); for( i=0; i= locnjob * 2 ) { fprintf( trap_g, "Converged.\n\n" ); fprintf( stderr, "\nConverged.\n\n" ); if( scoreout ) { unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } value = 0; goto end; } if( iterate >= 1 ) { /* oscillation check */ oscillating = 0; for( ii=iterate-2; ii>=0; ii-=2 ) { if( 
(double)tscore == history[ii][l][k] ) { oscillating = 1; break; } } if( ( oscillating && !cooling ) || ( oscillating && cut < 0.001 && cooling ) ) { fprintf( trap_g, "Oscillating.\n" ); fprintf( stderr, "\nOscillating.\n\n" ); if( scoreout ) { unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } #if 1 /* hujuubun */ value = -1; goto end; #endif } } /* if( iterate ) */ } /* for( k ) */ } /* for( l ) */ if( scoreout ) { unweightedspscore = plainscore( njob, bseq ); fprintf( stderr, "\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore ); fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) ); if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" ); fprintf( stderr, "\n\n" ); } } /* for( iterate ) */ } value = 2; end: // if( grouprna1 ) free( grouprna1 ); // if( grouprna2 ) free( grouprna2 ); #if 1 freelocalarrays ( NULL, grouprna1, grouprna2, rnapairboth, indication1, indication2, distarr, effarr, effarrforlocalhom, effarr1, effarr2, mseq1, mseq2, NULL, gapmap1, gapmap2, effarr1_kozo, effarr2_kozo, effarr_kozo, memlist, pairbuf, localhomshrink, swaplist, smalldistmtx, scoringmatrices, eff1s, eff2s, whichmtx ); if( branchnode ) free( branchnode ); if( stopol) free( stopol ); if( mtx ) FreeDoubleMtx( mtx ); if( node ) FreeIntMtx( node ); if( branchWeight ) FreeDoubleMtx( branchWeight ); if( history) FreeFloatCub( history ); treeCnv( NULL, 0, 0, NULL, NULL ); // 2021/Sep #endif // freelocalarrays // ( // NULL, // grouprna1, grouprna2, // rnapairboth, // indication1, indication2, // distarr, // effarr, effarrforlocalhom, effarr1, effarr2, // mseq1, mseq2, // NULL, // gapmap1, gapmap2, // effarr1_kozo, 
effarr2_kozo, effarr_kozo, // memlist, pairbuf, // localhomshrink, // smalldistmtx, // scoringmatrices, // eff1s, eff2s, // whichmtx // ); // free( branchnode ); // free( stopol ); // return( value ); } /* int Tree... */ mafft-7.505-without-extensions/core/fft.h0000644000175000017500000000043714224501721017735 0ustar nileshnilesh#include #include #include #include #include "mtxutl.h" #define PI 3.14159265358979323846 #define END_OF_VEC -1 #define NKOUHO 20 #define NKOUHO_LONG 500 #define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) #define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) mafft-7.505-without-extensions/core/mafft.ps10000644000175000017500000000162514224501721020527 0ustar nileshnileshWrite-Host; Write-Host Preparing environment to run MAFFT on Windows. Write-Host This may take a while, if real-time scanning by anti-virus software is on. Set-Item Env:Path "/usr/bin;$Env:Path" Set-Item Env:MAFFT_BINARIES "/usr/lib/mafft" Set-Item Env:TMPDIR "$Env:TMP" Set-Item Env:MAFFT_TMPDIR "$Env:TMP" Set-Item Env:mafft_working_dir "$PWD" #Set-Item Env:TMPDIR "/tmp" #Set-Item Env:MAFFT_TMPDIR "/tmp" # If you do not have write permission for standard temporary folder # (typically C:\Users\username\AppData\Local\Temp\), then # uncomment (remove #) the above two lines to use an alternative # temporary folder. 
#$ROOTDIR=$PSScriptRoot # not supported by powershell versions <= 2 $ROOTDIR=Split-Path -Parent $MyInvocation.MyCommand.Path $proc = Start-Process -Wait -NoNewWindow -PassThru -FilePath "$ROOTDIR\usr\bin\bash.exe" -ArgumentList "'/usr/bin/mafft' $args" exit $proc.ExitCode mafft-7.505-without-extensions/core/makedirectionlist.c0000644000175000017500000006726714224501721022701 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define GLOBAL 0 #define END_OF_VEC -1 //int nadd; double thresholdtorev; int dodp; int addfragment; int mode = '2'; int reflim = 1000; int contrastsort = 1; typedef struct _thread_arg { int iend; char **seq; int *map; char *tmpseq; int *res; int **spointt; short *table1; int iq; #ifdef enablemultithread int *jshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } thread_arg_t; typedef struct _selfdpthread_arg { int iend; char **seq; double *res; #ifdef enablemultithread int *jshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } selfdpthread_arg_t; typedef struct _contrast { int pos; double dif; } contrastarr; static void *selfdpthread( void *arg ) { selfdpthread_arg_t *targ = (selfdpthread_arg_t *)arg; int iend = targ->iend; char **seq = targ->seq; double *res = targ->res; #ifdef enablemultithread int thread_no = targ->thread_no; int *jshare = targ->jshare; #endif int j; char **revseq; revseq = AllocateCharMtx( 1, nlenmax+1 ); j = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex_counter ); j = *jshare; if( j%100 == 0 ) reporterr( "%d / %d (thread %d) \r", j, iend, thread_no ); if( j == iend ) { pthread_mutex_unlock( targ->mutex_counter ); break; } ++(*jshare); pthread_mutex_unlock( targ->mutex_counter ); } else #endif { j++; if( j%100 == 0 ) reporterr( "%d / %d \r", j, iend ); if( j == iend ) { break; } } sreverse( revseq[0], seq[j] ); #if GLOBAL res[j] = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, seq+j, seq+j, 0 ); res[j] 
-= G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, seq+j, revseq, 0 ); #else res[j] = L__align11_noalign( n_dis_consweight_multi, seq+j, seq+j ); res[j] -= L__align11_noalign( n_dis_consweight_multi, seq+j, revseq ); #endif } creverse( 0 ); FreeCharMtx( revseq ); #if GLOBAL G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); #else L__align11_noalign( NULL, NULL, NULL ); #endif return( NULL ); } #if 0 static void partshuffle( int size, int outsize, int *ary ) { int i; // reporterr( "ary before shuffle = \n" ); // for(i=0;i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 't': thresholdtorev = atof( *++argv ); fprintf( stderr, "thresholdtorev = %f\n", thresholdtorev ); --argc; goto nextoption; case 'o': mode = *(*++argv); fprintf( stderr, "mode = %c\n", mode ); --argc; goto nextoption; case 'r': reflim = myatoi(*++argv); fprintf( stderr, "reflim = %d\n", reflim ); --argc; goto nextoption; case 'c': contrastsort = 0; break; case 'd': dodp = 1; break; case 'F': addfragment = 
1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'S': alg = 'S'; break; case 'M': alg = 'M'; break; case 'm': alg = 'm'; break; case 'G': alg = 'G'; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } } void seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else // fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); ; } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else // fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); ; } *grp = END_OF_VEC; if( grp - grpbk < 6 ) { // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); // exit( 1 ); *grpbk = -1; } } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; return; } p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; if( *n == -1 ) { *pointt = -1; 
return; } p = n; point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } static int localcommonsextet_p2( short *table, int *pointt ) { int value = 0; short tmp; int point; short *memo; int *ct; int *cp; if( *pointt == -1 ) return( 0 ); memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // chuui!! if( !ct ) ErrorExit( "Cannot allocate memo\n" ); cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; free( memo ); free( ct ); return( value ); } static int compfunc( const void *a, const void *b ) { return ((contrastarr *)b)->dif - ((contrastarr *)a)->dif; // correct // return ((contrastarr *)a)->dif - ((contrastarr *)b)->dif; // incorrect! 
} static void makecontrastorder6mer( int *order, int **pointt, int **pointt_rev, char **seq, int iend, int shift ) { int i; double *res; contrastarr *arr; short *table1, *table1_rev; arr = calloc( iend, sizeof( contrastarr ) ); res = calloc( iend, sizeof( double ) ); for( i=0; iiend; char **seq = targ->seq; int *map = targ->map; char *tmpseq = targ->tmpseq; int *res = targ->res; int **spointt = targ->spointt; short *table1 = targ->table1; // int iq = targ->iq; #ifdef enablemultithread // int thread_no = targ->thread_no; int *jshare = targ->jshare; #endif int j; char **mseq1, **mseq2; if( dodp ) // nakuserukamo { mseq1 = AllocateCharMtx( 1, 0 ); mseq2 = AllocateCharMtx( 1, 0 ); } j = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex_counter ); j = *jshare; if( j == iend ) { pthread_mutex_unlock( targ->mutex_counter ); break; } ++(*jshare); pthread_mutex_unlock( targ->mutex_counter ); } else #endif { j++; if( j == iend ) { // if( iq%100==1 ) fprintf( stderr, "\r %d / %d \r", iq, njob ); break; } } if( dodp ) { // strcpy( mseq1[0], tmpseq ); // strcpy( mseq2[0], seq[j] ); mseq1[0] = tmpseq; mseq2[0] = seq[map[j]]; #if GLOBAL res[j] = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, 0 ); #else res[j] = L__align11_noalign( n_dis_consweight_multi, mseq1, mseq2 ); #endif } else { // reporterr( "\n\nj=%d, map[j]=%d\n\n", j, map[j] ); res[j] = localcommonsextet_p2( table1, spointt[map[j]] ); } } if( dodp ) // nakuserukamo { free( mseq1 ); free( mseq2 ); #if GLOBAL G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); #else L__align11_noalign( NULL, NULL, NULL ); #endif } // else // if( nthread ) // inthread == 0 no toki free suru to, error. 
nazeda // localcommonsextet_p( NULL, NULL ); return( NULL ); } int main( int argc, char *argv[] ) { static int *nlen; static int *nogaplen; static char **name, **seq; int i, j, istart, iend, ic; FILE *infp; // FILE *adfp; char c; int *grpseq; char *tmpseq, *revseq; int **pointt, **pointt_rev, **spointt; double res_forward, res_reverse, res_max; int ires, mres, mres2; int *res, *resr, *resf; int *map; static short *table1, *table1_rev; static char **mseq1f, **mseq1r, **mseq2; int *contrastorder; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( alg == 'a' ) { if( nlenmax < 10000 ) alg = 'G'; else alg = 'S'; } seq = AllocateCharMtx( njob, nlenmax*1+1 ); #if 0 Read( name, nlen, seq ); readData( infp, name, nlen, seq ); #else name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); nogaplen = AllocateIntVec( njob ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); if( dorp != 'd' ) { fprintf( stderr, "Not necessary!\n" ); for( i=0; i thresholdtorev ) // tekitou { // fprintf( stderr, "REVERSE!!!\n" ); sreverse( seq[i], mseq2[0] ); strcpy( tmpseq, name[i] ); strcpy( name[i], "_R_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; } else { strcpy( seq[i], mseq2[0] ); strcpy( tmpseq, name[i] ); strcpy( name[i], "_F_" ); strncpy( name[i]+3, tmpseq+1, 10 ); name[i][13] = 0; } } FreeCharMtx( mseq1f ); FreeCharMtx( mseq1r ); FreeCharMtx( mseq2 ); free( tmpseq ); } else if( alg == 'm' ) { if( dodp ) // nakuserukamo { mseq1f = AllocateCharMtx( 1, nlenmax+1); mseq1r = AllocateCharMtx( 1, nlenmax+1 ); mseq2 = AllocateCharMtx( 1, nlenmax+1 ); } else { // nthread = 0; // heiretsu keisan no kouritsu ha warui node spointt = AllocateIntMtx( njob, 0 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); pointt_rev = AllocateIntMtx( njob, nlenmax+1 ); } tmpseq = 
AllocateCharVec( MAX( nlenmax, B ) +1 ); revseq = AllocateCharVec( nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); res = AllocateIntVec( njob ); resr = AllocateIntVec( njob ); resf = AllocateIntVec( njob ); map = AllocateIntVec( njob ); contrastorder = AllocateIntVec( njob ); if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); // iranai maxl = 0; for( i=0; i maxl ) maxl = nogaplen[i]; } reporterr( "Step 1/2\n" ); if( !dodp ) { if( nadd ) iend = njob - nadd; else iend = 0; // keisan shinai for( i=0; i moto no basho ni modosu seq_grp_nuc( grpseq, revseq ); makepointtable_nuc( pointt_rev[i], grpseq ); // makecompositiontable_p( table1_rev, pointt_rev[i] ); -> moto no basho ni modosu spointt[i] = pointt[i]; // reporterr( "pointt[i] = %p\n", pointt[i] ); // reporterr( "pointt[i][0] = %p\n", pointt[i][0] ); } } if( contrastsort ) // sukoshi chuui { if( nadd ) { iend = njob-nadd; for( i=0; i makecontrastorder() no mae ni idou if( !dodp ) { seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[ic], grpseq ); spointt[ic] = pointt[ic]; } #endif strcpy( tmpseq, name[ic] ); strcpy( name[ic], "_F_" ); strncpy( name[ic]+3, tmpseq+1, 10 ); name[ic][13] = 0; } reporterr( "\n\nStep 2/2\n" ); if( nadd ) istart = njob - nadd; else istart = 1; for( i=istart; i makecontrastorder() no mae ni idou if( !dodp ) { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); table1_rev = (short *)calloc( tsize, sizeof( short ) ); if( !table1_rev ) ErrorExit( "Cannot allocate table1_rev\n" ); seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[ic], grpseq ); makecompositiontable_p( table1, pointt[ic] ); seq_grp_nuc( grpseq, revseq ); makepointtable_nuc( pointt_rev[ic], grpseq ); makecompositiontable_p( table1_rev, pointt_rev[ic] ); } #else if( !dodp ) { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); table1_rev = (short *)calloc( tsize, sizeof( short ) 
); if( !table1_rev ) ErrorExit( "Cannot allocate table1_rev\n" ); makecompositiontable_p( table1, pointt[ic] ); makecompositiontable_p( table1_rev, pointt_rev[ic] ); } #endif if( nadd && addfragment ) iend = njob-nadd; else iend = i; if( iend > reflim ) { // reporterr( "iend = %d -> %d\n", iend, reflim ); #if 0 for( j=0; jmres2 ) { if( ires>mres ) { mres2 = mres; mres = ires; } else mres2 = ires; } } res_forward = (double)( mres + mres2 ) / 2; mres = mres2 = 0; for( j=0; jmres2 ) { if( ires>mres ) { mres2 = mres; mres = ires; } else mres2 = ires; } } res_reverse = (double)( mres + mres2 ) / 2; res_max = MAX(res_reverse,res_forward); } // reporterr( "i=%d, res_reverse = %f\n", i, res_reverse ); else if( mode == '1' ) { res_reverse = 0.0; for( j=0; jres_forward) ) // tekitou // if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou { strcpy( seq[ic], revseq ); strcpy( tmpseq, name[ic] ); strcpy( name[ic], "_R_" ); strncpy( name[ic]+3, tmpseq+1, 10 ); name[ic][13] = 0; if( !dodp ) spointt[ic] = pointt_rev[ic]; } else { strcpy( tmpseq, name[ic] ); strcpy( name[ic], "_F_" ); strncpy( name[ic]+3, tmpseq+1, 10 ); name[ic][13] = 0; if( !dodp ) spointt[ic] = pointt[ic]; } if( !dodp ) { free( table1 ); free( table1_rev ); } } if( name[0][1] == 'R' ) { for( j=0; j%s\n", name[i] ); // fprintf( stdout, "%s\n", seq[i] ); fprintf( stdout, "%s\n", name[i] ); } FreeCharMtx( seq ); FreeCharMtx( name ); freeconstants(); closeFiles(); fprintf( stderr, "\n" ); SHOWVERSION; return( 0 ); } mafft-7.505-without-extensions/core/setdirection.c0000644000175000017500000001074414224501721021647 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 char *directionfile; static int show_R_ = 1; static int subalignment; static int subalignmentoffset; void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; directionfile = NULL; subalignment = 0; subalignmentoffset = 0; show_R_ = 1; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 
'd': directionfile = *++argv; fprintf( stderr, "directionfile = %s\n", directionfile ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; case 'r': show_R_ = 0; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *difp; int nlenmin; char **name; char **seq; char *tmpseq; char line[100]; int *nlen; int i, j; int nsubalignments, maxmem; int **subtable = NULL; int *preservegaps = NULL; char firstdir; char *directions; arguments( argc, argv ); reporterr( "subalignment = %d\n", subalignment ); reporterr( "subalignmentoffset = %d\n", subalignmentoffset ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( directionfile ) { difp = fopen( directionfile, "r" ); if( !difp ) { fprintf( stderr, "Cannot open %s\n", directionfile ); exit( 1 ); } } else { fprintf( stderr, "Give directionfile!\n" ); } dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); tmpseq = AllocateCharVec( MAX( B, nlenmax )+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); directions = calloc( njob, sizeof( int ) ); readData_pointer_casepreserve( infp, name, nlen, seq ); for( i=0; i-1; i++ ) { if( directions[subtable[j][i]] != firstdir ) { reporterr( "\n\n#############################################################################\n" ); reporterr( "\nDirection of nucleotide sequences seems to be inconsistent.\n" ); reporterr( "Please check the following two sequences:\n" ); reporterr( " Sequece no.%d (%s)\n", 
subtable[j][0]+1, name[subtable[j][0]] ); reporterr( " Sequece no.%d (%s)\n", subtable[j][i]+1, name[subtable[j][i]] ); reporterr( "\nThese sequences are in sub alignment no.%d in your setting of --merge,\nbut their directions seem to be different.\n\n", j+1 ); reporterr( "#############################################################################\n\n\n\n" ); exit( 1 ); } } reporterr( "OK!\n" ); } } for( i=0; i%s\n", name[i]+1 ); fprintf( stdout, "%s\n", seq[i] ); } free( nlen ); FreeCharMtx( seq ); FreeCharMtx( name ); free( tmpseq ); return( 0 ); } mafft-7.505-without-extensions/core/mafft.tmpl0000644000175000017500000032460014224501721021001 0ustar nileshnilesh#! /bin/bash er=0; myself=`dirname "$0"`/`basename "$0"`; export myself version="v7.505 (2022/Apr/10)"; export version LANG=C; export LANG os=`uname` progname=`basename "$0"` windows="no" if [ `echo $os | grep -i cygwin` ]; then os="cygwin" windows="yes" elif [ `echo $os | grep -i msys` ]; then os="msys" windows="yes" elif [ `echo $os | grep -i mingw` ]; then os="mingw" windows="yes" elif [ `echo $os | grep -i darwin` ]; then os="darwin" elif [ `echo $os | grep -i sunos` ]; then os="sunos" elif [ `echo $os | grep -i linux` ]; then os="linux" else os="unix" fi #export os # iranai if [ "$windows" = "yes" ]; then echo "" 1>&2 echo "It may take a while before the calculation starts" 1>&2 echo "if being scanned by anti-virus software." 1>&2 echo "Also consider using a faster version for Windows 10:" 1>&2 echo "https://mafft.cbrc.jp/alignment/software/wsl.html" 1>&2 fi if [ "$MAFFT_BINARIES" ]; then prefix="$MAFFT_BINARIES" else prefix=_LIBDIR fi export prefix # iranai if [ $# -gt 0 ]; then if [ "$1" = "--man" ]; then man "$prefix/mafft.1" exit 0; fi fi if [ -x "$prefix/version" ]; then # versionbin=`"$prefix/version" | awk '{print $1}'` # for cygwin versionbin=`"$prefix/version"` # for cygwin 2.7 else versionbin="0.000" fi if ! 
expr "$version" : v"$versionbin" > /dev/null ; then echo "" 1>&2 echo "v$versionbin != $version" 1>&2 echo "" 1>&2 echo "There is a problem in the configuration of your shell." 1>&2 echo "Check the MAFFT_BINARIES environmental variable by" 1>&2 echo "$ echo \$MAFFT_BINARIES" 1>&2 echo "" 1>&2 echo "This variable must be *unset*, unless you have installed MAFFT" 1>&2 echo "with a special configuration. To unset this variable, type" 1>&2 echo "$ unset MAFFT_BINARIES" 1>&2 echo "or" 1>&2 echo "% unsetenv MAFFT_BINARIES" 1>&2 echo "Then retry" 1>&2 echo "$ mafft input > output" 1>&2 echo "" 1>&2 echo "To keep this change permanently, edit setting files" 1>&2 echo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2 echo "to delete the MAFFT_BINARIES line." 1>&2 echo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2 echo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 1>&2 echo "" 1>&2 echo "Please send a problem report to katoh@ifrec.osaka-u.ac.jp," 1>&2 echo "if this problem remains." 
1>&2 echo "" 1>&2 exit 1 er=1 fi defaultiterate=0 defaultcycle=2 defaultgop="1.53" #defaultaof="0.123" defaultaof="0.000" defaultlaof="0.100" defaultlgop="-2.00" defaultfft=1 defaultrough=0 defaultdistance="ktuples" #defaultdistance="local" defaultweighti="2.7" defaultweightr="0.0" defaultweightm="1.0" defaultdafs=0 defaultmccaskill=0 defaultcontrafold=0 defaultalgopt=" " defaultalgoptit=" " defaultsbstmodel=" -b 62 " defaultfmodel=" " defaultkappa=" " if [ $progname = "xinsi" -o $progname = "mafft-xinsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="scarna" defaultweighti="3.2" defaultweightr="8.0" defaultweightm="2.0" defaultmccaskill=1 defaultcontrafold=0 defaultdafs=0 defaultalgopt=" -A " defaultalgoptit=" -AB " ## chui defaultaof="0.0" defaultsbstmodel=" -b 62 " defaultkappa=" " defaultfmodel=" " # 2013/06/18 elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="global" defaultweighti="3.2" defaultweightr="8.0" defaultweightm="2.0" defaultmccaskill=1 defaultcontrafold=0 defaultdafs=0 defaultalgopt=" -A " defaultalgoptit=" -AB " ## chui defaultaof="0.0" defaultsbstmodel=" -b 62 " defaultkappa=" " defaultfmodel=" " # 2013/06/18 elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then defaultfft=0 defaultcycle=1 defaultiterate=1000 defaultdistance="local" elif [ $progname = "ginsi" -o $progname = "mafft-ginsi" ]; then defaultfft=1 defaultcycle=1 defaultiterate=1000 defaultdistance="global" elif [ $progname = "einsi" -o $progname = "mafft-einsi" ]; then defaultfft=0 defaultcycle=1 defaultiterate=1000 defaultdistance="localgenaf" elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then defaultfft=1 defaultcycle=2 defaultdistance="ktuples" elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then defaultfft=1 defaultcycle=2 defaultiterate=2 defaultdistance="ktuples" elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then defaultfft=0 
defaultcycle=2 defaultdistance="ktuples" elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then defaultfft=0 defaultcycle=2 defaultiterate=2 defaultdistance="ktuples" fi outputfile="" namelength=-1 anysymbol=0 parallelizationstrategy="BAATARI2" kappa=$defaultkappa sbstmodel=$defaultsbstmodel fmodel=$defaultfmodel nmodel=" " gexp=0 gop=$defaultgop gopdist=$defaultgop aof=$defaultaof cycle=$defaultcycle iterate=$defaultiterate fft=$defaultfft rough=$defaultrough distance=$defaultdistance forcefft=0 memopt=" " weightopt=" " GGOP="-6.00" LGOP="-6.00" LEXP="-0.000" GEXP="-0.000" lgop=$defaultlgop lexp="-0.100" laof=$defaultlaof pggop="-2.00" pgexp="-0.10" pgaof="0.10" rgop="-1.530" rgep="-0.000" seqtype=" " weighti=$defaultweighti weightr=$defaultweightr weightm=$defaultweightm rnaalifold=0 dafs=$defaultdafs mccaskill=$defaultmccaskill contrafold=$defaultcontrafold progressfile="/dev/stderr" anchorfile="/dev/null" anchoropt="" maxanchorseparation=1000 debug=0 sw=0 algopt=$defaultalgopt algoptit=$defaultalgoptit #algspecified=0 pairspecified=0 scorecalcopt=" " coreout=0 corethr="0.5" corewin="100" coreext=" " outputformat="pir" f2clext="-N" outorder="input" seed="x" seedtable="x" auto=0 groupsize=-1 partsize=50 partdist="ktuples" partorderopt=" -x " treeout=0 nodeout=0 distout=0 treein=0 topin=0 treeinopt=" " seedfiles="/dev/null" seedtablefile="/dev/null" pdblist="/dev/null" ownlist="/dev/null" strdir="$PWD" scorematrix="/dev/null" textmatrix="/dev/null" treeinfile="/dev/null" codonposfile="/dev/null" codonscorefile="/dev/null" rnascoremtx=" " laraparams="/dev/null" foldalignopt=" " treealg=" -X 0.1 " sueff="1.0" maxambiguous="1.0" dofilter=0 scoreoutarg=" " numthreads=0 numthreadsit=-1 numthreadstb=-1 randomseed=0 addfile="/dev/null" addarg0=" " addarg=" " addsinglearg=" " add2ndhalfarg=" " mapoutfile="/dev/null" fragment=0 legacygapopt=" " mergetable="/dev/null" mergearg=" " seedoffset=0 outnum=" " last_e=5000 last_m=3 last_subopt=" " last_once=" " 
adjustdirection=0 tuplesize=6 termgapopt=" -O " #termgapopt=" " # gap/gap ga kakenai node similarityoffset="0.0" unalignlevel="0.0" unalignspecified=0 spfactor="100.0" shiftpenaltyspecified=0 opdistspecified=0 allowshift=0 enrich=0 # ato de kezuru enrichseq=0 # ato de kezuru enrichstr=0 # ato de kezuru seektarget="" # ato de kezuru dashserver="https://sysimm.org/dash/REST1.0/" newdash=0 newdash_originalsequenceonly=0 exclude_ho=0 fixthreshold="0.0" bunkatsuopt=" " npickup=0 minimumweight="0.00001" # 2016/Mar usenaivepairscore=" " oldgenafparam=0 sprigorous=0 treeext="none" initialramusage="20GB" focusarg=" " lhlimit=" " mpiscript="/dev/null" if [ $# -gt 0 ]; then if [ "$1" = "--version" ]; then echo "$version" 1>&2 exit 0; elif [ "$1" = "--help" -o "$1" = "--info" ]; then shift er=1; fi while [ $# -gt 1 ]; do if [ "$1" = "--auto" ]; then auto=1 elif [ "$1" = "--anysymbol" ]; then anysymbol=1 elif [ "$1" = "--preservecase" ]; then anysymbol=1 elif [ "$1" = "--clustalout" ]; then outputformat="clustal" elif [ "$1" = "--phylipout" ]; then outputformat="phylip" elif [ "$1" = "--reorder" ]; then outorder="aligned" partorderopt=" " elif [ "$1" = "--inputorder" ]; then outorder="input" partorderopt=" -x " elif [ "$1" = "--unweight" ]; then weightopt=" -u " elif [ "$1" = "--termgappenalty" ]; then termgapopt=" " elif [ "$1" = "--alga" ]; then algopt=" " algoptit=" " # algspecified=1 elif [ "$1" = "--algq" ]; then algopt=" -Q " algoptit=" " echo "" 1>&2 echo "--algq is no longer supported!" 1>&2 echo "" 1>&2 exit 1; # algspecified=1 elif [ "$1" = "--namelength" ]; then shift namelength=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the length of name in clustal format output!" 1>&2 exit fi elif [ "$1" = "--groupsize" ]; then shift groupsize=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify groupsize!" 1>&2 exit fi elif [ "$1" = "--partsize" ]; then shift partsize=`expr "$1" - 0` if ! 
expr "$1" : "[0-9]" > /dev/null ; then echo "Specify partsize!" 1>&2 exit fi elif [ "$1" = "--parttree" ]; then distance="parttree" partdist="ktuples" elif [ "$1" = "--dpparttree" ]; then distance="parttree" partdist="localalign" elif [ "$1" = "--fastaparttree" ]; then distance="parttree" partdist="fasta" elif [ "$1" = "--treeout" ]; then treeout=1 elif [ "$1" = "--nodeout" ]; then nodeout=1 treeout=1 elif [ "$1" = "--distout" ]; then distout=1 elif [ "$1" = "--fastswpair" ]; then distance="fasta" pairspecified=1 sw=1 elif [ "$1" = "--fastapair" ]; then distance="fasta" pairspecified=1 sw=0 elif [ "$1" = "--averagelinkage" ]; then treealg=" -X 1.0 " sueff="1.0" elif [ "$1" = "--minimumlinkage" ]; then treealg=" -X 0.0 " sueff="0.0" elif [ "$1" = "--mixedlinkage" ]; then shift sueff="$1" treealg=" -X $1" elif [ "$1" = "--maxambiguous" ]; then shift maxambiguous="$1" dofilter=1 elif [ "$1" = "--codonpos" ]; then shift codonposfile="$1" if [ ! -e "$codonposfile" ]; then echo "Cannot open $codonposfile" 1>&2 echo "" 1>&2 exit fi codonposopt=" -R " elif [ "$1" = "--codonscore" ]; then shift codonscorefile="$1" if [ ! -e "$codonscorefile" ]; then echo "Cannot open $codonscorefile" 1>&2 echo "" 1>&2 exit fi codonscoreopt=" -S " elif [ "$1" = "--noscore" ]; then scorecalcopt=" -Z " elif [ "$1" = "--6mermultipair" ]; then distance="ktuplesmulti" tuplesize=6 pairspecified=1 elif [ "$1" = "--10mermultipair" ]; then distance="ktuplesmulti" tuplesize=10 pairspecified=1 elif [ "$1" = "--6merpair" ]; then distance="ktuples" tuplesize=6 pairspecified=1 elif [ "$1" = "--10merpair" ]; then distance="ktuples" tuplesize=10 pairspecified=1 elif [ "$1" = "--blastpair" ]; then distance="blast" pairspecified=1 elif [ "$1" = "--lastmultipair" ]; then distance="lastmulti" pairspecified=1 elif [ "$1" = "--globalpair" ]; then distance="global" pairspecified=1 elif [ "$1" = "--shortlongpair" ]; then distance="local" usenaivepairscore="-Z" laof=0.0 # addfull no tokini tsukawareru. 
lexp=0.0 # addfull no tokini tsukawareru. pgaof=0.0 # local nara iranai pgexp=0.0 # local nara iranai pairspecified=1 elif [ "$1" = "--longshortpair" ]; then distance="local" usenaivepairscore="-Z" laof=0.0 # addfull no tokini tsukawareru. lexp=0.0 # addfull no tokini tsukawareru. pgaof=0.0 # local nara iranai pgexp=0.0 # local nara iranai pairspecified=1 elif [ "$1" = "--localpair" ]; then distance="local" pairspecified=1 elif [ "$1" = "--lastpair" ]; then distance="last" pairspecified=1 elif [ "$1" = "--multipair" ]; then distance="multi" pairspecified=1 elif [ "$1" = "--hybridpair" ]; then distance="hybrid" pairspecified=1 elif [ "$1" = "--scarnapair" ]; then distance="scarna" pairspecified=1 elif [ "$1" = "--dafspair" ]; then distance="dafs" pairspecified=1 elif [ "$1" = "--larapair" ]; then distance="lara" pairspecified=1 elif [ "$1" = "--slarapair" ]; then distance="slara" pairspecified=1 elif [ "$1" = "--foldalignpair" ]; then distance="foldalignlocal" pairspecified=1 elif [ "$1" = "--foldalignlocalpair" ]; then distance="foldalignlocal" pairspecified=1 elif [ "$1" = "--foldalignglobalpair" ]; then distance="foldalignglobal" pairspecified=1 elif [ "$1" = "--globalgenafpair" ]; then distance="globalgenaf" pairspecified=1 echo "" 1>&2 echo "--globalgenaf is no longer supported!" 
1>&2 echo "" 1>&2 exit 1; elif [ "$1" = "--localgenafpair" ]; then distance="localgenaf" pairspecified=1 elif [ "$1" = "--genafpair" ]; then distance="localgenaf" pairspecified=1 elif [ "$1" = "--oldgenafpair" ]; then distance="localgenaf" pairspecified=1 oldgenafparam=1 elif [ "$1" = "--memsave" ]; then memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai) elif [ "$1" = "--nomemsave" ]; then memopt=" -N " elif [ "$1" = "--nuc" ]; then seqtype="-D" # Deleted space, 2018/Dec elif [ "$1" = "--amino" ]; then seqtype="-P" # Deleted space, 2018/Dec elif [ "$1" = "--fft" ]; then fft=1 forcefft=1 elif [ "$1" = "--nofft" ]; then fft=0 elif [ "$1" = "--quiet" ]; then # if [ $os = "msys" ]; then # progressfile="nul" # else progressfile="/dev/null" # fi elif [ "$1" = "--debug" ]; then debug=1 elif [ "$1" = "--coreext" ]; then coreext=" -c " elif [ "$1" = "--core" ]; then coreout=1 elif [ "$1" = "--adjustdirection" ]; then adjustdirection=1 elif [ "$1" = "--adjustdirectionaccurately" ]; then adjustdirection=2 elif [ "$1" = "--oneiteration" ]; then oneiterationopt=" -r " elif [ "$1" = "--progress" ]; then shift progressfile="$1" if ! ( expr "$progressfile" : "\/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then echo "Specify a progress file name with the absolute path!" 1>&2 exit fi elif [ "$1" = "--out" ]; then shift outputfile="$1" elif [ "$1" = "--skipanchorsremoterthan" ]; then shift if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify maximum gap length between anchors." 1>&2 exit fi maxanchorseparation=`expr "$1" - 0` elif [ "$1" = "--anchors" ]; then shift anchorfile="$1" anchoropt=" -l " # memopt=" -M -B " # ato de kentou # memopt=" -N " # no memsave if [ ! -e "$anchorfile" ]; then echo "Cannot open $anchorfile" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--thread" ]; then shift if ! expr "$1" : "[0-9\-]" > /dev/null ; then echo "Specify the number of threads. 
Or, use --thread -1" 1>&2 exit fi numthreads=`expr "$1" - 0` elif [ "$1" = "--threadtb" ]; then shift if ! expr "$1" : "[0-9\-]" > /dev/null ; then echo "Check the argument after --threadtb, the number of threads for the progressive step." 1>&2 exit fi numthreadstb=`expr "$1" - 0` elif [ "$1" = "--threadit" ]; then shift if ! expr "$1" : "[0-9\-]" > /dev/null ; then echo "Check the argument after --threadit, the number of threads for the iterative step." 1>&2 exit fi numthreadsit=`expr "$1" - 0` elif [ "$1" = "--last_subopt" ]; then last_subopt="-S" elif [ "$1" = "--last_once" ]; then last_once="-U" elif [ "$1" = "--last_m" ]; then shift last_m=`expr "$1" - 0` elif [ "$1" = "--last_e" ]; then shift last_e=`expr "$1" - 0` elif [ "$1" = "--randomseed" ]; then shift randomseed=`expr "$1" - 0` elif [ "$1" = "--bestfirst" ]; then parallelizationstrategy="BESTFIRST" elif [ "$1" = "--adhoc0" ]; then parallelizationstrategy="BAATARI0" elif [ "$1" = "--adhoc1" ]; then parallelizationstrategy="BAATARI1" elif [ "$1" = "--adhoc2" ]; then parallelizationstrategy="BAATARI2" elif [ "$1" = "--simplehillclimbing" ]; then parallelizationstrategy="BAATARI2" elif [ "$1" = "--scoreout" ]; then scoreoutarg="-S -B" elif [ "$1" = "--outnum" ]; then outnum="-n" elif [ "$1" = "--leavegappyregion" ]; then legacygapopt="-L" elif [ "$1" = "--legacygappenalty" ]; then legacygapopt="-L" elif [ "$1" = "--merge" ]; then shift mergetable="$1" if [ ! 
-e "$mergetable" ]; then echo "Cannot open $mergetable" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--addprofile" ]; then shift addarg0="-I" addfile="$1" elif [ "$1" = "--add" ]; then shift addarg0="-K -I" addfile="$1" elif [ "$1" = "--addfragments" ]; then shift addarg0="-K -I" addfile="$1" fragment=1 elif [ "$1" = "--addfull" ]; then shift addarg0="-K -I" addfile="$1" fragment=-1 elif [ "$1" = "--addlong" ]; then shift addarg0="-K -I" addfile="$1" fragment=-2 elif [ "$1" = "--addtotop" ]; then shift addarg0="-K -I" addfile="$1" fragment=-3 elif [ "$1" = "--smoothing" ]; then add2ndhalfarg=$add2ndhalfarg" -p " elif [ "$1" = "--keeplength" ]; then add2ndhalfarg=$add2ndhalfarg" -Y " elif [ "$1" = "--compactmapout" ]; then add2ndhalfarg=$add2ndhalfarg" -z -Y " elif [ "$1" = "--compactmapoutfile" ]; then shift add2ndhalfarg=$add2ndhalfarg" -z -Y " mapoutfile="$1" elif [ "$1" = "--mapout" ]; then add2ndhalfarg=$add2ndhalfarg" -Z -Y " elif [ "$1" = "--mapoutfile" ]; then shift add2ndhalfarg=$add2ndhalfarg" -Z -Y " mapoutfile="$1" elif [ "$1" = "--maxiterate" ]; then shift iterate=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the number of iterations!" 1>&2 exit fi elif [ "$1" = "--retree" ]; then shift cycle=`expr "$1" - 0` if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify the number of tree rebuilding!" 1>&2 exit fi elif [ "$1" = "--aamatrix" ]; then shift f2clext="-N" sbstmodel=" -b -1 " scorematrix="$1" if [ ! -e "$scorematrix" ]; then echo "Cannot open $scorematrix" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--textmatrix" ]; then shift f2clext="-E" seqtype="-P" fft=0 sbstmodel=" -b -2 -a " scorematrix="$1" if [ ! -e "$scorematrix" ]; then echo "Cannot open $scorematrix" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--text" ]; then f2clext="-E" seqtype="-P" fft=0 sbstmodel=" -b -2 -a " elif [ "$1" = "--treein" ]; then shift treeinopt=" -U " treein=1 treeinfile="$1" if [ ! 
-e "$treeinfile" ]; then echo "Cannot open $treeinfile" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--pileup" ]; then # treeinopt=" -U " # treein=1 treeext="pileup" elif [ "$1" = "--randomchain" ]; then # treeinopt=" -U " # treein=1 # pileuporshuffle="s" treeext="randomchain" elif [ "$1" = "--topin" ]; then shift treeinopt=" -V " treein=1 treeinfile="$1" echo "The --topin option has been disabled." 1>&2 echo "There was a bug in version < 6.530." 1>&2 echo "This bug has not yet been fixed." 1>&2 exit 1 elif [ "$1" = "--mpi" ]; then mpiscript="$prefix/mpiscript" elif [ "$1" = "--large" ]; then treeext="memsavetree" elif [ "$1" = "--memsavetree" ]; then treeext="memsavetree" elif [ "$1" = "--memsavetreex" ]; then treeext="memsavetreex" elif [ "$1" = "--stepadd" ]; then treeext="stepadd" elif [ "$1" = "--youngestlinkage" ]; then treeext="youngestlinkage" elif [ "$1" = "--kappa" ]; then shift kappa=" -k $1 " if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify kappa value!" 1>&2 exit fi elif [ "$1" = "--fmodel" ]; then fmodel=" -a " elif [ "$1" = "--nwildcard" ]; then nmodel=" -: " elif [ "$1" = "--nzero" ]; then nmodel=" " elif [ "$1" = "--jtt" ]; then shift f2clext="-N" sbstmodel=" -j $1" # if ! expr "$1" : "[0-9]" > /dev/null ; then # echo "Specify pam value!" 1>&2 # exit # fi elif [ "$1" = "--kimura" ]; then shift f2clext="-N" sbstmodel=" -j $1" # if ! expr "$1" : "[0-9]" > /dev/null ; then # echo "Specify pam value!" 1>&2 # exit # fi elif [ "$1" = "--tm" ]; then shift f2clext="-N" sbstmodel=" -m $1" # if ! expr "$1" : "[0-9]" > /dev/null ; then # echo "Specify pam value!" 1>&2 # exit # fi elif [ "$1" = "--bl" ]; then shift f2clext="-N" sbstmodel=" -b $1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "blosum $1?" 1>&2 exit fi elif [ "$1" = "--weighti" ]; then shift weighti="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weighti value!" 1>&2 exit fi elif [ "$1" = "--weightr" ]; then shift weightr="$1" if ! 
expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weightr value!" 1>&2 exit fi elif [ "$1" = "--weightm" ]; then shift weightm="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify weightm value!" 1>&2 exit fi elif [ "$1" = "--rnaalifold" ]; then rnaalifold=1 elif [ "$1" = "--mccaskill" ]; then mccaskill=1 contrafold=0 dafs=0 elif [ "$1" = "--contrafold" ]; then mccaskill=0 contrafold=1 dafs=0 elif [ "$1" = "--dafs" ]; then mccaskill=0 contrafold=0 dafs=1 elif [ "$1" = "--ribosum" ]; then rnascoremtx=" -s " elif [ "$1" = "--op" ]; then shift gop="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify op!" 1>&2 exit fi elif [ "$1" = "--opdist" ]; then shift gopdist="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify opdist!" 1>&2 exit fi opdistspecified=1 elif [ "$1" = "--allowshift" ]; then allowshift=1 elif [ "$1" = "--shiftpenalty" ]; then shift spfactor="$1" if ! expr "$1" : "[0-9]" > /dev/null ; then echo "Specify sf!" 1>&2 exit fi shiftpenaltyspecified=1 elif [ "$1" = "--exp" ]; then shift # gexp="$1" tmpval="$1" gexp=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"` if ! expr "$gexp" : "[0-9\-]" > /dev/null ; then printf "\nSpecify a number for exp, like --exp 0.1\n" 1>&2 printf "'$1' cannot be interpreted as a number..\n\n" 1>&2 exit fi elif [ "$1" = "--ep" ]; then shift # aof="$1" tmpval="$1" aof=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"` if ! 
expr "$aof" : "[0-9\-]" > /dev/null ; then printf "\nSpecify a number for ep, like --ep 0.1\n" 1>&2 printf "'$1' cannot be interpreted as a number..\n\n" 1>&2 exit fi elif [ "$1" = "--rop" ]; then shift rgop="$1" # Atode check elif [ "$1" = "--rep" ]; then shift rgep="$1" elif [ "$1" = "--lop" ]; then shift lgop="$1" elif [ "$1" = "--LOP" ]; then shift LGOP="$1" elif [ "$1" = "--lep" ]; then shift laof="$1" elif [ "$1" = "--lexp" ]; then shift lexp="$1" elif [ "$1" = "--LEXP" ]; then shift LEXP="$1" elif [ "$1" = "--GEXP" ]; then shift GEXP="$1" elif [ "$1" = "--GOP" ]; then shift GGOP="$1" elif [ "$1" = "--gop" ]; then shift pggop="$1" elif [ "$1" = "--gep" ]; then shift pgaof="$1" elif [ "$1" = "--gexp" ]; then shift pgexp="$1" elif [ "$1" = "--laraparams" ]; then shift laraparams="$1" elif [ "$1" = "--corethr" ]; then shift corethr="$1" elif [ "$1" = "--corewin" ]; then shift corewin="$1" elif [ "$1" = "--strdir" ]; then shift strdir="$1" elif [ "$1" = "--pdbidlist" ]; then echo "--pdbidlist is temporarily unavailable, 2018/Dec." 1>&2 echo "" 1>&2 exit shift pdblist="$1" if [ ! -e "$pdblist" ]; then echo "Cannot open $pdblist" 1>&2 echo "" 1>&2 exit fi elif [ "$1" = "--pdbfilelist" ]; then echo "--pdbfilelist is temporarily unavailable, 2018/Dec." 1>&2 echo "" 1>&2 exit shift ownlist="$1" if [ ! 
-e "$ownlist" ]; then
			echo "Cannot open $ownlist" 1>&2
			echo "" 1>&2
			exit
		fi
	elif [ "$1" = "--dash" ]; then
		newdash=1
		if [ "$distance" != "local" -a "$distance" != "localgenaf" ]; then # 2021/Oct
			distance="global"
		fi
		if [ "$iterate" -eq 0 ]; then # 2021/Oct
			iterate=3
		fi
	elif [ "$1" = "--dashserver" ]; then
		shift
		dashserver="$1"
	elif [ "$1" = "--originalseqonly" ]; then
		newdash_originalsequenceonly=1
	elif [ "$1" = "--excludehomologs" ]; then # works with --dash only
		exclude_ho=1
	elif [ "$1" = "--seedtable" ]; then
		shift
		seedtable="y"
		seedtablefile="$1"
	elif [ "$1" = "--seed" ]; then
		shift
		seed="m"
		seedfiles="$seedfiles $1"
	elif [ "$1" = "--minimumweight" ]; then
		shift
		minimumweight="$1"
	elif [ "$1" = "--similaritylevel" ]; then
		shift
		similarityoffset="$1"
	elif [ "$1" = "--unalignlevel" ]; then
		shift
		unalignlevel="$1"
		unalignspecified=1
	elif [ "$1" = "--skipiterate" ]; then
		shift
		fixthreshold="$1"
	elif [ "$1" = "--bunkatsunashi" ]; then
		bunkatsuopt=" -B "
	elif [ "$1" = "--sp" ]; then
		sprigorous=1
	elif [ "$1" = "--focus" ]; then
		focusarg=" -= "
	elif [ "$1" = "--lhlimit" ]; then # TODO: rename this option later
		shift
		lhlimit=" -q $1 "
	elif [ "$1" = "--sparsepickup" ]; then
		shift
		npickup="$1"
	elif [ $progname = "fftns" -o $progname = "nwns" ]; then
		# Legacy fftns/nwns front ends: a bare positive number is the --retree count.
		if [ "$1" -gt 0 ]; then
			cycle=`expr "$1" - 0`
		fi
	else
		# Unknown options only set the error flag; the usage text is printed later.
		echo "Unknown option: $1" 1>&2
		er=1;
	fi
	shift
done;

# Pick the directory that will hold the per-run working directory.
# ~/maffttmp is used only for the memory-saving tree modes with a non-ktuples
# distance, and only when MAFFT_TMPDIR was not given explicitly.
if [ $treeext = "memsavetree" ] || [ $treeext = "stepadd" ]; then
	if [ $distance != "ktuples" ]; then
		if [ ! "$MAFFT_TMPDIR" ]; then # space is acceptable, 2018/Mar/17
			MAFFT_TMPDIR="$HOME/maffttmp"
			mkdir -p "$MAFFT_TMPDIR" || exit
		fi
	fi
fi

if [ ! "$MAFFT_TMPDIR" ]; then
	MAFFT_TMPDIR="$TMPDIR"
fi

# Create the working directory. Some mktemp implementations reject -t, so a
# second attempt is made with an explicit template path.
TMPFILE=`env TMPDIR="$MAFFT_TMPDIR" mktemp -dt "$progname.XXXXXXXXXX"`
if [ $? -ne 0 ]; then
	echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2
	mkdir -p "$MAFFT_TMPDIR/tmp" 1>&2
	TMPFILE=`mktemp -d "$MAFFT_TMPDIR/tmp/$progname.XXXXXXXXXX"`
fi

# Stop here if neither attempt produced a directory: with an empty TMPFILE the
# staging code below would write to "/infile" and similar paths.
if [ -z "$TMPFILE" ] || [ ! -d "$TMPFILE" ]; then
	echo "Cannot create a temporary directory." 1>&2
	exit 1
fi

# Best effort, silent when lfs is absent or fails. 2018/Feb
lfs getstripe "$TMPFILE" > /dev/null 2>&1 && lfs setstripe -c 1 "$TMPFILE" > /dev/null 2>&1

if [ $os = "cygwin" ]; then
	TMPFILE=`cygpath -w "$TMPFILE"` # necessary to pass path to f2cl on cygwin, somehow unnecessary in msys.
fi

# Remove the working directory, retrying every 2 seconds until rm succeeds
# (added for MPI runs).
function removetmpfile() {
	while true
	do
		rm -rf "$TMPFILE" && break
		echo Retrying to remove "$TMPFILE". It may take several seconds. 1>&2
		sleep 2
	done
}

# Clean up on normal exit (0) and on SIGTERM (15). In debug mode the working
# directory is archived to debuginfo.tgz before it is removed.
if [ $debug -eq 1 ]; then
	trap "popd > /dev/null 2>&1; tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15
else
	trap "removetmpfile" 0 15
fi

# Exactly one positional argument (the input file, or "-" for stdin) must remain.
if [ $# -eq 1 ]; then
	if [ -r "$1" -o "$1" = - ]; then
		if [ -r "$addfile" ]; then
			printf '';
		else
			echo "$0": Cannot open "$addfile". \
1>&2
			echo "" 1>&2
			exit 1;
		fi

		# Copy every input into the working directory, turning CR into LF.
		# All auxiliary files except the main input are also stripped of empty lines.
		cat "$1" | tr "\r" "\n" > "$TMPFILE/infile"
		echo "" >> "$TMPFILE/infile"
		cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile"
		if [ $dofilter -eq 1 ]; then
			# Only the sequences being added are passed through the filter.
			mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter"
			"$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1
		fi
		cat "$TMPFILE/_addfile" >> "$TMPFILE/infile"
		cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx"
		cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable"
		cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree"
		cat "$codonposfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_codonpos"
		cat "$codonscorefile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_codonscore"
		cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_seedtablefile"
		cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_lara.params"
		cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/pdblist"
		cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/ownlist"
		cat "$anchorfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_externalanchors"

		# Reject UTF-16/UTF-32 inputs. The list is newline-separated and split with
		# IFS=$'\n' so that file names containing spaces stay in one piece; the
		# continuation lines must therefore start at column 0.
		# (Fixed a bug: 'n' was misinterpreted as delimiter, 2020/Jun/19)
		SAVEIFS=$IFS
		IFS=$'\n'
		filelist="$1
$addfile
$scorematrix
$mergetable
$treeinfile
$codonposfile
$codonscorefile
$seedtablefile
$laraparams
$pdblist
$ownlist"
		for f in $filelist; do
			# The file name is passed as a printf argument, never as the format,
			# so '%' or '\' in a name cannot corrupt the message.
			file "$f" 2>/dev/null | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf '%s: UTF-16 or UTF-32? Convert this file to ASCII\n\n' "$f" 1>&2 && exit 1;
		done
		IFS=$SAVEIFS

		infilename="$1"
		seedfilesintmp="/dev/null"
		seednseq="0"
		# Re-use the positional parameters to walk the seed files. The loop shifts
		# before reading "$1", so the first word of $seedfiles is skipped.
		set $seedfiles > /dev/null
		while [ $# -gt 1 ]; do
			shift
			if [ -r "$1" ]; then
				cat "$1" | tr "\r" "\n" > "$TMPFILE/seed$#"
				file "$1" | grep -e 'UTF-16' -e 'UTF-32' >& /dev/null && printf '%s: UTF-16 or UTF-32? Convert this file to ASCII\n\n' "$1" 1>&2 && exit 1;
			else
				echo "$0": Cannot open "$1". 1>&2
				echo "" 1>&2
				exit 1;
			fi
			# Number of '>' / '|' / '=' header lines in this seed file.
			seednseq=$seednseq" "`grep -c '^[>|=]' "$TMPFILE/seed$#"`
			seedfilesintmp=$seedfilesintmp" "seed$#
		done
	else
		echo "$0": Cannot open "$1". 1>&2
		echo "" 1>&2
		er=1
	fi
else
	# Wrong number of positional arguments: show the usage text later.
	er=1
fi

# "--thread -1": determine the number of physical cores automatically.
if [ $numthreads -lt 0 ]; then
	if [ $os = "linux" ]; then
		nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l`
		ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'`
		nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l`
		if [ $nlogicalcore -eq 0 ]; then
			echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile"
			exit 1
		fi
		if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then
			# cores per package x number of packages
			numthreads=`expr $ncoresinacpu '*' $nphysicalcpu`
		else
			numthreads=$nlogicalcore
		fi
	elif [ $os = "darwin" ]; then
		numthreads=`sysctl -n hw.physicalcpu`
		# Quoted so that an empty or multi-word result cannot break the test.
		if [ -z "$numthreads" ]; then
			echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile"
			exit 1
		fi
	elif [ "$windows" = "yes" ]; then
		# Sum the first column of every line wmic prints.
		numthreads=`wmic cpu get NumberOfCores | awk 'BEGIN{n=0} {n+=$1} END{print n}'`
	else
		echo "Cannot count the number of physical cores." \
1>>"$progressfile" exit 1 fi echo "OS = "$os 1>>"$progressfile" echo "The number of physical cores = " $numthreads 1>>"$progressfile" fi if [ $numthreadstb -lt 0 ]; then if [ $numthreads -lt 16 -o "$mpiscript" != "/dev/null" ]; then # mpi: museigen, multithread: 16 made # if [ $numthreads -lt 31 ]; then numthreadstb=$numthreads else numthreadstb=16 fi fi if [ $numthreadsit -lt 0 ]; then if [ $numthreads -lt 8 ]; then numthreadsit=$numthreads else numthreadsit=8 fi fi if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then echo 'Impossible' 1>&2; exit 1; fi if [ "$addarg0" != " " ]; then # iterate=0 # 2013/03/23 -> commented out, 2017/12 "$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile" nadd=`awk '{print $1}' "$TMPFILE/addsize"` if [ $nadd -eq "0" ]; then echo Check $addfile 1>&2 exit 1; fi if [ $seed != "x" -o $seedtable != "x" ]; then echo 'Impossible' 1>&2; echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2 exit 1; fi else nadd="0" fi if [ $auto -eq 1 ]; then "$prefix/countlen" < "$TMPFILE/infile" > "$TMPFILE/size" 2>>"$progressfile" nseq=`awk '{print $1}' "$TMPFILE/size"` nlen=`awk '{print $3}' "$TMPFILE/size"` if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then distance="local" iterate=1000 cycle=1 treeext="none" elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then distance="local" iterate=2 cycle=1 treeext="none" elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then distance="ktuples" iterate=2 cycle=2 treeext="none" elif [ $nseq -lt 20000 ]; then # changed from 10000 2014/Oct/4 distance="ktuples" iterate=0 cycle=2 treeext="none" elif [ $nseq -lt 100000 ]; then # changed from 50000 2017/Nov/24 distance="ktuples" iterate=0 cycle=2 if [ $fragment -eq 0 -a "$mergetable" = "/dev/null" ]; then treeext="memsavetree" fi elif [ $nseq -lt 200000 ]; then # changed from 90000 2017/Nov/24 distance="ktuples" iterate=0 cycle=1 if [ $fragment -eq 0 -a "$mergetable" = "/dev/null" ]; then treeext="memsavetree" fi elif [ 
$nlen -lt 3000 ]; then distance="parttree" partdist="localalign" algopt=" " algoptit=" " # algspecified=1 cycle=1 else distance="parttree" partdist="ktuples" algopt=" " algoptit=" " # algspecified=1 cycle=1 fi # if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then # distance="local" # iterate=1000 # cycle=1 # elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then # distance="local" # iterate=2 # cycle=1 # elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then # distance="ktuples" # iterate=2 # cycle=2 # elif [ $nseq -lt 200000 ]; then # distance="ktuples" # iterate=0 # treeinopt=" -U " # treein=1 # pileuporshuffle="a" # elif [ $nlen -lt 3000 ]; then # distance="parttree" # partdist="localalign" # algopt=" " # algoptit=" " ## algspecified=1 # cycle=1 # else # distance="parttree" # partdist="ktuples" # algopt=" " # algoptit=" " ## algspecified=1 # cycle=1 # fi if [ $fragment -ne 0 ]; then norg=`expr $nseq '-' $nadd` npair=`expr $norg '*' $nadd` echo "nadd = " $nadd 1>>"$progressfile" echo "npair = " $npair 1>>"$progressfile" echo "nseq = " $nseq 1>>"$progressfile" echo "nlen = " $nlen 1>>"$progressfile" if [ $norg -eq 0 ]; then echo "" 1>>"$progressfile" echo "The reference sequence was removed because of ambiguous letters?" 1>>"$progressfile" echo "" 1>>"$progressfile" exit 1; fi # nagasa check! # if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct if [ $npair -gt 10000000 -o $nlen -gt 5000 -o $nadd -gt 500000 ]; then # 2021/Dec pairlocalalign to buntan distance="ktuples" echo "use ktuples, size=$tuplesize!" 1>>"$progressfile" # elif [ $npair -gt 3000000 -o $nlen -gt 5000 ]; then # 2017/Oct elif [ $npair -gt 3000000 -o $nlen -gt 5000 ]; then # 2017/Oct distance="multi" weighti="0.0" echo "use multipair, weighti=0.0!" 1>>"$progressfile" else distance="multi" echo "use multipair, weighti=$weighti!" 
1>>"$progressfile" fi pairspecified=1 fi fi if [ `awk "BEGIN {print( 0.0+\"$sueff\" < 0.0 || 0.0+\"$sueff\" > 1.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "The argument of --mixedlinkage must be between 0.0 and 1.0" 1>>"$progressfile" exit 1; fi if [ `awk "BEGIN {print( 0.0+\"$maxambiguous\" < 0.0 || 0.0+\"$maxambiguous\" > 1.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "The argument of --maxambiguous must be between 0.0 and 1.0" 1>>"$progressfile" exit 1; fi if [ $allowshift -eq 1 ]; then if [ $unalignspecified -ne 1 ]; then unalignlevel="0.8" fi if [ $shiftpenaltyspecified -ne 1 ]; then spfactor="2.00" fi fi if [ $opdistspecified -ne 1 ]; then gopdist=$gop fi if [ $unalignlevel != "0.0" -o `awk "BEGIN {print( 0.0+\"$spfactor\" < 100.0 )}"` -gt 0 ]; then nmodel=" -: " termgapopt=" " if [ $distance = "localgenaf" ]; then printf "\n%s\n" "The combination of --allowshift and --genafpair (E-INS-i/-1) is not supported." 1>>"$progressfile" printf "%s\n" "Instead, please try --allowshift --globalpair (G-INS-i/-1 in the web version)," 1>>"$progressfile" printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i/-1), too." 1>>"$progressfile" exit 1; fi if [ $distance != "global" -o `awk "BEGIN {print( 0.0+\"$weighti\" < 1.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is supported only with the --globalpair option." 1>>"$progressfile" exit 1; fi if [ $fragment -ne 0 ]; then printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is not supported with the --addfragments option." 1>>"$progressfile" exit 1; fi fi if [ `awk "BEGIN {print( 0.0+\"$spfactor\" < 1.0 )}"` -gt 0 ]; then printf "\n%s\n" "shiftpenalty must be >1." 
1>>"$progressfile" exit 1; fi if [ `awk "BEGIN {print( 0.0+\"$fixthreshold\" < 0.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "The 'fix' parameter must be >= 0.0" 1>>"$progressfile" exit 1; fi if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" < 0.0 || 0.0+\"$unalignlevel\" > 1.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "The 'unalignlevel' parameter must be between 0.0 and 1.0" 1>>"$progressfile" exit 1; fi if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" > 0.0 )}"` -gt 0 ]; then laof="0" lexp="0" pgaof="0" pgexp="0" LEXP="0" GEXP="0" termgapopt=" " # if [ $auto -eq 1 -o $fragment -ne 0 -o $iterate -gt 0 ]; then if [ $fragment -ne 0 ]; then printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported with the --addfragments option." 1>>"$progressfile" exit 1; fi if [ $distance = "parttree" ]; then printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported in the (dp)parttree option." 1>>"$progressfile" exit 1; fi if [ $distance = "localgenaf" ]; then printf "\n%s\n" "The --genafpair is not supported in the 'unalignlevel > 0' mode." 1>>"$progressfile" printf "%s\n" "Instead, please try --unalignlevel xx --globalpair," 1>>"$progressfile" printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i), too." 1>>"$progressfile" exit 1; fi # if [ $distance != "ktuples" -a `awk "BEGIN {print( 0.0+\"$weighti\" > 0.0 )}"` -gt 0 -a $iterate -gt 0 ]; then # printf "\n%s\n\n" "Please add --weighti 0.0, for now." 
1>>"$progressfile" # exit 1; # fi fi if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" != 0.0 && 0.0+\"$unalignlevel\" != 0.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "Do not simultaneously specify --similaritylevel and --unalignlevel" 1>>"$progressfile" exit 1; fi if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" < -1.0 || 0.0+\"$similarityoffset\" > 1.0 )}"` -gt 0 ]; then printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile" exit 1; fi aof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $aof}"` laof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $laof}"` pgaof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $pgaof}"` if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then iteratelimit=254 else iteratelimit=16 fi if [ $iterate -gt $iteratelimit ]; then #?? iterate=$iteratelimit fi if [ $rnaalifold -eq 1 ]; then rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx " # rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R " rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R " elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then rnaopt=" -o $rgop -c $weightm -r $weightr " # rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx " rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx " else rnaopt=" " rnaoptit=" -F " fi # if [ $algspecified -eq 0 ]; then # if [ $distance = "parttree" ]; then # algopt=" -Q " # algoptit=" " # else # algopt=" " # algoptit=" " # fi # fi if [ $sprigorous -eq 1 ]; then algopt=" -@ " if [ $iterate -gt 0 ]; then if [ $numthreadsit -eq 0 ]; then algoptit=" -@ -B -Z -z 1000 " else echo "" 1>>"$progressfile" echo "At present, the combination of --sp and iterative refinement is supported only in a single thread." 1>>"$progressfile" echo "Please try \"--thread -1 --threadit 0\", which runs the iterative refinment calculation on a single thread." 
1>>"$progressfile" echo "" 1>>"$progressfile" exit 1; # algoptit=" -@ -B -z 1000 " fi fi termgapopt=" " fft=0 memopt=" -N " fi model="$sbstmodel $kappa $fmodel $nmodel" if [ $er -eq 1 ]; then echo "------------------------------------------------------------------------------" 1>&2 echo " MAFFT" $version 1>&2 # echo "" 1>&2 # echo " Input format: fasta" 1>&2 # echo "" 1>&2 # echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2 echo " https://mafft.cbrc.jp/alignment/software/" 1>&2 echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2 # echo "------------------------------------------------------------------------------" 1>&2 # echo " % mafft in > out" 1>&2 echo "------------------------------------------------------------------------------" 1>&2 # echo "" 1>&2 echo "High speed:" 1>&2 echo " % mafft in > out" 1>&2 echo " % mafft --retree 1 in > out (fast)" 1>&2 echo "" 1>&2 echo "High accuracy (for <~200 sequences x <~2,000 aa/nt):" 1>&2 echo " % mafft --maxiterate 1000 --localpair in > out (% linsi in > out is also ok)" 1>&2 echo " % mafft --maxiterate 1000 --genafpair in > out (% einsi in > out)" 1>&2 echo " % mafft --maxiterate 1000 --globalpair in > out (% ginsi in > out)" 1>&2 echo "" 1>&2 echo "If unsure which option to use:" 1>&2 echo " % mafft --auto in > out" 1>&2 echo "" 1>&2 # echo "Other options:" 1>&2 echo "--op # : Gap opening penalty, default: 1.53" 1>&2 echo "--ep # : Offset (works like gap extension penalty), default: 0.0" 1>&2 echo "--maxiterate # : Maximum number of iterative refinement, default: 0" 1>&2 echo "--clustalout : Output: clustal format, default: fasta" 1>&2 echo "--reorder : Outorder: aligned, default: input order" 1>&2 echo "--quiet : Do not report progress" 1>&2 echo "--thread # : Number of threads (if unsure, --thread -1)" 1>&2 echo "--dash : Add structural information (Rozewicki et al, submitted)" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2 # echo " most accurate 
in many cases, assumes only one alignable domain" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --genafpair in > out (E-INS-i)" 1>&2 # echo " works well if many unalignable residues exist between alignable domains" 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 --globalpair in > out (G-INS-i)" 1>&2 # echo " suitable for globally alignable sequences " 1>&2 # echo "" 1>&2 # echo " % mafft --maxiterate 1000 in > out (FFT-NS-i)" 1>&2 # echo " accurate and slow, iterative refinement method " 1>&2 # echo "" 1>&2 # echo "If the input sequences are long (~1,000,000nt)," 1>&2 # echo " % mafft --retree 1 --memsave --fft in > out (FFT-NS-1-memsave, new in v5.8)" 1>&2 # echo "" 1>&2 # echo "If many (~5,000) sequences are to be aligned," 1>&2 # echo "" 1>&2 # echo " % mafft --retree 1 [--memsave] --nofft in > out (NW-NS-1, new in v5.8)" 1>&2 # echo "" 1>&2 # echo " --localpair : All pairwise local alignment information is included" 1>&2 # echo " to the objective function, default: off" 1>&2 # echo " --globalpair : All pairwise global alignment information is included" 1>&2 # echo " to the objective function, default: off" 1>&2 # echo " --op # : Gap opening penalty, default: $defaultgop " 1>&2 # echo " --ep # : Offset (works like gap extension penalty), default: $defaultaof " 1>&2 # echo " --bl #, --jtt # : Scoring matrix, default: BLOSUM62" 1>&2 # echo " Alternatives are BLOSUM (--bl) 30, 45, 62, 80, " 1>&2 # echo " or JTT (--jtt) # PAM. 
" 1>&2 # echo " --nuc or --amino : Sequence type, default: auto" 1>&2 # echo " --retree # : The number of tree building in progressive method " 1>&2 # echo " (see the paper for detail), default: $defaultcycle " 1>&2 # echo " --maxiterate # : Maximum number of iterative refinement, default: $defaultiterate " 1>&2 # if [ $defaultfft -eq 1 ]; then # echo " --fft or --nofft: FFT is enabled or disabled, default: enabled" 1>&2 # else # echo " --fft or --nofft: FFT is enabled or disabled, default: disabled" 1>&2 # fi # echo " --memsave: Memory saving mode" 1>&2 # echo " (for long genomic sequences), default: off" 1>&2 # echo " --clustalout : Output: clustal format, default: fasta" 1>&2 # echo " --reorder : Outorder: aligned, default: input order" 1>&2 # echo " --quiet : Do not report progress" 1>&2 # echo "-----------------------------------------------------------------------------" 1>&2 exit 1; fi if [ $sw -eq 1 ]; then swopt=" -A " else swopt=" " fi if [ $distance = "fasta" -o $partdist = "fasta" ]; then if [ ! "$FASTA_4_MAFFT" ]; then FASTA_4_MAFFT=`which fasta34` fi if [ ! -x "$FASTA_4_MAFFT" ]; then echo "" 1>&2 echo "== Install FASTA ========================================================" 1>&2 echo "This option requires the fasta34 program (FASTA version x.xx or higher)" 1>&2 echo "installed in your PATH. If you have the fasta34 program but have renamed" 1>&2 echo "(like /usr/local/bin/myfasta), set the FASTA_4_MAFFT environment variable" 1>&2 echo "to point your fasta34 (like setenv FASTA_4_MAFFT /usr/local/bin/myfasta)." 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "last" -o $distance = "lastmulti" ]; then if [ ! -x "$prefix/lastal" -o ! -x "$prefix/lastdb" ]; then echo "" 1>&2 echo "== Install LAST ============================================================" 1>&2 echo "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." 
1>&2 echo "http://last.cbrc.jp/" 1>&2 echo "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " 1>&2 echo "============================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "lara" -o $distance = "slara" ]; then if [ ! -x "$prefix/mafft_lara" ]; then echo "" 1>&2 echo "== Install LaRA =========================================================" 1>&2 echo "This option requires LaRA (Bauer et al. http://www.planet-lisa.net/)." 1>&2 echo "The executable have to be renamed to 'mafft_lara' and installed into " 1>&2 echo "the $prefix directory. " 1>&2 echo "A configuration file of LaRA also have to be given" 1>&2 echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi if [ ! -s "$laraparams" ]; then echo "" 1>&2 echo "== Configure LaRA =======================================================" 1>&2 echo "A configuration file of LaRA have to be given" 1>&2 echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2 echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then if [ ! -x "$prefix/foldalign210" ]; then echo "" 1>&2 echo "== Install FOLDALIGN ====================================================" 1>&2 echo "This option requires FOLDALIGN (Havgaard et al. http://foldalign.ku.dk/)." 1>&2 echo "The executable have to be renamed to 'foldalign210' and installed into " 1>&2 echo "the $prefix directory. " 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then if [ ! 
-x "$prefix/mxscarnamod" ]; then echo "" 1>&2 echo "== Install MXSCARNA ======================================================" 1>&2 echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2 echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2 echo "which contains the modified version of MXSCARNA." 1>&2 echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2 echo "==========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $distance = "dafs" -o $dafs -eq 1 ]; then if [ ! -x "$prefix/dafs" ]; then echo "" 1>&2 echo "== Install DAFS===========================================================" 1>&2 echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2 echo "http://www.ncrna.org/ " 1>&2 echo "==========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi if [ $contrafold -eq 1 ]; then if [ ! -x "$prefix/contrafold" ]; then echo "" 1>&2 echo "== Install CONTRAfold ===================================================" 1>&2 echo "This option requires CONTRAfold" 1>&2 echo "(Do et al. http://contra.stanford.edu/contrafold/)." 1>&2 echo "The executable 'contrafold' have to be installed into " 1>&2 echo "the $prefix directory. 
" 1>&2 echo "=========================================================================" 1>&2 echo "" 1>&2 exit 1 fi fi #old # if [ $treeout -eq 1 ]; then # parttreeoutopt="-t" # if [ $cycle -eq 0 ]; then # treeoutopt="-t -T" # groupsize=1 # iterate=0 # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # distance="distonly" # fi # else # treeoutopt="-t" # fi # else # parttreeoutopt=" " # if [ $cycle -eq 0 ]; then # treeoutopt="-t -T" # iterate=0 # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # distance="distonly" # fi # else # treeoutopt=" " # fi # fi #new if [ $cycle -eq 0 ]; then if [ $nodeout -eq 1 ]; then treeoutopt="-^ -T" else treeoutopt="-t -T" fi iterate=0 weighti="0.0" # 2016Jul31, tbfast.c kara idou # if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance # if [ $distance = "global" ]; then # distance="distonly" # fi if [ $treeout -eq 1 ]; then parttreeoutopt="-t" groupsize=1 else parttreeoutopt=" " fi if [ $distout -eq 1 ]; then distoutopt="-y -T" if [ $treeout -eq 0 ]; then treeoutopt="" fi fi else if [ $nodeout -eq 1 ]; then if [ $iterate -gt 0 ]; then echo "The --nodeout option supports only progressive method (--maxiterate 0) for now." 1>&2 exit 1 fi parttreeoutopt="-t" treeoutopt="-^" elif [ $treeout -eq 1 ]; then parttreeoutopt="-t" treeoutopt="-t" else parttreeoutopt=" " treeoutopt=" " fi if [ $distout -eq 1 ]; then distoutopt="-y" fi fi # formatcheck=`grep -c '^[[:blank:]]\+>' "$TMPFILE/infile" | head -1 ` if [ $formatcheck -gt 0 ]; then echo "The first character of a description line must be " 1>&2 echo "the greater-than (>) symbol, not a blank." 
1>&2 echo "Please check the format around the following line(s):" 1>&2 grep -n '^[[:blank:]]\+>' "$TMPFILE/infile" 1>&2 exit 1 fi nseq=`grep -c '^[>|=]' "$TMPFILE/infile" | head -1 ` if [ $nseq -eq 2 ]; then cycle=1 fi if [ $cycle -gt 3 ]; then cycle=3 fi if [ $nseq -gt 60000 -a $iterate -gt 1 ]; then # 2014/Oct/22, test echo "Too many sequences to perform iterative refinement!" 1>&2 echo "Please use a progressive method." 1>&2 exit 1 fi if [ $distance = "lastmulti" -o $distance = "multi" ]; then if [ $fragment -eq 0 ]; then echo 'Specify --addfragments too' 1>&2 exit 1 fi fi if [ $fragment -ne 0 ]; then if [ $pairspecified -eq 0 ]; then distance="multi" fi if [ $distance != "multi" -a $distance != "hybrid" -a $distance != "lastmulti" -a $distance != "local" -a $distance != "last" -a $distance != "ktuples" -a $distance != "ktuplesmulti" ]; then echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' 1>&2 exit 1 fi fi if [ "$memopt" = " -M -B " -a "$distance" != "ktuples" ]; then echo "Impossible" 1>&2 exit 1 fi if [ $distance = "parttree" ]; then if [ "$mergetable" != "/dev/null" ]; then echo "The combination of (dp)parttree and merge is Impossible. " 1>&2 exit 1 fi if [ $addfile != "/dev/null" ]; then echo "The combination of (dp)parttree and add(fragments) is Impossible. 
" 1>&2 exit 1 fi if [ $seed != "x" -o $seedtable != "x" ]; then echo "Impossible" 1>&2 exit 1 fi if [ $iterate -gt 1 ]; then echo "Impossible" 1>&2 exit 1 fi if [ $outorder = "aligned" ]; then outorder="input" fi outorder="input" # partorder ga kiku if [ $partdist = "localalign" ]; then splitopt=" -U " # -U -l -> fast cycle=1 elif [ $partdist = "fasta" ]; then splitopt=" -S " cycle=1 else splitopt=" " fi fi if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" -a $ownlist = "/dev/null" -a $pdblist = "/dev/null" -a $enrichstr -eq 0 -a $newdash -eq 0 \) ]; then localparam="" weighti="0.0" elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" -o $ownlist != "/dev/null" -o $pdblist != "/dev/null" -o $enrichstr -eq 1 -o $newdash -eq 1 \) ]; then if [ $cycle -lt 2 ]; then cycle=2 # disttbfast ha seed hi-taiou # chuui 2014Aug21 fi if [ $iterate -lt 2 ]; then echo "############################################################################" 1>&2 echo "# Warning:" 1>&2 echo "# Progressive alignment method is incompatible with the --seed option." 1>&2 echo "# Automatically switched to the iterative refinement method." 1>&2 echo "# " 1>&2 echo "# Also consider using the '--add' option, which is compatible with" 1>&2 echo "# the progressive method and FASTER than the '--seed' option." 
1>&2 echo "# Usage is:" 1>&2 echo "# % mafft --add newSequences existingAlignment > output" 1>&2 echo "############################################################################" 1>&2 iterate=2 fi localparam="-l "$weighti elif [ $distance = "parttree" ]; then localparam="" weighti="0.0" if [ $groupsize -gt -1 ]; then cycle=1 fi else # localparam="-B -l "$weighti # weighti=0 demo bunkatsu nashi localparam="-l "$weighti # -B (bunkatsunashi) ha dvtditr.c de taiou (17/Jan/15) if [ $cycle -gt 1 ]; then # 09/01/08 cycle=1 fi fi if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then aof="0.000" if [ $oldgenafparam -ne 1 ]; then laof="0.0" lexp="0.0" # LEXP="0.0" # default = 0.0 usenaivepairscore="-Z" fi fi # if [ $nseq -gt 5000 ]; then # fft=0 # fi if [ $forcefft -eq 1 ]; then param_fft=" -G " fft=1 elif [ $fft -eq 1 ]; then param_fft=" -F " else param_fft=" " fi if [ $seed != "x" -a $seedtable != "x" ]; then echo 'Use either one of seedtable and seed. Not both.' 1>&2 exit 1 fi if [ $f2clext = "-E" -a $anysymbol -gt 0 ]; then echo '' 1>&2 echo 'The combination of --text and ( --anysymbol or --preservecase ) is impossible.' 1>&2 echo '' 1>&2 exit 1 fi # if [ $f2clext = "-E" -a $scorematrix != "/dev/null" ]; then # echo '' 1>&2 # echo 'At present, the combination of --text and (--aamatrix) is impossible.' 1>&2 # echo '' 1>&2 # exit 1 # fi memsavetree=0 if [ $treeext != "none" ]; then if [ $distance == "ktuples" ]; then treein=1 treeinopt=" -U " if [ $treeext == "randomchain" ]; then echo "shuffle $randomseed" > "$TMPFILE/_guidetree" cycle=1 # disttbfast.c dem shitei elif [ $treeext == "pileup" ]; then echo "pileup" > "$TMPFILE/_guidetree" cycle=1 # disttbfast. 
shitei elif [ $treeext == "memsavetree" ]; then echo "very compact" > "$TMPFILE/_guidetree" memsavetree=1 elif [ $treeext == "memsavetreex" ]; then echo "compact " "$initialramusage" > "$TMPFILE/_guidetree" memsavetree=1 elif [ $treeext == "stepadd" ]; then echo "stepadd" > "$TMPFILE/_guidetree" memsavetree=1 elif [ $treeext == "youngestlinkage" ]; then echo "youngestlinkage" > "$TMPFILE/_guidetree" memsavetree=1 else echo "error in mafft.tmpl" exit fi else # globalpair, localpair, genafpair, oldgenafpair # treein, treeinopt ha kimaranai if [ $treeext == "memsavetree" -o $treeext == "stepadd" ]; then memsavetree=1 else echo "With globalpair, localpair or genafpair," 1>>"$progressfile" echo "Use --large, --minimumlinkage, --averagelinkage or --mixedlinkage." 1>>"$progressfile" echo "--$treeext is supported only with --6merpair." 1>>"$progressfile" echo '' 1>>"$progressfile" exit fi fi if [ $iterate -gt 0 ]; then echo 'Iterative refinment is not supported for --large or --'$treeext 1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi if [ $fragment -ne 0 ]; then echo '--addfragments, --addfull or --addlong is not yet supported for --large or --'$treeext 1>>"$progressfile" echo "Use --add newsequences --$treeext" 1>>"$progressfile" echo "Or, --addfragments (long, full) newsequences, without --"$treeext 1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi if [ "$mergetable" != "/dev/null" ]; then # 2018/Mar/2 echo '--merge is not yet supported for --large or --'$treeext 1>>"$progressfile" echo "Use --merge without --"$treeext 1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi fi if [ $nadd -gt "0" ]; then if [ $fragment -eq "1" ]; then addarg="$addarg0 $nadd -g -0.01" addsinglearg="" cycle=1 # chuui 2014Aug25 iterate=0 elif [ $fragment -eq "-1" ]; then addarg="$addarg0 $nadd" addsinglearg="-V" # allowlongadds, 2014/04/02 cycle=1 # chuui 2014Aug25 iterate=0 elif [ $fragment -eq "-2" ]; then addarg="$addarg0 $nadd" addsinglearg="-V" # allowlongadds + smoothing 
add2ndhalfarg=$add2ndhalfarg" -p " cycle=1 # chuui 2014Aug25 usenaivepairscore="-Z" # 2015Jun01 laof=0.0 # 2015Jun01 lexp=0.0 # 2015Jun01 iterate=0 elif [ $fragment -eq "-3" ]; then addarg="$addarg0 $nadd" addsinglearg="-x" # add to top, 2021/12/31 cycle=1 # chuui 2014Aug25 iterate=0 else addarg="$addarg0 $nadd" addsinglearg="" # iterate=1 # iterate ha shitei dori bunkatsuopt=" -B " # fftnsi demo bunktasu shinai if [ "$add2ndhalfarg" != " " ]; then if [ $auto -eq 1 -o $iterate -gt 0 ]; then # echo '' 1>>"$progressfile" # echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" # echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" # echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" # echo '' 1>>"$progressfile" # exit 1 iterate=0 fi fi fi # cycle=1 # chuui 2014Aug19 # iterate=0 # treealg=" -q " ## 2012/01/24 ## removed 2012/02/06 else if [ "$add2ndhalfarg" != " " ]; then echo '' 1>>"$progressfile" echo 'The --keeplength and --mapout options are supported' 1>>"$progressfile" echo 'only with --add, --addfragments or --addlong.' 1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi fi if [ "$codonposfile" != "/dev/null" -o "$codonscorefile" != "/dev/null" ]; then if [ $nadd -eq "0" -o $fragment -eq "0" ]; then echo '' 1>>"$progressfile" echo "'--codonpos' and '--codonscore' options are supported only with the '--6merpair --addfragments' option." 1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi if [ $distance != "ktuples" ]; then # ato de taiou echo '' 1>>"$progressfile" echo "'--codonpos' and '--codonscore' options are supported only with the '--6merpair --addfragments' option, at this point." 
1>>"$progressfile" echo '' 1>>"$progressfile" exit 1 fi fi if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then # echo "use disttbfast" # echo cycle = $cycle cycletbfast=1 # tbfast wo jikkou shinai cycledisttbfast=$cycle # disttbfast ni -E cycle wo watasu if [ $cycledisttbfast -eq 0 ]; then # --treeout de tsukau cycledisttbfast=1 fi else # echo "use tbfast" # echo cycle = $cycle cycletbfast=$cycle # 1 ijou nara jikkou cycledisttbfast=1 # disttbfast ha ikkai dake fi # echo localparam= # echo $localparam # echo cycletbfast= # echo $cycletbfast # echo cycledisttbfast= # echo $cycledisttbfast #exit if [ $adjustdirection -gt 0 -a $seed != "x" ]; then echo '' 1>&2 echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 1>&2 echo '' 1>&2 exit 1 fi if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then if [ $distance = "ktuples" ]; then echo 'Not supported.' 1>&2 echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2 echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2 exit 1 fi if [ $f2clext = "-E" ]; then echo '' 1>&2 echo 'For RNA alignment, the --text mode is impossible.' 1>&2 echo '' 1>&2 exit 1 fi fi # cycle ga atode henkou sareru node koko de strategy no namae wo kimeru. 
# --- Strategy name construction starts here ("kokokara" = "from here"). ---
# Builds the label stored in $strategy as
#     <prefix>[large-][I]NS-<suffix>
# from the option variables set earlier in the script ($treeext, $distance,
# $sw, $fft, $memsavetree, $weighti, $iterate, $partdist, $fragment, $cycle).
# Per the original author's note just above, the name is fixed at this point
# because $cycle is modified later on.
	# 1) Prefix: guide-tree shortcut, RNA-structure mode, or pairwise method.
	if [ $treeext = "pileup" ]; then
		strategy="Pileup-"
	elif [ $treeext = "randomchain" ]; then
		strategy="Randomchain-"
	elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
		if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
			strategy="X-"
		elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
			# Fixed: the last operand used to be the bare string "globalgenaf",
			# which `[` evaluates as always true; it now really compares $distance.
			strategy="Q-"
		else
			# NOTE(review): because of the always-true operand described above,
			# every other $distance value historically ended up as "Q-" as well.
			# That outcome is kept explicitly so existing labels do not change and
			# $strategy is never left unset; confirm whether a different prefix
			# is wanted for these cases.
			strategy="Q-"
		fi
	elif [ $distance = "fasta" -a $sw -eq 0 ]; then
		strategy="F-"
	elif [ $distance = "fasta" -a $sw -eq 1 ]; then
		strategy="H-"
	elif [ $distance = "blast" ]; then
		strategy="B-"
	elif [ $distance = "global" -o $distance = "distonly" ]; then
		strategy="G-"
	elif [ $distance = "local" ]; then
		strategy="L-"
	elif [ $distance = "last" ]; then
		strategy="Last-"
	elif [ $distance = "hybrid" ]; then
		strategy="Hybrid-"
	elif [ $distance = "multi" ]; then
		strategy="Multi-"
	elif [ $distance = "lastmulti" ]; then
		strategy="LastMulti-"
	elif [ $distance = "localgenaf" ]; then
		strategy="E-"
	elif [ $distance = "globalgenaf" ]; then
		strategy="K-"
	elif [ $fft -eq 1 ]; then
		strategy="FFT-"
	else
		strategy="NW-"
	fi

	# 2) "large-" marks the memory-saving guide-tree mode.
	if [ $memsavetree -eq 1 ]; then
		strategy=$strategy"large-"
	fi

	# 3) Insert "I" when $weighti is numerically greater than 0.0.
	#    awk prints 1 or 0 for the comparison; the older bc-based form is kept
	#    below for reference.
#	if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then
	if [ `awk "BEGIN {print(0.0+\"$weighti\">0.0)}"` -gt 0 ]; then
		strategy=$strategy"I"
	fi
	strategy=$strategy"NS-"

	# 4) Suffix: "i" for iterative refinement, a PartTree variant, the kind of
	#    --add* run selected by $fragment, or otherwise the value of $cycle.
	if [ $iterate -gt 0 ]; then
		strategy=$strategy"i"
	elif [ $distance = "parttree" ]; then
		if [ $partdist = "fasta" ]; then
			strategy=$strategy"FastaPartTree-"$cycle
		elif [ $partdist = "localalign" ]; then
			strategy=$strategy"DPPartTree-"$cycle
		else
			strategy=$strategy"PartTree-"$cycle
		fi
	elif [ $fragment -eq 1 ]; then
		strategy=$strategy"fragment"
	elif [ $fragment -eq -1 ]; then
		strategy=$strategy"full"
	elif [ $fragment -eq -2 ]; then
		strategy=$strategy"long"
	elif [ $fragment -eq -3 ]; then
		strategy=$strategy"top"
	else
		strategy=$strategy$cycle
	fi

	# Placeholder description; replaced by the statements that follow whenever
	# the strategy name is a recognised one.
	explanation='?'
performance='Not tested.' if [ $strategy = "F-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' performance='Most accurate, but very slow' elif [ $strategy = "L-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information' performance='Probably most accurate, very slow' elif [ $strategy = "E-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment with generalized affine gap costs (Altschul 1998)' performance='Suitable for sequences with long unalignable regions, very slow' elif [ $strategy = "G-INS-i" ]; then explanation='Iterative refinement method (<'$iterate') with GLOBAL pairwise alignment information' performance='Suitable for sequences of similar lengths, very slow' elif [ $strategy = "X-INS-i" ]; then explanation='RNA secondary structure information is taken into account.' performance='For short RNA sequences only, extremely slow' elif [ $strategy = "F-INS-1" ]; then explanation='Progressive method incorporating LOCAL pairwise alignment information' elif [ $strategy = "L-INS-1" ]; then explanation='Progressive method incorporating LOCAL pairwise alignment information' elif [ $strategy = "G-INS-1" ]; then explanation='Progressive method incorporating GLOBAL pairwise alignment information' elif [ $strategy = "FFT-NS-i" -o $strategy = "NW-NS-i" ]; then explanation='Iterative refinement method (max. 
'$iterate' iterations)' if [ $iterate -gt 2 ]; then performance='Accurate but slow' else performance='Standard' fi elif [ $strategy = "FFT-NS-2" -o $strategy = "NW-NS-2" ]; then explanation='Progressive method (guide trees were built '$cycle' times.)' performance='Fast but rough' elif [ $strategy = "FFT-NS-1" -o $strategy = "NW-NS-1" ]; then explanation='Progressive method (rough guide tree was used.)' performance='Very fast but very rough' fi if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then outputopt=" -c $strategy -r $TMPFILE/order $f2clext " elif [ $outputformat = "clustal" -a $outorder = "input" ]; then outputopt=" -c $strategy $f2clext " elif [ $outputformat = "phylip" -a $outorder = "aligned" ]; then outputopt=" -y -r $TMPFILE/order " elif [ $outputformat = "phylip" -a $outorder = "input" ]; then outputopt=" -y " elif [ $outputformat = "pir" -a $outorder = "aligned" ]; then outputopt=" -f -r $TMPFILE/order " else outputopt="-f" fi if [ $newdash_originalsequenceonly -eq 1 ]; then outputopt="$outputopt -d " fi # kokomade pushd "$TMPFILE" > /dev/null cat /dev/null > pre # echo "nseq = " $nseq 1>>"$progressfile" # echo "distance = " $distance 1>>"$progressfile" # echo "iterate = " $iterate 1>>"$progressfile" # echo "cycle = " $cycle 1>>"$progressfile" if [ $anysymbol -eq 1 ]; then mv infile orig "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1 fi if [ $mergetable != "/dev/null" ]; then if [ $nadd -gt "0" ]; then echo "Impossible" 1>&2 exit 1 fi # if [ $seed != "x" -o $seedtable != "x" ]; then # echo "This version does not support the combination of merge and seed." 
1>&2 # exit 1 # fi # iterate=0 # 2013/04/16 mergearg="-H $seedoffset" fi if [ $adjustdirection -gt 0 ]; then if [ $fragment -ne 0 ]; then fragarg="-F" # else fragarg="-F" # 2014/02/06, do not consider other additional sequences, even in the case of --add fi if [ $adjustdirection -eq 1 ]; then "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 5000 -o a > _direction 2>>"$progressfile" elif [ $adjustdirection -eq 2 ]; then "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 100 -o a -d > _direction 2>>"$progressfile" fi "$prefix/setdirection" $mergearg -d _direction -i infile > infiled 2>>"$progressfile" || exit mv infiled infile if [ $anysymbol -eq 1 ]; then "$prefix/setdirection" $mergearg -d _direction -i orig -r > origd 2>>"$progressfile" || exit mv origd orig fi fi if [ $seed != "x" -o $seedtable != "x" ]; then if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then echo "The combination of --seed and (--pdbidlist or --pdbfilelist) is impossible." 1>>"$progressfile" exit 1 fi # if [ $enrich -eq 1 ]; then # echo "The combination of --seed and (--enrich, --enrichseq or --enrichstr) is impossible at present." 1>>"$progressfile" # exit 1 # fi if [ $newdash -eq 1 ]; then echo "The combination of --seed and --dash is impossible at present." 1>>"$progressfile" exit 1 fi fi # if [ $enrich -eq 1 ]; then # if [ $ownlist != "/dev/null" ]; then # echo "Warning: Sequence homologs of the structures given with the --pdbfilelist option cannot be collected.\n" 1>>"$progressfile" # fi # echo "SEEKQUENCER (http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/) is" 1>>"$progressfile" # if [ $pdblist != "/dev/null" ]; then # echo "collecting homoplogs of the input sequences and the structures given with the --pdbidlist option." 
1>>"$progressfile" # perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -idf pdblist -out seekout -mod mafftash-split 2>>"seekerr" # seekres="$?" # else # echo "collecting homologs of the input sequences." 1>>"$progressfile" # perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -out seekout -mod mafftash-split 2>>"seekerr" # seekres="$?" # fi # cat seekerr 1>>"$progressfile" # # if [ $seekres -ne "0" ]; then # echo "Error in SEEKQUENCER" 1>>"$progressfile" # exit 1; # fi # echo "Done." 1>>"$progressfile" # # if [ $enrichseq -eq 1 ]; then ## cat seekout.seq >> infile # if [ $anysymbol -eq 1 ]; then # "$prefix/replaceu" $seqtype -i seekout.seq -o $nseq >> infile # cat seekout.seq >> orig # else # "$prefix/replaceu" $seqtype -i seekout.seq | sed 's/_os_[0-9]*_oe_//' >> infile # fi # # fi # if [ $enrichstr -eq 1 ]; then # nseekstr=`wc -l < seekout.str` # if [ $nseekstr -gt 1 ]; then # cat seekout.str >> pdblist # pdblist="tsukaimasu" # fi # fi # fi if [ $seed != "x" ]; then mv infile infile2 if [ $anysymbol -eq 1 ]; then mv orig orig2 cat /dev/null > orig fi cat /dev/null > infile cat /dev/null > hat3.seed seedoffset=0 # echo "seednseq="$seednseq # echo "seedoffset="$seedoffset set $seednseq >> "$progressfile" # echo $# while [ $# -gt 1 ] do shift # echo "num="$# if [ $anysymbol -eq 1 ]; then cat seed$# >> orig "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1 mv clean seed$# fi "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1 cat hat3 >> hat3.seed # echo "$1" seedoffset=`expr $seedoffset + $1` # echo "$1" # echo "seedoffset="$seedoffset done; # echo "seedoffset="$seedoffset if [ $anysymbol -eq 1 ]; then "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi cat orig2 >> orig else cat infile2 >> infile fi elif [ 
$seedtable != "x" ]; then cat _seedtablefile > hat3.seed elif [ $newdash -eq 1 ]; then seemstobe=`"$prefix/countlen" -i infile | awk '{print $6}'` if [ $seemstobe = "d" -a "x$seqtype" != "x-P" ]; then echo "" 1>>"$progressfile" echo "Error: This data seems to be nucleotide sequences." 1>>"$progressfile" echo "Add the --amino flag if this is surely protein." 1>>"$progressfile" echo "" 1>>"$progressfile" exit 1; fi if [ $anysymbol -eq 1 ]; then mv orig infile # replaceu wo mukouka fi # sed 's/-//g' infile > dashin # gap nozoku awk '{if(/^>/)print; else {gsub( /-/,"" ); print;}}' infile > dashin if [ ! -x "$prefix/dash_client" -o ! -x "$prefix/dash_client" ]; then echo "" 1>&2 echo "== Install DASH client =====================================================" 1>&2 echo "To use this feature, uncomment the following line in Makefile" 1>&2 echo "DASH_CLIENT = dash_client" 1>&2 echo "and re-compile the source." 1>&2 echo "Note that it requires the 'Go' compiler." 1>&2 echo "============================================================================" 1>&2 echo "" 1>&2 exit 1 fi echo "Calling DASH (https://sysimm.org/dash/)" 1>>"$progressfile" "$prefix/dash_client" -url "$dashserver" -i dashin -sequences dashsequences -hat3 hat3.seed 1>>"$progressfile" dashres="$?" if [ $dashres -ne "0" ]; then echo "Error in DASH" 1>>"$progressfile" echo "To enable this feature, compile with DASH_CLIENT=dash_client. Go compiler is necessary." 
1>>"$progressfile" exit 1; fi if [ $exclude_ho -eq 1 ]; then # amari yokunai awk 'BEGIN{out=1} !/^>_addedbymaffte_/{if(out) print; out=1} /^>_addedbymaffte_/{out=0}' dashsequences | sed 's/>DASH_/>DASH|/' > ho_excluded mv ho_excluded dashsequences fi if [ "$mergetable" != "/dev/null" ]; then # 2020/Apr/30 ndash=`grep -c '>DASH_' dashsequences | head -1` # echo "ndash = " $ndash awk "{for( i=1;i<=NF;i++){if(0+\$i==0)break; printf( \"%d \", $ndash+\$i); } print \"\" }" _subalignmentstable > _subalignmentstableshifted mv _subalignmentstableshifted _subalignmentstable cp dashsequences dashsequences.bk awk "BEGIN{nout=0} {if(\$1~/^>/) nout++; if( nout <= $ndash ) print;}" dashsequences > infile2 cat infile >> infile2 cp infile2 dashsequences fi sed 's/>DASH_/>DASH|/' dashsequences > renamed mv renamed dashsequences echo "Done." 1>>"$progressfile" # cat hat3.seed seedoffset=`grep -c '^[>|=]' dashsequences | head -1 ` echo "# of structures = " 1>>"$progressfile" echo $seedoffset 1>>"$progressfile" if [ $anysymbol -eq 1 ]; then cat dashsequences >> orig "$prefix/replaceu" $seqtype -i dashsequences -o 0 > clean 2>>"$progressfile" || exit 1 mv clean infile # "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi # cat orig2 >> orig else cat dashsequences > infile # cat infile2 >> infile fi else cat /dev/null > hat3.seed fi # cat hat3.seed if [ $mccaskill -eq 1 ]; then "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 elif [ $dafs -eq 1 ]; then "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 elif [ $contrafold -eq 1 ]; then "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1 fi if [ $distance = "fasta" ]; then "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel 
$legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "blast" ]; then "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "foldalignlocal" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -Q $spfactor -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "foldalignglobal" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ 
$distance = "slara" ]; then "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "lara" ]; then "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -B -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "scarna" ]; then # "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 # cat hat3.seed hat3 > hatx # mv hatx hat3 # "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 "$prefix/tbfast" _ -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt 
$seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "dafs" ]; then "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "global" -a $memsavetree -eq 1 ]; then if [ "$mpiscript" != "/dev/null" ]; then sh $mpiscript "$prefix/nodepair_mpi" $lhlimit -u $unalignlevel $localparam $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 fi echo 'nodepair' > _guidetree "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "local" -a $memsavetree -eq 1 ]; then if [ "$mpiscript" != "/dev/null" ]; then sh $mpiscript "$prefix/nodepair_mpi" $lhlimit -u $unalignlevel $localparam $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || 
exit 1 else "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 fi echo 'nodepair' > _guidetree "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "localgenaf" -a $memsavetree -eq 1 ]; then "$prefix/nodepair" $lhlimit -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg $treeinopt $treeoutopt -i infile > /dev/null 2>>"$progressfile" || exit 1 echo 'nodepair' > _guidetree "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt -U $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "global" -a $memsavetree -eq 0 ]; then "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "local" -a $memsavetree -eq 0 ]; then if [ $fragment -ne 0 ]; then 
"$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi elif [ $distance = "globalgenaf" ]; then "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -O $GGOP -E $GEXP -K $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "localgenaf" -a $memsavetree -eq 0 ]; then "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel 
$legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "last" ]; then if [ $fragment -ne 0 ]; then "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 # addarg wo watasanai cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi elif [ $distance = "lastmulti" ]; then "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 mv hat2 hat2i "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1 cat 
hat3.seed hat3 > hatx mv hat2 hat2n mv hatx hat3 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi elif [ $distance = "multi" ]; then "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 mv hat2 hat2i "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hat2 hat2n mv hatx hat3 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi elif [ $distance = "hybrid" ]; then "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1 cat hat3.seed hat3 > hatx mv hatx hat3 "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 if [ $fragment -ne 0 ]; then "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt 
$weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 fi # elif [ $distance = "distonly" ]; then # "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1 # "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 elif [ $distance = "parttree" ]; then "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1 mv hat3.seed hat3 elif [ $distance = "ktuplesmulti" ]; then # "$prefix/dndpre" $model -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1 # mv hat2 hat2i # "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1 # mv hat2 hat2n if [ $fragment -ne 0 ]; then "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 
$legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 # "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else echo "Impossible" 1>&2 exit 1 fi else if [ $fragment -ne 0 ]; then "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1 else "$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -g $gexp -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg $anchoropt -x $maxanchorseparation $oneiterationopt < infile > pre 2>>"$progressfile" || exit 1 mv hat3.seed hat3 fi fi while [ $cycletbfast -gt 1 ] do if [ $distance = "parttree" ]; then mv pre infile "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1 else "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum -C $numthreadstb $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam 
$algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1 # fragment>0 no baai, nanimoshinai # seed youchuui!! fi cycletbfast=`expr $cycletbfast - 1` done if [ $iterate -gt 0 ]; then if [ $distance = "ktuples" ]; then "$prefix/dndpre" $seqtype $model -M 2 -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1 fi "$prefix/dvtditr" -W $minimumweight $bunkatsuopt -E $fixthreshold -s $unalignlevel $legacygapopt $mergearg $outnum -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -Q $spfactor -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg -K $nadd < pre > /dev/null 2>>"$progressfile" || exit 1 fi if [ $coreout -eq 1 ]; then "$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2 mv pre2 pre elif [ $anysymbol -eq 1 ]; then "$prefix/restoreu" $add2ndhalfarg -a pre -i orig > restored || exit 1 mv restored pre fi echo '' 1>>"$progressfile" if [ $mccaskill -eq 1 ]; then echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>>"$progressfile" echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile" echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile" echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile" echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile" echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile" echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" echo "" 1>>"$progressfile" elif [ $contrafold -eq 1 ]; then echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile" echo "and then incorporated in the iterative alignment process (4)." 
1>>"$progressfile" echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile" echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile" echo "" 1>>"$progressfile" fi if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then echo "Input structures are decomposed into structural domains using" 1>>"$progressfile" echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile" echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile" echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile" fi if [ $pdblist != "/dev/null" ]; then echo "Pre-computed alignments stored in " 1>>"$progressfile" echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile" fi if [ $distance = "fasta" -o $partdist = "fasta" ]; then echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile" echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile" fi if [ $distance = "blast" ]; then echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile" echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile" fi if [ $distance = "last" -o $distance = "lastmulti" ]; then echo "Pairwise alignments were computed by LAST" 1>>"$progressfile" echo "http://last.cbrc.jp/" 1>>"$progressfile" echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile" fi if [ $distance = "scarna" ]; then echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile" echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile" fi if [ $distance = "dafs" ]; then echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile" echo "(Sato et al., 2012,,,,)." 1>>"$progressfile" fi if [ $distance = "lara" -o $distance = "slara" ]; then echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile" echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 
1>>"$progressfile" fi if [ $distance = "foldalignlocal" ]; then echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile" echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" fi if [ $distance = "foldalignglobal" ]; then echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile" echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile" fi # printf "\n" 1>>"$progressfile" echo 'Strategy:' 1>>"$progressfile" printf ' '$strategy 1>>"$progressfile" echo ' ('$performance')' 1>>"$progressfile" echo ' '$explanation 1>>"$progressfile" echo '' 1>>"$progressfile" echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile" echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile" echo "" 1>>"$progressfile" echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile" echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile" echo "To disable this change, add the --leavegappyregion option." 1>>"$progressfile" # echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile" # echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>>"$progressfile" if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then echo "" 1>>"$progressfile" if [ $oldgenafparam -eq 1 ]; then echo "Obsolete parameters used for this calculation." 1>>"$progressfile" echo "Also try the new parameters for E-INS-i, by not specifying --oldgenafpair." 1>>"$progressfile" else echo "Parameters for the E-INS-i option have been changed in version 7.243 (2015 Jun)." 1>>"$progressfile" echo "To switch to the old parameters, use --oldgenafpair, instead of --genafpair." 
1>>"$progressfile" fi fi echo '' 1>>"$progressfile" if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then # cat dasherr >>"$progressfile" echo '' >>"$progressfile" fi popd > /dev/null if [ "$outputopt" != "-f" -o "$windows" = "yes" ]; then # Windows deha kaigyo code wo f2cl de modosu. # ln -s "$TMPFILE/order" _order$$ # f2cl ga space ari filename ni taiou shiteinainode # cp "$TMPFILE/order" _order$$ # ln -s no error wo sakeru if [ "$outputfile" = "" ]; then "$prefix/f2cl" -n $namelength $outputopt < "$TMPFILE/pre" 2>"/dev/null" || exit 1 else "$prefix/f2cl" -n $namelength $outputopt < "$TMPFILE/pre" > "$outputfile" 2>"/dev/null" || exit 1 fi # rm _order$$ else if [ "$outputfile" = "" ]; then cat < "$TMPFILE/pre" || exit 1 else cat < "$TMPFILE/pre" > "$outputfile" || exit 1 fi fi if [ $treeout -eq 1 ]; then cp "$TMPFILE/infile.tree" "$infilename.tree" fi if [ -s "$TMPFILE/GuideTree" ]; then # --merge no toki dake cp "$TMPFILE/GuideTree" . fi if [ $distout -eq 1 ]; then cp "$TMPFILE/hat2" "$infilename.hat2" fi if [ $npickup -ne 0 ]; then cp "$TMPFILE/notused" "$infilename.notused" fi if [ -s "$TMPFILE/_deletemap" ]; then if [ "$mapoutfile" = "/dev/null" ]; then cp "$TMPFILE/_deletemap" "$addfile.map" else cp "$TMPFILE/_deletemap" "$mapoutfile" fi fi exit 0; fi prog="awk" #tmpawk=`which nawk 2>/dev/null | awk '{print $1}'` #if [ -x "$tmpawk" ]; then # prog="$tmpawk" #fi # #tmpawk=`which gawk 2>/dev/null | awk '{print $1}'` #if [ -x "$tmpawk" ]; then # prog="$tmpawk" #fi # 2017/May/12, Windows no gawk wo sakeru tame #echo "prog="$prog 1>&2 umask 077 ( $prog ' BEGIN { prefix = ENVIRON["prefix"]; version = ENVIRON["version"]; myself = ENVIRON["myself"]; pwd = ENVIRON["mafft_working_dir"]; # from mafft.bat on windows if( pwd == "" ) pwd = ENVIRON["PWD"]; while( 1 ) { options = "" printf( "\n" ) > "/dev/tty"; printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; printf( " MAFFT %s\n", 
version ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; # printf( " Copyright (c) 2002- Kazutaka Katoh\n" ) > "/dev/tty"; printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty"; printf( " https://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty"; printf( "---------------------------------------------------------------------\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Input file? (FASTA format; Folder=%s)\n@ ", pwd ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ) if( res == 0 || NF == 0 ) continue; infile = sprintf( "%s", $0 ); res = getline < infile; close( infile ); if( res == -1 ) { printf( "%s: No such file.\n\n", infile ) > "/dev/tty"; printf( "Filename extension (eg., .txt) must be typed, if any.\n\n" ) > "/dev/tty"; } else if( res == 0 ) printf( "%s: Empty.\n", infile ) > "/dev/tty"; else { printf( "OK. infile = %s\n\n", infile ) > "/dev/tty"; break; } } nseq = 0; while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Output file?\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 || NF == 0 ) continue; else { outfile = sprintf( "%s", $0 ); printf( "OK. outfile = %s\n\n", outfile ) > "/dev/tty"; break; } } while( 1 ) { outargs = ""; printf( "\n" ) > "/dev/tty"; printf( "Output format?\n" ) > "/dev/tty"; printf( " 1. Clustal format / Sorted\n" ) > "/dev/tty"; printf( " 2. Clustal format / Input order\n" ) > "/dev/tty"; printf( " 3. Fasta format / Sorted\n" ) > "/dev/tty"; printf( " 4. Fasta format / Input order\n" ) > "/dev/tty"; printf( " 5. Phylip format / Sorted\n" ) > "/dev/tty"; printf( " 6. 
Phylip format / Input order\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); # printf( "res=%d, NF=%d\n", res, NF ); resnum = 0 + $1; # printf( "resnum=%d\n", resnum ); if( resnum < 1 || 6 < resnum ) continue; else { if( resnum == 1 ) outargs = "--clustalout --reorder"; else if( resnum == 2 ) outargs = "--clustalout --inputorder"; else if( resnum == 3 ) outargs = "--reorder"; else if( resnum == 4 ) outargs = "--inputorder"; else if( resnum == 5 ) outargs = "--phylipout --reorder"; else if( resnum == 6 ) outargs = "--phylipout --inputorder"; else continue; printf( "OK. arguments = %s\n\n", outargs ) > "/dev/tty"; break; } } while( 1 ) { arguments = ""; printf( "\n" ) > "/dev/tty"; printf( "Strategy?\n" ) > "/dev/tty"; printf( " 1. --auto\n" ) > "/dev/tty"; printf( " 2. FFT-NS-1 (fast)\n" ) > "/dev/tty"; printf( " 3. FFT-NS-2 (default)\n" ) > "/dev/tty"; printf( " 4. G-INS-i (accurate)\n" ) > "/dev/tty"; printf( " 5. L-INS-i (accurate)\n" ) > "/dev/tty"; printf( " 6. E-INS-i (accurate)\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); # printf( "res=%d, NF=%d\n", res, NF ); resnum = 0 + $1; # printf( "resnum=%d\n", resnum ); if( resnum < 1 || 6 < resnum ) continue; else { if( resnum == 1 ) arguments = "--auto"; else if( resnum == 2 ) arguments = "--retree 1"; else if( resnum == 3 ) arguments = "--retree 2"; else if( resnum == 4 ) arguments = "--globalpair --maxiterate 16"; else if( resnum == 5 ) arguments = "--localpair --maxiterate 16"; else if( resnum == 6 ) arguments = "--genafpair --maxiterate 16"; else arguments = sprintf( "%s", $0 ); printf( "OK. arguments = %s %s\n\n", arguments, outargs ) > "/dev/tty"; break; } } while( 1 ) { printf( "\n" ) > "/dev/tty"; printf( "Additional arguments? 
(--ep # --op # --kappa # etc)\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 || NF == 0 ) { break; } else { addargs = sprintf( "%s", $0 ); printf( "OK. arguments = %s %s %s\n\n", addargs, arguments, outargs ) > "/dev/tty"; break; } } arguments = sprintf( "%s %s %s", addargs, arguments, outargs ); print "" command = sprintf( "\"%s\" %s \"%s\" > \"%s\"", myself, arguments, infile, outfile ); gsub( /\\/, "/", command ); printf( "command=\n%s\n", command ) > "/dev/tty"; while( 1 ) { go = 0; printf( "Type Y or just enter to run this command.\n" ) > "/dev/tty"; printf( "@ " ) > "/dev/tty"; res = getline < "/dev/tty"; close( "/dev/tty" ); if( res == 0 ) continue; else if( NF == 0 || $0 ~ /^[Yy]/ ) { go=1; break; } else break; } if( go ) break; printf( "\n" ) > "/dev/tty"; printf( "\n" ) > "/dev/tty"; } system( command ); command = sprintf( "more \"%s\"", outfile ); system( command ); printf( "Press Enter to exit." ) > "/dev/tty"; res = getline < "/dev/tty"; } ' ) exit 0; mafft-7.505-without-extensions/core/Makefile0000644000175000017500000005152414224501721020450 0ustar nileshnileshPREFIX = /usr/local LIBDIR = $(PREFIX)/libexec/mafft BINDIR = $(PREFIX)/bin MANDIR = $(PREFIX)/share/man/man1 DESTDIR = #MNO_CYGWIN = -mno-cygwin ENABLE_MULTITHREAD = -Denablemultithread # Comment out the above line if your compiler # does not support TLS (thread-local strage). #ENABLE_ATOMIC = -Denableatomic # Comment out the above line if your compiler # does not support "atomic_int". #DASH_CLIENT = dash_client # Uncomment the above line to use protein 3D # structural information. Go language is required. CC = gcc #CC = icc CFLAGS = -O3 #CFLAGS = -O3 -fPIC # add -fPIC when building .so files #CC = icc #CFLAGS = -fast # if you have icc, use this. 
#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3
#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3 -fprofile-arcs -ftest-coverage
#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark, valgrind
#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 -lprofiler # ?

# Link against pthread only when ENABLE_MULTITHREAD is set.
ifdef ENABLE_MULTITHREAD
LIBS = -lm -lpthread
else
LIBS = -lm
endif

# Select the C standard: c11 when ENABLE_ATOMIC is set, c99 otherwise.
ifdef ENABLE_ATOMIC
STDF = -std=c11
else
STDF = -std=c99
endif

# Flags passed to every compile and link command in this Makefile.
MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(ENABLE_ATOMIC) $(STDF) $(CFLAGS)

INSTALL = install

STRIP = strip
#STRIP = true # to disable strip

# Programs, libraries and scripts built by this Makefile.
PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \
        multi2hat3s pairash addsingle maffttext2hex hex2maffttext \
        splittbfast disttbfast tbfast nodepair mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \
        seq2regtable regtable2seq score getlag dndpre setcore filter replaceu restoreu setdirection makedirectionlist version \
        $(DASH_CLIENT)
SOS = libdisttbfast.so
DLLS = libdisttbfast.dll
DYLIBS = libdisttbfast.dylib

PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl
SCRIPTS = mafft mafft-homologs.rb mafft-sparsecore.rb

# Object lists, one per program.
OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJFILTER = mtxutl.o io.o filter.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o
OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o
OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o
OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \
        multi2hat3s.o defs.o fft.o fftFunctions.o Galign11.o Lalign11.o genalign11.o
OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o MSalignmm.o Galign11.o Lalign11.o genalign11.o MSalign11.o suboptalign11.o SAalignmm.o \
        pairash.o defs.o fft.o fftFunctions.o
OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o MSalignmm.o Galign11.o Lalign11.o genalign11.o MSalign11.o suboptalign11.o SAalignmm.o \
        pairlocalalignmain.o pairlocalalign.o defs.o fft.o fftFunctions.o
OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        disttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o
OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        splitfromaln.o defs.o fft.o fftFunctions.o
OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        splittbfast.o defs.o fft.o fftFunctions.o
OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        defs.o fft.o fftFunctions.o
OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o
OBJMAKEDIRECTIONLIST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        makedirectionlist.o defs.o fft.o fftFunctions.o addfunctions.o
OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o \
        tbfast.o defs.o fft.o fftFunctions.o addfunctions.o \
        pairlocalalign.o MSalign11.o
OBJNODPAIR = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o \
        nodepair.o defs.o fft.o fftFunctions.o addfunctions.o \
        pairlocalalign.o MSalign11.o
OBJADDSINGLE = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o \
        addsingle.o defs.o fft.o fftFunctions.o addfunctions.o
OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o \
        setcore.o defs.o fft.o fftFunctions.o
OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o fftFunctions.o fft.o \
        tditeration.o tditr.o defs.o SAalignmm.o treeOperation.o
OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o MSalignmm.o fftFunctions.o fft.o \
        tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o addfunctions.o
OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
        Falign.o Falign_localhom.o Galign11.o Lalign11.o genalign11.o SAalignmm.o MSalignmm.o Dalignmm.o \
        getlag.o defs.o fft.o fftFunctions.o
OBJGAPFILL = mtxutl.o io.o constants.o gapfill.o defs.o
OBJDNDFAST5 = dndfast5.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
OBJDNDBLAST = dndblast.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o Galign11.o Lalign11.o genalign11.o
OBJDNDFAST7 = dndfast7.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o Galign11.o Lalign11.o genalign11.o
OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
OBJDNDFAST4 = dndfast4.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
OBJSEXTET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o sextet5.o defs.o Galign11.o Lalign11.o genalign11.o
OBJDISTANCE = io.o constants.o mtxutl.o mltaln9.o tddis.o mafft-distance.o defs.o Galign11.o Lalign11.o genalign11.o
OBJTRIPLET6 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet6.o defs.o
OBJTRIPLET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet5.o defs.o
OBJOCTET4 = io.o constants.o mtxutl.o mltaln9.o tddis.o octet4.o defs.o
OBJDNDPRE = dndpre.o io.o constants.o mtxutl.o mltaln9.o defs.o Galign11.o Lalign11.o genalign11.o
OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
        SAalignmm.o Galign11.o Lalign11.o genalign11.o Falign.o Falign_localhom.o fftFunctions.o fft.o mafft-profile.o defs.o
OBJSCORE = io.o mtxutl.o mltaln9.o score.o constants.o defs.o Galign11.o Lalign11.o genalign11.o

# Header groups used as prerequisites, and the manual pages copied by "all".
HEADER = mltaln.h mtxutl.h mafft.h
FFTHEADER = fft.h

MANPAGES = mafft.1 mafft-homologs.1

# Default target: build everything, then copy the scripts to ../scripts and
# the programs and manual pages to ../binaries.  Declared phony because no
# file named "all" is ever created.
.PHONY : all
all : $(PERLPROGS) $(PROGS) $(SCRIPTS)
	cp $(SCRIPTS) ../scripts
	chmod 755 ../scripts/*
	cp $(PERLPROGS) $(PROGS) ../binaries
	chmod 755 ../binaries/*
	cp $(MANPAGES) ../binaries
	@echo done.
# Convenience aliases for the shared-library builds.  They never produce a
# file of their own name, so they are declared phony.
.PHONY : sos dylibs dlls
sos : $(SOS)
dylibs : $(DYLIBS)
dlls : $(DLLS)

$(DASH_CLIENT): dash_client.go
#	go build dash_client.go
	env CGO_ENABLED=0 go build --ldflags '-extldflags "-static"' dash_client.go # for conda

# Scripts generated from templates by substituting a placeholder with sed.
univscript: univscript.tmpl Makefile
	sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript

mafft: mafft.tmpl mltaln.h
	sed "s:_LIBDIR:$(LIBDIR):" mafft.tmpl > mafft

mafft-homologs.rb: mafft-homologs.tmpl
#	cp mafft-homologs.tmpl mafft-homologs.rb
	sed "s:_BINDIR:$(BINDIR):" mafft-homologs.tmpl > mafft-homologs.rb

mafft-sparsecore.rb: mafft-sparsecore.tmpl
#	cp mafft-sparsecore.tmpl mafft-sparsecore.rb
	sed "s:_BINDIR:$(BINDIR):" mafft-sparsecore.tmpl > mafft-sparsecore.rb

# Touching mltaln.h whenever functions.h changes makes every rule that lists
# mltaln.h as a prerequisite run again.
mltaln.h : functions.h
	touch mltaln.h

# Programs compiled and linked directly from a single source file.
version : version.c mltaln.h
	$(CC) -o $@ version.c $(MYCFLAGS) $(LDFLAGS) $(LIBS)

maffttext2hex : maffttext2hex.c
	$(CC) -o $@ maffttext2hex.c $(MYCFLAGS) $(LDFLAGS) $(LIBS)

hex2maffttext : hex2maffttext.c
	$(CC) -o $@ hex2maffttext.c $(MYCFLAGS) $(LDFLAGS) $(LIBS)

# Programs linked from their object lists.
tbfast : $(OBJTBFAST)
	$(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

nodepair : $(OBJNODPAIR)
	$(CC) -o $@ $(OBJNODPAIR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

addsingle : $(OBJADDSINGLE)
	$(CC) -o $@ $(OBJADDSINGLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

disttbfast : $(OBJDISTTBFAST)
	$(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

# Shared-library variants of disttbfast (same objects, different link mode).
libdisttbfast.so : $(OBJDISTTBFAST)
	$(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

libdisttbfast.dylib : $(OBJDISTTBFAST)
	$(CC) -dynamiclib -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

libdisttbfast.dll : $(OBJDISTTBFAST)
	$(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

makedirectionlist : $(OBJMAKEDIRECTIONLIST)
	$(CC) -o $@ $(OBJMAKEDIRECTIONLIST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

splittbfast : $(OBJSPLITTBFAST)
	$(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

splitfromaln : $(OBJSPLITFROMALN)
	$(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dummy : $(OBJDUMMY)
	$(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
setcore : $(OBJSETCORE)
	$(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

countlen : $(OBJCOUNTLEN)
	$(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

seq2regtable : $(OBJSEQ2REGTABLE)
	$(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

regtable2seq : $(OBJREGTABLE2SEQ)
	$(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

setdirection : $(OBJSETDIRECTION)
	$(CC) -o $@ $(OBJSETDIRECTION) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

replaceu : $(OBJREPLACEU)
	$(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

filter : $(OBJFILTER)
	$(CC) -o $@ $(OBJFILTER) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

restoreu : $(OBJRESTOREU)
	$(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

f2cl : $(OBJF2CL)
	$(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

mccaskillwrap : $(OBJMCCASKILLWRAP)
	$(CC) -o $@ $(OBJMCCASKILLWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

contrafoldwrap : $(OBJCONTRAFOLDWRAP)
	$(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

pairlocalalign : $(OBJPAIRLOCALALIGN)
	$(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

pairash : $(OBJPAIRASH)
	$(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

multi2hat3s : $(OBJMULTI2HAT3S)
	$(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

getlag : $(OBJGETLAG)
	$(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

tditr : $(OBJTDITR)
	$(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dvtditr : $(OBJDVTDITR)
	$(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

mafft-profile : $(OBJGALN)
	$(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

gapfill : $(OBJGAPFILL)
	$(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndfast4 : $(OBJDNDFAST4)
	$(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndfast5 : $(OBJDNDFAST5)
	$(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndfast6 : $(OBJDNDFAST6)
	$(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndfast7 : $(OBJDNDFAST7)
	$(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndblast : $(OBJDNDBLAST)
	$(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

# NOTE(review): no assignment of OBJDNDFAST3, OBJTRIPLET, OBJTRIPLET3,
# OBJSEXTET3, OBJSEXTET4 or OBJGENMTX was found in the part of this Makefile
# that was reviewed; if they are unset, the rules using them link nothing.
# Confirm whether these targets are still meant to be buildable.
dndfast3 : $(OBJDNDFAST3)
	$(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

triplet : $(OBJTRIPLET)
	$(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

triplet3 : $(OBJTRIPLET3)
	$(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

sextet3 : $(OBJSEXTET3)
	$(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

sextet4 : $(OBJSEXTET4)
	$(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

sextet5 : $(OBJSEXTET5)
	$(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

mafft-distance : $(OBJDISTANCE)
	$(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

triplet5 : $(OBJTRIPLET5)
	$(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

triplet6 : $(OBJTRIPLET6)
	$(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

octet4 : $(OBJOCTET4)
	$(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

dndpre : $(OBJDNDPRE)
	$(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

score : $(OBJSCORE)
	$(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

genMtx : $(OBJGENMTX)
	$(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LDFLAGS) $(LIBS)

# Perl helper scripts are plain copies of their templates.
mafftash_premafft.pl : mafftash_premafft.tmpl
	cp mafftash_premafft.tmpl mafftash_premafft.pl

seekquencer_premafft.pl : seekquencer_premafft.tmpl
	cp seekquencer_premafft.tmpl seekquencer_premafft.pl

# Object files: one explicit compile rule per source file.
gapfill.o : gapfill.c $(HEADER)
	$(CC) $(MYCFLAGS) -c gapfill.c

mltaln9.o : mltaln9.c $(HEADER)
	$(CC) $(MYCFLAGS) -c mltaln9.c

tddis.o : tddis.c $(HEADER)
	$(CC) $(MYCFLAGS) -c tddis.c

constants.o : constants.c miyata.h miyata5.h blosum.c DNA.h JTT.c $(HEADER)
	$(CC) $(MYCFLAGS) -c constants.c

defs.o : defs.c
	$(CC) $(MYCFLAGS) -c defs.c

#A+++alignmm.o : SA+++alignmm.c $(HEADER)
#	$(CC) $(MYCFLAGS) -c SA+++alignmm.c -o A+++alignmm.o

Salignmm.o : Salignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c Salignmm.c

Dalignmm.o : Dalignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c Dalignmm.c

MSalignmm.o : MSalignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c MSalignmm.c

partSalignmm.o : partSalignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c partSalignmm.c

Lalign11.o : Lalign11.c $(HEADER)
	$(CC) $(MYCFLAGS) -c Lalign11.c

genalign11.o : genalign11.c $(HEADER)
	$(CC) $(MYCFLAGS) -c genalign11.c

suboptalign11.o : suboptalign11.c $(HEADER)
	$(CC) $(MYCFLAGS) -c suboptalign11.c

Galign11.o : Galign11.c $(HEADER)
	$(CC) $(MYCFLAGS) -c Galign11.c

MSalign11.o : MSalign11.c $(HEADER)
	$(CC) $(MYCFLAGS) -c MSalign11.c

SAalignmm.o : SAalignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c SAalignmm.c -o SAalignmm.o

Lalignmm.o : Lalignmm.c $(HEADER)
	$(CC) $(MYCFLAGS) -c Lalignmm.c

rna.o : rna.c $(HEADER)
	$(CC) $(MYCFLAGS) -c rna.c

disttbfast.o : disttbfast.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c disttbfast.c

splitfromaln.o : splitfromaln.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c splitfromaln.c

splittbfast.o : splittbfast.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c splittbfast.c

splittbfast2.o : splittbfast2.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c splittbfast2.c

makedirectionlist.o : makedirectionlist.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c makedirectionlist.c

disttbfast_dummy.o : disttbfast_dummy.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c disttbfast_dummy.c

dummy.o : dummy.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c dummy.c

tbfast.o : tbfast.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c tbfast.c

nodepair.o : nodepair.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c nodepair.c

addsingle.o : addsingle.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c addsingle.c

tbfast2.o : tbfast2.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c tbfast2.c

setcore.o : setcore.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c setcore.c

getlag.o : getlag.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c getlag.c

tditr.o : tditr.c $(HEADER)
	$(CC) $(MYCFLAGS) -c tditr.c

dvtditr.o : dvtditr.c $(HEADER)
	$(CC) $(MYCFLAGS) -c dvtditr.c

tditeration.o : tditeration.c $(HEADER)
	$(CC) $(MYCFLAGS) -c tditeration.c

# NOTE(review): no assignment of MTXHEADER was found in the part of this
# Makefile that was reviewed; if it is unset it expands to nothing in the
# rules below (mtxutl.h is already listed in HEADER).  Confirm.
mafft-profile.o : mafft-profile.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c mafft-profile.c

dndfast4.o : dndfast4.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndfast4.c

dndfast5.o : dndfast5.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndfast5.c

dndfast6.o : dndfast6.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndfast6.c

dndfast7.o : dndfast7.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndfast7.c

dndblast.o : dndblast.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndblast.c

dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c dndfast3.c

dndpre.o : dndpre.c $(HEADER)
	$(CC) $(MYCFLAGS) -c dndpre.c

countlen.o : countlen.c $(HEADER)
	$(CC) $(MYCFLAGS) -c countlen.c

seq2regtable.o : seq2regtable.c $(HEADER)
	$(CC) $(MYCFLAGS) -c seq2regtable.c

regtable2seq.o : regtable2seq.c $(HEADER)
	$(CC) $(MYCFLAGS) -c regtable2seq.c

f2cl.o : f2cl.c $(HEADER)
	$(CC) $(MYCFLAGS) -c f2cl.c

setdirection.o : setdirection.c $(HEADER)
	$(CC) $(MYCFLAGS) -c setdirection.c

replaceu.o : replaceu.c $(HEADER)
	$(CC) $(MYCFLAGS) -c replaceu.c

restoreu.o : restoreu.c $(HEADER)
	$(CC) $(MYCFLAGS) -c restoreu.c

mccaskillwrap.o : mccaskillwrap.c $(HEADER)
	$(CC) $(MYCFLAGS) -c mccaskillwrap.c

contrafoldwrap.o : contrafoldwrap.c $(HEADER)
	$(CC) $(MYCFLAGS) -c contrafoldwrap.c

pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c pairlocalalign.c

pairlocalalignmain.o : pairlocalalignmain.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c pairlocalalignmain.c

pairash.o : pairash.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c pairash.c

multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c multi2hat3s.c

io.o : io.c $(HEADER) $(FFTHEADER)
	$(CC) $(MYCFLAGS) -c io.c

nj.o : nj.c $(HEADER)
	$(CC) $(MYCFLAGS) -c nj.c

treeOperation.o : treeOperation.c $(HEADER)
	$(CC) $(MYCFLAGS) -c treeOperation.c

sextet5.o : sextet5.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c sextet5.c

mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER)
	$(CC) $(MYCFLAGS) -c mafft-distance.c

maffttext2hex.o : maffttext2hex.c
	$(CC)
$(MYCFLAGS) -c maffttext2hex.c hex2maffttext.o : hex2maffttext.c $(CC) $(MYCFLAGS) -c hex2maffttext.c triplet6.o : triplet6.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c triplet6.c fft.o : fft.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fft.c fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fftFunctions.c Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign.c Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign_localhom.c mtxutl.o : mtxutl.c $(CC) $(MYCFLAGS) -c mtxutl.c addfunctions.o : addfunctions.c $(HEADER) $(CC) $(MYCFLAGS) -c addfunctions.c score.o : score.c $(HEADER) $(CC) $(MYCFLAGS) -c score.c clean : rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) *.gcda *.gcno $(DASH_CLIENT) # rm -f ../binaries/* ../scripts/* install : all mkdir -p $(DESTDIR)$(LIBDIR) chmod 755 $(DESTDIR)$(LIBDIR) mkdir -p $(DESTDIR)$(BINDIR) chmod 755 $(DESTDIR)$(BINDIR) chmod 755 $(SCRIPTS) $(INSTALL) $(SCRIPTS) $(DESTDIR)$(BINDIR) chmod 755 $(PROGS) ||: # in MinGW, it's ok if this fails # $(INSTALL) -s $(PROGS) $(DESTDIR)$(LIBDIR) $(STRIP) $(PROGS) ||: # may fail for dash_client on mac. 
$(INSTALL) $(PROGS) $(DESTDIR)$(LIBDIR) $(INSTALL) $(PERLPROGS) $(DESTDIR)$(LIBDIR) $(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(LIBDIR) ( cd $(DESTDIR)$(BINDIR); \ rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \ rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi mafft-randomcore.rb ; \ ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \ ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \ ln -s mafft einsi; \ ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \ ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \ ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\ rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \ rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . ) mkdir -p $(DESTDIR)$(MANDIR) chmod 755 $(DESTDIR)$(MANDIR) $(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(MANDIR) # remove incorrectly installed manpages by previous versions # rm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1 mafft-7.505-without-extensions/core/Makefile.sos0000644000175000017500000004473414224501721021260 0ustar nileshnileshPREFIX = /usr/local LIBDIR = $(PREFIX)/libexec/mafft BINDIR = $(PREFIX)/bin MANDIR = $(PREFIX)/share/man/man1 #MNO_CYGWIN = -mno-cygwin ENABLE_MULTITHREAD = -Denablemultithread # Comment out the above line if your compiler # does not support TLS (thread-local strage). CC = gcc #CFLAGS = -O3 #CFLAGS = -O3 -fPIC # add -fPIC when building .so files #CC = icc #CFLAGS = -fast # if you have icc, use this. 
#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3 CFLAGS = -fPIC -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark, valgrind MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS) ifdef ENABLE_MULTITHREAD LIBS = -lm -lpthread else LIBS = -lm endif INSTALL = install PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \ pair2hat3s multi2hat3s pairash addsingle \ splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \ seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version SOS = libdisttbfast.so DLLS = libdisttbfast.dll DYLIBS = libdisttbfast.dylib PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl SCRIPTS = mafft mafft-homologs.rb OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \ multi2hat3s.o defs.o fft.o fftFunctions.o OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \ pair2hat3s.o defs.o fft.o fftFunctions.o OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \ pairash.o defs.o fft.o fftFunctions.o OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \ 
pairlocalalign.o defs.o fft.o fftFunctions.o OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ disttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splitfromaln.o defs.o fft.o fftFunctions.o OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splittbfast.o defs.o fft.o fftFunctions.o OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o splittbfast2.o defs.o fft.o fftFunctions.o OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o defs.o fft.o fftFunctions.o OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o OBJMAKEDIRECTIONLIST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ Lalign11.o makedirectionlist.o defs.o fft.o fftFunctions.o addfunctions.o OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \ tbfast.o defs.o fft.o fftFunctions.o addfunctions.o OBJADDSINGLE = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o 
SAalignmm.o \ addsingle.o defs.o fft.o fftFunctions.o addfunctions.o OBJTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o MSalignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \ tbfast2.o defs.o fft.o fftFunctions.o OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ setcore.o defs.o fft.o fftFunctions.o OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o fftFunctions.o fft.o \ tditeration.o tditr.o defs.o SAalignmm.o treeOperation.o OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o MSalignmm.o fftFunctions.o fft.o \ tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o addfunctions.o OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \ getlag.o defs.o fft.o fftFunctions.o OBJGAPFILL = mtxutl.o io.o constants.o gapfill.o defs.o OBJDNDFAST5 = dndfast5.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDBLAST = dndblast.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST7 = dndfast7.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST4 = dndfast4.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o OBJSEXTET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o sextet5.o defs.o OBJDISTANCE = io.o constants.o mtxutl.o mltaln9.o tddis.o mafft-distance.o defs.o OBJTRIPLET6 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet6.o defs.o OBJTRIPLET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet5.o defs.o OBJOCTET4 = io.o 
constants.o mtxutl.o mltaln9.o tddis.o octet4.o defs.o OBJDNDPRE = dndpre.o io.o constants.o mtxutl.o mltaln9.o defs.o OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o \ SAalignmm.o Galign11.o Falign.o Falign_localhom.o fftFunctions.o fft.o mafft-profile.o defs.o OBJSCORE = io.o mtxutl.o mltaln9.o score.o constants.o defs.o HEADER = mltaln.h mtxutl.h mafft.h FFTHEADER = fft.h MANPAGES = mafft.1 mafft-homologs.1 all : $(PERLPROGS) $(PROGS) $(SCRIPTS) cp $(SCRIPTS) ../scripts chmod 755 ../scripts/* cp $(PERLPROGS) $(PROGS) ../binaries chmod 755 ../binaries/* cp $(MANPAGES) ../binaries @echo done. sos : $(SOS) dylibs : $(DYLIBS) dlls : $(DLLS) univscript: univscript.tmpl Makefile sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript mafft: mafft.tmpl mltaln.h sed "s:_LIBDIR:$(LIBDIR):" mafft.tmpl > mafft mafft-homologs.rb: mafft-homologs.tmpl # cp mafft-homologs.tmpl mafft-homologs.rb sed "s:_BINDIR:$(BINDIR):" mafft-homologs.tmpl > mafft-homologs.rb mltaln.h : functions.h touch mltaln.h version : version.c mltaln.h $(CC) -o $@ version.c $(MYCFLAGS) $(LDFLAGS) tbfast : $(OBJTBFAST) $(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) addsingle : $(OBJADDSINGLE) $(CC) -o $@ $(OBJADDSINGLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) tbfast2 : $(OBJTBFAST2) $(CC) -o $@ $(OBJTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS) disttbfast : $(OBJDISTTBFAST) $(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) libdisttbfast.so : $(OBJDISTTBFAST) $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) libdisttbfast.dylib : $(OBJDISTTBFAST) $(CC) -dynamiclib -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) libdisttbfast.dll : $(OBJDISTTBFAST) $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) makedirectionlist : $(OBJMAKEDIRECTIONLIST) $(CC) -o $@ $(OBJMAKEDIRECTIONLIST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) splittbfast : $(OBJSPLITTBFAST) $(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) 
splitfromaln : $(OBJSPLITFROMALN) $(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) splittbfast2 : $(OBJSPLITTBFAST2) $(CC) -o $@ $(OBJSPLITTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dummy : $(OBJDUMMY) $(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LDFLAGS) $(LIBS) setcore : $(OBJSETCORE) $(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) countlen : $(OBJCOUNTLEN) $(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) seq2regtable : $(OBJSEQ2REGTABLE) $(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) regtable2seq : $(OBJREGTABLE2SEQ) $(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LDFLAGS) $(LIBS) setdirection : $(OBJSETDIRECTION) $(CC) -o $@ $(OBJSETDIRECTION) $(MYCFLAGS) $(LDFLAGS) $(LIBS) replaceu : $(OBJREPLACEU) $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) restoreu : $(OBJRESTOREU) $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) f2cl : $(OBJF2CL) $(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mccaskillwrap : $(OBJMCCASKILLWRAP) $(CC) -o $@ $(OBJMCCASKILLWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS) contrafoldwrap : $(OBJCONTRAFOLDWRAP) $(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pairlocalalign : $(OBJPAIRLOCALALIGN) $(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pairash : $(OBJPAIRASH) $(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LDFLAGS) $(LIBS) pair2hat3s : $(OBJPAIR2HAT3S) $(CC) -o $@ $(OBJPAIR2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS) multi2hat3s : $(OBJMULTI2HAT3S) $(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS) getlag : $(OBJGETLAG) $(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LDFLAGS) $(LIBS) tditr : $(OBJTDITR) $(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dvtditr : $(OBJDVTDITR) $(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafft-profile : $(OBJGALN) $(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS) gapfill : $(OBJGAPFILL) $(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast4 : $(OBJDNDFAST4) $(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LDFLAGS) 
$(LIBS) dndfast5 : $(OBJDNDFAST5) $(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast6 : $(OBJDNDFAST6) $(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast7 : $(OBJDNDFAST7) $(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndblast : $(OBJDNDBLAST) $(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndfast3 : $(OBJDNDFAST3) $(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet : $(OBJTRIPLET) $(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet3 : $(OBJTRIPLET3) $(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet3 : $(OBJSEXTET3) $(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet4 : $(OBJSEXTET4) $(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS) sextet5 : $(OBJSEXTET5) $(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafft-distance : $(OBJDISTANCE) $(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet5 : $(OBJTRIPLET5) $(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS) triplet6 : $(OBJTRIPLET6) $(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LDFLAGS) $(LIBS) octet4 : $(OBJOCTET4) $(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS) dndpre : $(OBJDNDPRE) $(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) score : $(OBJSCORE) $(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS) genMtx : $(OBJGENMTX) $(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LDFLAGS) $(LIBS) mafftash_premafft.pl : mafftash_premafft.tmpl cp mafftash_premafft.tmpl mafftash_premafft.pl seekquencer_premafft.pl : seekquencer_premafft.tmpl cp seekquencer_premafft.tmpl seekquencer_premafft.pl gapfill.o : gapfill.c $(HEADER) $(CC) $(MYCFLAGS) -c gapfill.c mltaln9.o : mltaln9.c $(HEADER) $(CC) $(MYCFLAGS) -c mltaln9.c tddis.o : tddis.c $(HEADER) $(CC) $(MYCFLAGS) -c tddis.c constants.o : constants.c miyata.h miyata5.h blosum.c DNA.h JTT.c $(HEADER) $(CC) $(MYCFLAGS) -c constants.c defs.o : defs.c $(CC) $(MYCFLAGS) -c defs.c #A+++alignmm.o : SA+++alignmm.c $(HEADER) # $(CC) $(MYCFLAGS) 
-c SA+++alignmm.c -o A+++alignmm.o Salignmm.o : Salignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Salignmm.c MSalignmm.o : MSalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c MSalignmm.c partSalignmm.o : partSalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c partSalignmm.c Lalign11.o : Lalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c Lalign11.c genalign11.o : genalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c genalign11.c suboptalign11.o : suboptalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c suboptalign11.c Galign11.o : Galign11.c $(HEADER) $(CC) $(MYCFLAGS) -c Galign11.c MSalign11.o : MSalign11.c $(HEADER) $(CC) $(MYCFLAGS) -c MSalign11.c SAalignmm.o : SAalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c SAalignmm.c -o SAalignmm.o Lalignmm.o : Lalignmm.c $(HEADER) $(CC) $(MYCFLAGS) -c Lalignmm.c rna.o : rna.c $(HEADER) $(CC) $(MYCFLAGS) -c rna.c disttbfast.o : disttbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c disttbfast.c splitfromaln.o : splitfromaln.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splitfromaln.c splittbfast.o : splittbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splittbfast.c splittbfast2.o : splittbfast2.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c splittbfast2.c makedirectionlist.o : makedirectionlist.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c makedirectionlist.c disttbfast_dummy.o : disttbfast_dummy.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c disttbfast_dummy.c dummy.o : dummy.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c dummy.c tbfast.o : tbfast.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c tbfast.c addsingle.o : addsingle.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c addsingle.c tbfast2.o : tbfast2.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c tbfast2.c setcore.o : setcore.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c setcore.c getlag.o : getlag.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c getlag.c tditr.o : tditr.c $(HEADER) $(CC) $(MYCFLAGS) -c tditr.c dvtditr.o : dvtditr.c $(HEADER) $(CC) $(MYCFLAGS) -c dvtditr.c tditeration.o : tditeration.c $(HEADER) $(CC) $(MYCFLAGS) -c 
tditeration.c mafft-profile.o : mafft-profile.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c mafft-profile.c dndfast4.o : dndfast4.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast4.c dndfast5.o : dndfast5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast5.c dndfast6.o : dndfast6.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast6.c dndfast7.o : dndfast7.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast7.c dndblast.o : dndblast.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndblast.c dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c dndfast3.c dndpre.o : dndpre.c $(HEADER) $(CC) $(MYCFLAGS) -c dndpre.c countlen.o : countlen.c $(HEADER) $(CC) $(MYCFLAGS) -c countlen.c seq2regtable.o : seq2regtable.c $(HEADER) $(CC) $(MYCFLAGS) -c seq2regtable.c regtable2seq.o : regtable2seq.c $(HEADER) $(CC) $(MYCFLAGS) -c regtable2seq.c f2cl.o : f2cl.c $(HEADER) $(CC) $(MYCFLAGS) -c f2cl.c setdirection.o : setdirection.c $(HEADER) $(CC) $(MYCFLAGS) -c setdirection.c replaceu.o : replaceu.c $(HEADER) $(CC) $(MYCFLAGS) -c replaceu.c restoreu.o : restoreu.c $(HEADER) $(CC) $(MYCFLAGS) -c restoreu.c mccaskillwrap.o : mccaskillwrap.c $(HEADER) $(CC) $(MYCFLAGS) -c mccaskillwrap.c contrafoldwrap.o : contrafoldwrap.c $(HEADER) $(CC) $(MYCFLAGS) -c contrafoldwrap.c pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pairlocalalign.c pairash.o : pairash.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pairash.c multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c multi2hat3s.c pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c pair2hat3s.c io.o : io.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c io.c nj.o : nj.c $(HEADER) $(CC) $(MYCFLAGS) -c nj.c treeOperation.o : treeOperation.c $(HEADER) $(CC) $(MYCFLAGS) -c treeOperation.c sextet5.o : sextet5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c sextet5.c mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c 
mafft-distance.c triplet5.o : triplet5.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c triplet5.c triplet6.o : triplet6.c $(HEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c triplet6.c fft.o : fft.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fft.c fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER) $(CC) $(MYCFLAGS) -c fftFunctions.c Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign.c Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER) $(CC) $(MYCFLAGS) -c Falign_localhom.c mtxutl.o : mtxutl.c $(CC) $(MYCFLAGS) -c mtxutl.c addfunctions.o : addfunctions.c $(HEADER) $(CC) $(MYCFLAGS) -c addfunctions.c score.o : score.c $(HEADER) $(CC) $(MYCFLAGS) -c score.c clean : rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) # rm -f ../binaries/* ../scripts/* install : all mkdir -p $(LIBDIR) chmod 755 $(LIBDIR) mkdir -p $(BINDIR) chmod 755 $(BINDIR) chmod 755 $(SCRIPTS) $(INSTALL) $(SCRIPTS) $(BINDIR) chmod 755 $(PROGS) $(INSTALL) -s $(PROGS) $(LIBDIR) $(INSTALL) $(PERLPROGS) $(LIBDIR) $(INSTALL) -m 644 $(MANPAGES) $(LIBDIR) ( cd $(BINDIR); \ rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \ rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi; \ ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \ ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \ ln -s mafft einsi; \ ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \ ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \ ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\ rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \ rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . 
) mkdir -p $(MANDIR) chmod 755 $(MANDIR) $(INSTALL) -m 644 $(MANPAGES) $(MANDIR) # remove incorrectly installed manpages by previous versions # rm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1 mafft-7.505-without-extensions/core/mafft-profile.c0000644000175000017500000002764714224501721021720 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #if DEBUG #include #include #include double getrusage_sec() { struct rusage t; struct timeval tv; getrusage(RUSAGE_SELF, &t); tv = t.ru_utime; return tv.tv_sec + (double)tv.tv_usec*1e-6; } #endif int intcmp( int *str1, int *str2 ) { while( *str1 != -1 && *str2 != -1 ) if( *str1++ != *str2++ ) return( 1 ); if( *str1 != *str2 ) return( 1 ); return( 0 ); } char **arguments( int argc, char *argv[] ) { int c = 0; fmodel = 0; nblosum = 62; calledByXced = 0; devide = 0; fftscore = 1; use_fft = 1; nevermemsave = 0; alg = 'A'; weight = 0; utree = 1; tbutree = 0; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 0; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; scoremtx = 1; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = 0; // chokusetsu yobareru kara kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; TMorJTT = JTT; treemethod = 'x'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'P': dorp = 'p'; break; case 'D': dorp = 'd'; break; case 'F': use_fft = 1; break; case 'N': use_fft = 0; break; case 'n': nevermemsave = 1; break; case 'e': fftscore = 0; break; case 'Q': alg = 'Q'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'd': disp = 1; break; case 'O': outgap = 0; break; case 'a': fmodel = 1; break; case 'u': tbrweight = 0; break; case 'U': tbrweight = -1; break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; case 
'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 2 ) { fprintf( stderr, "options: Check source file ! 
%c ?\n", c ); exit( 1 ); } fprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\n", tbitr, tbrweight, tbweight ); // readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); return( argv ); } void GroupAlign( int nseq1, int nseq2, char **name, int *nlen, char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double **len, double *eff, int alloclen ) { int i; int clus1, clus2; int s1, s2; double pscore; static char **name1, **name2; double *effarr = eff; double *effarr1 = NULL; double *effarr2 = NULL; static char *indication1, *indication2; // double dumfl = 0.0; double dumdb = 0.0; int intdum; #if DEBUG double time1, time2; #endif // fprintf( stderr, "in GroupAlign fftWinSize = %d\n", fftWinSize ); // fprintf( stderr, "in GroupAlign fftThreshold = %d\n", fftThreshold ); if( effarr1 == NULL ) { name1 = AllocateCharMtx( nseq1, B ); name2 = AllocateCharMtx( nseq2, B ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); #if 0 #else #endif } for( i=0; i 0 ) pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleVec( njob ); #if 0 njob=nseq2; FRead( gp2, name+nseq1, nlen+nseq1, seq2 ); njob=nseq1; FRead( gp1, name, nlen, seq1 ); #else njob=nseq2; readDataforgaln( gp2, name+nseq1, nlen+nseq1, seq2 ); njob=nseq1; readDataforgaln( gp1, name, nlen, seq1 ); #endif njob = nseq1 + nseq2; #if 0 // CHUUI commongappick( nseq1, seq1 ); commongappick( nseq2, seq2 ); #endif for( i=0; i 0 ) { for( i=0; i 0 ) { fprintf( stderr, "Constructing dendrogram ... 
" ); if( treemethod == 'x' ) veryfastsupg( njob, pscore, topol, len ); else ErrorExit( "Incorrect tree\n" ); fprintf( stderr, "done.\n" ); weight = 3; counteff_simple( njob, topol, len, eff ); // for( i=0; i 30000 || len2 > 30000 ) ) ) // if( len1 > 30000 || len2 > 30000 ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; } reporterr( "GroupAglin..\n" ); GroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen ); #if 0 writePre( njob, name, nlen, aseq, 1 ); #else writeDataforgaln( stdout, njob, name, nlen, aseq ); #endif SHOWVERSION; return( 0 ); } mafft-7.505-without-extensions/core/Dalignmm.c0000644000175000017500000055211514224501721020706 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define WMCHECK 1 #define ALGZSTRAIGHT 0 #define ALGZGAP 0 #define USEGAPLENMTX 0 #define USEGAPLENHALF 0 #define FREEFREQUENTLY 1 #define IDATEND 0 #define MACHIGAI 0 #define OUTGAP0TRY 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 #define SLOW 0 #define zero 0 #define one 1 #if USEGAPLENHALF #define USEGAPLENHALFORMTX 1 #endif #if USEGAPLENMTX #define USEGAPLENHALFORMTX 1 #endif #if WMCHECK static int PFACERROR = 0; #endif static TLS double **impmtx = NULL; static TLS int impalloclen = 0; double imp_match_out_scD( int i1, int j1 ) { // fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold ); // fprintf( stderr, "val = %f\n", impmtx[i1][j1] ); return( impmtx[i1][j1] ); } typedef struct _gaplenvec { int relend; #if USEGAPLENHALFORMTX int idatend; #endif int idatnext; int idatprev; int npat; int len; // sukoshi muda. 
double freq; } Gaplen; #if 0 static void imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int *gapmap2 ) { #if FASTMATCHCALC double *pt = impmtx[i1]; int *gapmappt = gapmap2; while( lgth2-- ) *imp++ += pt[*gapmappt++]; #else int j; double *pt = impmtx[i1]; for( j=0; j ", match[k], mid ); match[k] -= matrices[mid][c1][c2] * eff1[i] * eff2[j]; // fprintf( stderr, "match[k] = %f (mid=%d)\n", match[k], mid ); } } // fprintf( stderr, "done\n" ); return; } #if SLOW static void match_calc_slow( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, double **doublework, int **intwork, int initialize, int flip ) { // osoi! int i, j, k; int c1, c2; int mid; // fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 ); // fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] ); // fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] ); // for( i=0; i-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { #if FASTMATCHCALC // fprintf( stderr, "\nmatch_calc... 
%d", i1 ); int j, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *matchpt, *cpmxpdpt, **cpmxpdptpt; int *cpmxpdnpt, **cpmxpdnptpt; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { int count = 0; for( j=0; j-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int *warpis, int *warpjs, int warpbase ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk; double wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin; while( --l >= 0 ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; } break; } else { l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { 
*--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += (double)imp_match_out_scD( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin; while( --l >= 0 ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; } break; } else { l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= first ) { if( *s-- != '-' ) return( v ); v++; } if( s == first-1 ) return( v ); return( -1 ); } static void fillgaplen( Gaplen **mtx, int l ) { int i, j, n, k, len, pos, idatnext; double freq; for( i=0; i<=l; i++ ) { // reporterr( "i=%d\n", i ); if( mtx[i] == NULL ) continue; for( n=0; (len=mtx[i][n].len)!=-1; n++ ) { freq = mtx[i][n].freq; idatnext = n; for( j=0; j 0 ) { if( known[gl] ) { ; } else { nknown++; } } } val += nknown; } free( known ); return( val ); } static void gaplencount( int n, int l, Gaplen **mtx, char **seq, double *eff ) { int i, j, k, gl, *known, nknown; known = calloc( l+1, sizeof( int ) ); // for( i=0; i 0 ) { if( 
known[gl] ) { // reporterr( "gl=%d, Known!\n", gl ); for( k=0; mtx[j][k].len!=-1; k++ ) if( mtx[j][k].len == gl ) break; if( mtx[j][k].len == -1 ) { reporterr( "Unexpected error!\n" ); exit( 1 ); } mtx[j][k].freq += eff[i]; } else { // reporterr( "gl=%d, First!\n", gl ); mtx[j] = realloc( mtx[j], sizeof( Gaplen ) * (nknown+2) ); mtx[j][nknown].len = gl; mtx[j][nknown].relend = 0; mtx[j][nknown].freq = eff[i]; mtx[j][nknown].idatnext = -2; mtx[j][nknown+1].len = -1; mtx[j][nknown+1].idatnext = -1; mtx[j][nknown+1].relend = -1; mtx[j][nknown+1].freq = 0.0; mtx[j][nknown+1].npat = -1; #if USEGAPLENHALFORMTX mtx[j][nknown].idatend = nknown; mtx[j][nknown+1].idatend = -1; #endif known[gl]++; nknown++; mtx[j][0].npat = nknown; } } } } fillgaplen( mtx, l ); #if 0 reporterr( "Gaplen:\n" ); for( i=0; i<=l; i++ ) { // reporterr( "i=%d, gaplen[i] = %p\n", i, mtx[i] ); if( mtx[i] ) { for( j=0; mtx[i][j].len!=-1; j++ ) reporterr( "i=%d, len = %d, relend = %d, freq = %f\n", i, mtx[i][j].len, mtx[i][j].relend, mtx[i][j].freq ); } } #endif free( known ); } #if DEBUG static void showgaplen( Gaplen **mtx, int seqlen ) { int i, l; #if USEGAPLENHALFORMTX int id, pos; #endif // for( i=0; i<=seqlen; i++ ) for( i=0; ; i++ ) { // reporterr( "chain[%d] = %d\n", i, chain[i] ); if( mtx[i] == NULL ) continue; if( mtx[i] == (Gaplen *)1 ) break; for( l=0; mtx[i][l].idatnext!=-1; l++ ) { #if USEGAPLENHALFORMTX reporterr( "i=%d, l=%d, len=%d, relend=%d, idatend=%d, idatnext=%d, idatprev=%d, freq=%f\n", i, l, mtx[i][l].len, mtx[i][l].relend, mtx[i][l].idatend, mtx[i][l].idatnext, mtx[i][l].idatprev, mtx[i][l].freq ); pos = mtx[i][l].relend; id = mtx[i][l].idatend; if( mtx[i+pos] == NULL ) { // reporterr( "Error in SOURCE\n" ); reporterr( ".len and .freq were lost when i=%d!\n", i ); // exit( 1 ); } #else reporterr( "i=%d, l=%d, len=%d, relend=%d, idatnext=%d, idatprev=%d, freq=%f\n", i, l, mtx[i][l].len, mtx[i][l].relend, mtx[i][l].idatnext, mtx[i][l].idatprev, mtx[i][l].freq ); #endif } } } 
#endif #if WMCHECK static int pairgapcount( char *s1, char *s2 ) { char **tmpseq; int i, len, st, k; int v = 0; len = strlen( s1 ); tmpseq = calloc( sizeof( char * ), 2 ); tmpseq[0] = malloc( sizeof( char ) * ( len + 1 ) ); tmpseq[1] = malloc( sizeof( char ) * ( len + 1 ) ); strcpy( tmpseq[0], s1 ); strcpy( tmpseq[1], s2 ); commongappick( 2, tmpseq ); len = strlen( tmpseq[0] ); for( k=0; k<2; k++ ) { st = 0; for( i=0; iidatnext!=-1; k++ ) { pos1 = g1->relend; if( pos1 != 0 ) { pfac2 = 0.0; if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) { pos2 = g2->relend; if( pos2 == 0 && g2->len >g1->len - (pos1) + newgaplen ) { pfac2 += g2->freq; // reporterr( "hit! pfac2=%f, .freq=%f\n", pfac2, gaplen2[j][l].freq ); } // else // reporterr( "does not hit! pfac2=%f, gaplen1[i][k].len=%d, gaplen[i][k].relend=%d, newgaplen=%d\n", pfac2, gaplen1[i][k].len, gaplen1[i][k].relend, newgaplen ); } pfac += pfac2 * g1->freq; pfac1 += g1->freq; } else if( pos1 == 0 ) { pfac2 = 1.0; if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) { pos2 = g2->relend; if( pos2 == 0 && g2->len == g1->len+newgaplen ) pfac2 -= g2->freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && g2->len - (pos2-1) > g1->len+newgaplen ) pfac2 -= g2->freq;// keizoku suru gap, gaplen1 ha kangaenai. } // reporterr( "pfac2 in line 1056 = %f\n", pfac2 ); pfac += pfac2 * g1->freq; pfac10 += g1->freq; } } #if DEBUG reporterr( "pfac1 (step2) = %f\n", pfac1 ); reporterr( "pfac10 (step2) = %f\n", pfac10 ); reporterr( "pfac (step2) = %f\n", pfac ); #endif pfac1 = 1.0 - pfac1 - pfac10; pfac2 = 1.0; if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) { pos2 = g2->relend; if( pos2 == 0 && g2->len == newgaplen ) pfac2 -= g2->freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && g2->len - (pos2-1) > newgaplen ) pfac2 -= g2->freq;// keizoku suru gap, gaplen1 ha kangaenai. 
} #if DEBUG reporterr( "pfac1 (type3) = %f\n", pfac1 ); reporterr( "pfac2 (type3) = %f\n", pfac2 ); reporterr( "pfac (step3) = %f\n", pfac ); #endif pfac += pfac1 * pfac2; #if DEBUG reporterr( "incomplete pfac = %f, pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", pfac, seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif return( pfac ); #else double pfac, pfac1, pfac10, pfac2; int k, l, pos1, pos2, id1, id2; Gaplen *gaplen1i, *gaplen2j; #if 0 // .len no shouryaku ni taiou shiteinai int gl; if( disp ) { reporterr( "calcpfac_gap_noidatend, %c (%d) - %c (%d)\n", seq1[i], i, seq2[j], j ); reporterr( "newgaplen = %d\n", newgaplen ); reporterr( "In calcpfac_gap, gaplen1[%d(%c)] = \n", i, seq1[i] ); for( k=0; gaplen1[i]&&(id1=gaplen1[i][k].idatend)!=-1; k++ ) { pos1 = gaplen1[i][k].relend; reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen1[i+pos1][id1].len, gaplen1[i][k].relend, gaplen1[i+pos1][id1].freq[0] ); } reporterr( "In calcpfac_gap, gaplen2[%d(%c)] = \n", j, seq2[j] ); showgaplen( gaplen2, strlen(seq2) ); for( k=0; gaplen2[j]&&(id2=gaplen2[j][k].idatend)!=-1; k++ ) { pos2 = gaplen2[j][k].relend; reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen2[j+pos2][id2].len, gaplen2[j][k].relend, gaplen2[j+pos2][id2].freq[0] ); } } #endif gaplen2j = gaplen2[j]; gaplen1i = gaplen1[i]; pfac = 0.0; pfac1 = 0.0; pfac10 = 0.0; if( gaplen1i ) for( k=0; (gaplen1i[k].idatnext)!=-1; k++ ) { pos1 = gaplen1i[k].relend; if( pos1 != 0 ) { pfac2 = 0.0; if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) { pos2 = gaplen2j[l].relend; if( pos2 == 0 && gaplen2j[l].len > gaplen1i[k].len - (pos1) + newgaplen ) { pfac2 += gaplen2j[l].freq; // reporterr( "hit! pfac2=%f, .freq=%f\n", pfac2, gaplen2[j][l].freq ); } // else // reporterr( "does not hit! 
pfac2=%f, gaplen1[i][k].len=%d, gaplen[i][k].relend=%d, newgaplen=%d\n", pfac2, gaplen1[i][k].len, gaplen1[i][k].relend, newgaplen ); } pfac += pfac2 * gaplen1i[k].freq; pfac1 += gaplen1i[k].freq; } else if( pos1 == 0 ) { pfac2 = 1.0; if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) { pos2 = gaplen2j[l].relend; if( pos2 == 0 && gaplen2j[l].len == gaplen1i[k].len+newgaplen ) pfac2 -= gaplen2j[l].freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && gaplen2j[l].len - (pos2-1) > gaplen1i[k].len+newgaplen ) pfac2 -= gaplen2j[l].freq;// keizoku suru gap, gaplen1 ha kangaenai. } // reporterr( "pfac2 in line 1056 = %f\n", pfac2 ); pfac += pfac2 * gaplen1i[k].freq; pfac10 += gaplen1i[k].freq; } } #if DEBUG reporterr( "pfac1 (step2) = %f\n", pfac1 ); reporterr( "pfac10 (step2) = %f\n", pfac10 ); reporterr( "pfac (step2) = %f\n", pfac ); #endif pfac1 = 1.0 - pfac1 - pfac10; pfac2 = 1.0; if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) { pos2 = gaplen2j[l].relend; if( pos2 == 0 && gaplen2j[l].len == newgaplen ) pfac2 -= gaplen2j[l].freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && gaplen2j[l].len - (pos2-1) > newgaplen ) pfac2 -= gaplen2j[l].freq;// keizoku suru gap, gaplen1 ha kangaenai. 
} #if DEBUG reporterr( "pfac1 (type3) = %f\n", pfac1 ); reporterr( "pfac2 (type3) = %f\n", pfac2 ); reporterr( "pfac (step3) = %f\n", pfac ); #endif pfac += pfac1 * pfac2; #if DEBUG reporterr( "incomplete pfac = %f, pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", pfac, seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif return( pfac ); #endif } #if USEGAPLENHALFORMTX static double calcpfac_gap_incomplete( Gaplen **gaplen1, Gaplen **gaplen2, int newgaplen, int i, int j, char *seq1, char *seq2, int disp ) // seq1 to seq2 ha debug you { double pfac, pfac1, pfac10, pfac2; int k, l, pos1, pos2, id1, id2; Gaplen *gapend1, *gapend2; Gaplen *gaplen1i, *gaplen2j; #if 0 // .len no shouryaku ni taiou shiteinai int gl; if( disp ) { reporterr( "calcpfac_gap_incomplete, %c (%d) - %c (%d)\n", seq1[i], i, seq2[j], j ); reporterr( "newgaplen = %d\n", newgaplen ); reporterr( "In calcpfac_gap, gaplen1[%d(%c)] = \n", i, seq1[i] ); for( k=0; gaplen1[i]&&(id1=gaplen1[i][k].idatend)!=-1; k++ ) { pos1 = gaplen1[i][k].relend; reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen1[i+pos1][id1].len, gaplen1[i][k].relend, gaplen1[i+pos1][id1].freq[0] ); } reporterr( "In calcpfac_gap, gaplen2[%d(%c)] = \n", j, seq2[j] ); showgaplen( gaplen2, strlen(seq2) ); for( k=0; gaplen2[j]&&(id2=gaplen2[j][k].idatend)!=-1; k++ ) { pos2 = gaplen2[j][k].relend; reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen2[j+pos2][id2].len, gaplen2[j][k].relend, gaplen2[j+pos2][id2].freq[0] ); } } #endif gaplen2j = gaplen2[j]; gaplen1i = gaplen1[i]; pfac = 0.0; pfac1 = 0.0; pfac10 = 0.0; if( gaplen1i ) for( k=0; (id1=gaplen1i[k].idatend)!=-1; k++ ) { pos1 = gaplen1i[k].relend; gapend1 = gaplen1[i+pos1]+id1; if( pos1 != 0 ) { pfac2 = 0.0; if( gaplen2j ) for( l=0; (id2=gaplen2j[l].idatend)!=-1; l++ ) { pos2 = gaplen2j[l].relend; gapend2 = gaplen2[j+pos2]+id2; // if( pos2 == 0 && gapend2->len + 1 > gapend1->len - (pos1-1) + newgaplen ) if( pos2 == 0 && gapend2->len > gapend1->len - (pos1) + newgaplen ) { pfac2 += gapend2->freq; // 
reporterr( "hit! pfac2=%f, .freq=%f\n", pfac2, gaplen2[j][l].freq ); } // else // reporterr( "does not hit! pfac2=%f, gaplen1[i][k].len=%d, gaplen[i][k].relend=%d, newgaplen=%d\n", pfac2, gaplen1[i][k].len, gaplen1[i][k].relend, newgaplen ); } pfac += pfac2 * gapend1->freq; pfac1 += gapend1->freq; } else if( pos1 == 0 ) { pfac2 = 1.0; if( gaplen2j ) for( l=0; (id2=gaplen2j[l].idatend)!=-1; l++ ) { pos2 = gaplen2j[l].relend; gapend2 = gaplen2[j+pos2]+id2; if( pos2 == 0 && gapend2->len == gapend1->len+newgaplen ) pfac2 -= gapend2->freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && gapend2->len - (pos2-1) > gapend1->len+newgaplen ) pfac2 -= gapend2->freq;// keizoku suru gap, gaplen1 ha kangaenai. } // reporterr( "pfac2 in line 1056 = %f\n", pfac2 ); pfac += pfac2 * gapend1->freq; pfac10 += gapend1->freq; } } #if DEBUG reporterr( "pfac1 (step2) = %f\n", pfac1 ); reporterr( "pfac10 (step2) = %f\n", pfac10 ); reporterr( "pfac (step2) = %f\n", pfac ); #endif pfac1 = 1.0 - pfac1 - pfac10; pfac2 = 1.0; if( gaplen2j ) for( l=0; (id2=gaplen2j[l].idatend)!=-1; l++ ) { pos2 = gaplen2j[l].relend; gapend2 = gaplen2[j+pos2]+id2; if( pos2 == 0 && gapend2->len == newgaplen ) pfac2 -= gapend2->freq;// kokode shuryou suru gap, gaplen1 ha kangaenai. if( pos2 != 0 && gapend2->len - (pos2-1) > newgaplen ) pfac2 -= gapend2->freq;// keizoku suru gap, gaplen1 ha kangaenai. 
} #if DEBUG reporterr( "pfac1 (type3) = %f\n", pfac1 ); reporterr( "pfac2 (type3) = %f\n", pfac2 ); reporterr( "pfac (step3) = %f\n", pfac ); #endif pfac += pfac1 * pfac2; #if DEBUG reporterr( "incomplete pfac = %f, pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", pfac, seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif return( pfac ); } static double calcpfac_gapex( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, int newgaplen, char *seq1, char *seq2, int disp ) { double pfac, pfac1, pfac2, pfac10; int k, l, id1, id2, pos1, pos2; Gaplen *gapend1, *gapend2; Gaplen *gaplen1i, *gaplen2j; gaplen1i = gaplen1[i]; gaplen2j = gaplen2[j]; pfac = 0.0; pfac2 = 0.0; // for( k=0; gaplen2[j]&&(gl=gaplen2[j][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou if( gaplen2j ) for( k=0; (id2=gaplen2j[k].idatend)!=-1; k++ ) // ososugi! hash ni atode henkou { #if DEBUG int gl; pos2 = gaplen2j[k].relend; id2 = gaplen2j[k].idatend; gl = gaplen2[j+pos2][id2].len; if( disp ) reporterr( "gaplen2[][].len=%d, .relend=%d, .freq=%f\n", gaplen2[j+pos2][id2].len, gaplen2[j][k].relend, gaplen2[j+pos2][id2].freq ); if( disp ) reporterr( "gl = %d, newgaplen=%d\n", gl, newgaplen ); #endif if( (pos2=gaplen2[j][k].relend) != 0 ) continue; gapend2 = gaplen2[j+pos2]+id2; pfac1 = 1.0; pfac10 = 1.0; if( gaplen1i ) for( l=0; (id1=gaplen1i[l].idatend)!=-1; l++ ) // ososugi! hash ni atode henkou { pos1 = gaplen1i[l].relend; gapend1 = gaplen1[i+pos1]+id1; pfac10 -= gapend1->freq; #if DEBUG if( disp ) reporterr( "gaplen1[][].len=%d, .relend=%d, .freq=%f\n", gaplen1[i+pos1][id1].len, gaplen1[i][l].relend, gaplen1[i+pos1][id1].freq ); #endif if( newgaplen + gapend1->len - (pos1) > gapend2->len - (pos2) ) pfac1 -= gapend1->freq; // reporterr( "pfac1 = %f\n", pfac1 ); } pfac += pfac1 * gapend2->freq; /* ???? */ if( newgaplen >= gapend2->len - (pos2-1) ) // >= or >?? { pfac -= pfac10 * gapend2->freq; // reporterr( "Hit! pfac1 = %f\n", pfac1 ); } /* ???? 
*/ // if( gaplen2[j][k].relend == -1 ) pfac += gaplen2[j][k].freq; } return( pfac ); } static double calcpfac( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, char *seq1, char *seq2, int disp ) // seq1 to seq2 ha debug you { double pfac, pfac1, pfac2; int k, l, pos1, pos2, id1, id2; Gaplen *gapend1, *gapend2; Gaplen *gaplen1i, *gaplen2j; gaplen1i = gaplen1[i]; gaplen2j = gaplen2[j]; #if DEBUG if( disp ) { reporterr( "seq1[0] = %s\n", seq1 ); reporterr( "seq2[0] = %s\n", seq2 ); reporterr( "i,j=%d,%d\n", i, j ); reporterr( "In calcpfac(), gaplen1[%d(%c)] = \n", i, seq1[i] ); // showgaplen( gaplen1, seqlen( seq1 ) ); for( k=0; gaplen1[i]&&(id1=gaplen1[i][k].idatend)!=-1; k++ ) { pos1 = gaplen1[i][k].relend; reporterr( "pos1=%d, id1=%d\n", pos1, id1 ); reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen1[i+pos1][id1].len, gaplen1[i][k].relend, gaplen1[i+pos1][id1].freq ); } reporterr( "In calcpfac(), gaplen2[%d(%c)] = \n", j, seq2[j] ); // showgaplen( gaplen2, seqlen( seq2 ) ); for( k=0; gaplen2[j]&&(id2=gaplen2[j][k].idatend)!=-1; k++ ) { pos2 = gaplen2[j][k].relend; reporterr( "j=%d, k=%d, id2=%d, pos2=%d\n", j, k, id2, pos2 ); reporterr( ".len=%d, .relend=%d\n", gaplen2[j+pos2][id2].len, gaplen2[j][k].relend ); reporterr( ".freq=%f\n", gaplen2[j+pos2][id2].freq ); } } #endif pfac1 = pfac2 = 0.0; if( gaplen1i ) for( k=0; (id1=gaplen1i[k].idatend)!=-1; k++ ) { if( (pos1=gaplen1i[k].relend) == 0 ) pfac1 += gaplen1[i+pos1][id1].freq; } if( gaplen2j ) for( l=0; (id2=gaplen2j[l].idatend)!=-1; l++ ) // ososugi! hash ni atode henkou { if( (pos2=gaplen2j[l].relend) == 0 ) pfac2 += gaplen2[j+pos2][id2].freq; } #if DEBUG reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif pfac = pfac1 * pfac2 + pfac1 * (1-pfac2) + pfac2 * (1-pfac1); #if DEBUG reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, pfac ); #endif #if 1 // if( pfac ) reporterr( "i,j=%d,%d, Cancel (eq len)? 
pfac = %f -> ", i, j, pfac ); if( gaplen1i ) for( k=0; (id1=gaplen1i[k].idatend)!=-1; k++ ) // ososugi! hash ni atode henkou { pos1=gaplen1i[k].relend; gapend1 = gaplen1[i+pos1]+id1; if( gaplen2j ) for( l=0; (id2=gaplen2j[l].idatend)!=-1; l++ ) // ososugi! hash ni atode henkou { pos2 = gaplen2j[l].relend; gapend2 = gaplen2[j+pos2]+id2; if ( pos1 == 0 && pos2 == 0 && gapend1->len == gapend2->len ) pfac -= gapend1->freq * gapend2->freq; else if( pos1 == 0 && pos2 != 0 && gapend2->len - (pos2-1) > gapend1->len ) pfac -= gapend1->freq * gapend2->freq; else if( pos1 != 0 && pos2 == 0 && gapend1->len - (pos1-1) > gapend2->len ) pfac -= gapend1->freq * gapend2->freq; } } #if DEBUG reporterr( "\n\nFinal pfac1,pfac2 (%c%d,%c%d, straight) = %f\n\n", seq1[i], i, seq2[j], j, pfac ); #endif #else #endif return( pfac ); } #endif static double calcpfac_gapex_noidatend( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, int newgaplen, char *seq1, char *seq2, int disp ) { #if 1 double pfac, pfac1, pfac2, pfac10; int k, l, pos1, pos2; Gaplen *gaplen1i, *gaplen2j, *g1, *g2; gaplen1i = gaplen1[i]; gaplen2j = gaplen2[j]; pfac = 0.0; pfac2 = 0.0; if( gaplen2j ) for( k=0; (g2=gaplen2j+k)->idatnext!=-1; k++ ) { #if DEBUG int gl; pos2 = gaplen2j[k].relend; gl = gaplen2j[k].len; if( disp ) reporterr( "gaplen2[][].len=%d, .relend=%d, .freq=%f\n", gaplen2[j][k].len, gaplen2[j][k].relend, gaplen2[j][k].freq ); if( disp ) reporterr( "gl = %d, newgaplen=%d\n", gl, newgaplen ); #endif if( (pos2=g2->relend) != 0 ) continue; pfac1 = 1.0; pfac10 = 1.0; if( gaplen1i ) for( l=0; (g1=gaplen1i+l)->idatnext!=-1; l++ ) { pos1 = g1->relend; pfac10 -= g1->freq; #if DEBUG if( disp ) reporterr( "gaplen1[][].len=%d, .relend=%d, .freq=%f\n", gaplen1[i][l].len, gaplen1[i][l].relend, gaplen1[i][l].freq ); #endif if( newgaplen + g1->len - (pos1) > g2->len - (pos2) ) pfac1 -= g1->freq; // reporterr( "pfac1 = %f\n", pfac1 ); } pfac += pfac1 * g2->freq; /* ???? */ if( newgaplen >= g2->len - (pos2-1) ) // >= or >?? 
{ pfac -= pfac10 * g2->freq; // reporterr( "Hit! pfac1 = %f\n", pfac1 ); } /* ???? */ // if( gaplen2[j][k].relend == -1 ) pfac += gaplen2[j][k].freq; } return( pfac ); #else double pfac, pfac1, pfac2, pfac10; int k, l, id1, id2, pos1, pos2; Gaplen *gaplen1i, *gaplen2j; gaplen1i = gaplen1[i]; gaplen2j = gaplen2[j]; pfac = 0.0; pfac2 = 0.0; // for( k=0; gaplen2[j]&&(gl=gaplen2[j][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou if( gaplen2j ) for( k=0; (gaplen2j[k].idatnext)!=-1; k++ ) // ososugi! hash ni atode henkou { #if DEBUG int gl; pos2 = gaplen2j[k].relend; gl = gaplen2j[k].len; if( disp ) reporterr( "gaplen2[][].len=%d, .relend=%d, .freq=%f\n", gaplen2[j][k].len, gaplen2[j][k].relend, gaplen2[j][k].freq ); if( disp ) reporterr( "gl = %d, newgaplen=%d\n", gl, newgaplen ); #endif if( (pos2=gaplen2[j][k].relend) != 0 ) continue; pfac1 = 1.0; pfac10 = 1.0; if( gaplen1i ) for( l=0; (gaplen1i[l].idatnext)!=-1; l++ ) // ososugi! hash ni atode henkou { pos1 = gaplen1i[l].relend; pfac10 -= gaplen1i[l].freq; #if DEBUG if( disp ) reporterr( "gaplen1[][].len=%d, .relend=%d, .freq=%f\n", gaplen1[i][l].len, gaplen1[i][l].relend, gaplen1[i][l].freq ); #endif if( newgaplen + gaplen1i[l].len - (pos1) > gaplen2j[k].len - (pos2) ) pfac1 -= gaplen1i[l].freq; // reporterr( "pfac1 = %f\n", pfac1 ); } pfac += pfac1 * gaplen2j[k].freq; /* ???? */ if( newgaplen >= gaplen2j[k].len - (pos2-1) ) // >= or >?? { pfac -= pfac10 * gaplen2j[k].freq; // reporterr( "Hit! pfac1 = %f\n", pfac1 ); } /* ???? 
*/ // if( gaplen2[j][k].relend == -1 ) pfac += gaplen2[j][k].freq; } return( pfac ); #endif } static double calcpfacnoidatend( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, char *seq1, char *seq2, int disp ) // seq1 to seq2 ha debug you { double pfac, pfac1, pfac2; int k, l, pos1, pos2; Gaplen *gaplen1i, *gaplen2j, *g1, *g2; gaplen1i = gaplen1[i]; gaplen2j = gaplen2[j]; #if DEBUG if( disp ) { reporterr( "seq1[0] = %s\n", seq1 ); reporterr( "seq2[0] = %s\n", seq2 ); reporterr( "i,j=%d,%d\n", i, j ); reporterr( "In calcpfacnoidatend(), gaplen1[%d(%c)] = \n", i, seq1[i] ); showgaplen( gaplen1, seqlen( seq1 ) ); for( k=0; gaplen1[i]&&gaplen1[i][k].idatnext!=-1; k++ ) { pos1 = gaplen1[i][k].relend; reporterr( ".len=%d, .relend=%d, .freq=%f (i=%d)\n", gaplen1[i][k].len, gaplen1[i][k].relend, gaplen1[i][k].freq, i ); } reporterr( "In calcpfacnoidatend(), gaplen2[%d(%c)] = \n", j, seq2[j] ); showgaplen( gaplen2, seqlen( seq2 ) ); for( k=0; gaplen2[j]&&gaplen2[j][k].idatnext!=-1; k++ ) { pos2 = gaplen2[j][k].relend; reporterr( ".len=%d, .relend=%d (j=%d)\n", gaplen2[j][k].len, gaplen2[j][k].relend, j ); reporterr( ".freq=%f\n", gaplen2[j][k].freq ); } } #endif #if 1 pfac1 = pfac2 = 0.0; if( gaplen1i ) for( k=0; (g1=gaplen1i+k)->idatnext!=-1; k++ ) { if( (pos1=g1->relend) == 0 ) pfac1 += g1->freq; } if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) // ososugi! hash ni atode henkou { if( (pos2=g2->relend) == 0 ) pfac2 += g2->freq; } #if DEBUG reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif pfac = pfac1 * pfac2 + pfac1 * (1-pfac2) + pfac2 * (1-pfac1); #if DEBUG reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, pfac ); #endif // if( pfac ) reporterr( "i,j=%d,%d, Cancel (eq len)? pfac = %f -> ", i, j, pfac ); if( gaplen1i ) for( k=0; (g1=gaplen1i+k)->idatnext!=-1; k++ ) // ososugi! hash ni atode henkou { pos1=g1->relend; if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) // ososugi! 
hash ni atode henkou { pos2 = gaplen2j[l].relend; if ( pos1 == 0 && pos2 == 0 && g1->len == g2->len ) pfac -= g1->freq * g2->freq; else if( pos1 == 0 && pos2 != 0 && g2->len - (pos2-1) > g1->len ) pfac -= g1->freq * g2->freq; else if( pos1 != 0 && pos2 == 0 && g1->len - (pos1-1) > g2->len ) pfac -= g1->freq * g2->freq; } } #else pfac1 = pfac2 = 0.0; if( gaplen1i ) for( k=0; (gaplen1i[k].idatnext)!=-1; k++ ) { if( gaplen1i[k].relend == 0 ) pfac1 += gaplen1[i][k].freq; } if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) // ososugi! hash ni atode henkou { if( gaplen2j[l].relend == 0 ) pfac2 += gaplen2[j][l].freq; } #if DEBUG reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, pfac1, pfac2 ); #endif pfac = pfac1 * pfac2 + pfac1 * (1-pfac2) + pfac2 * (1-pfac1); #if DEBUG reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, pfac ); #endif #if 1 // if( pfac ) reporterr( "i,j=%d,%d, Cancel (eq len)? pfac = %f -> ", i, j, pfac ); if( gaplen1i ) for( k=0; (gaplen1i[k].idatnext)!=-1; k++ ) // ososugi! hash ni atode henkou { pos1=gaplen1i[k].relend; if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) // ososugi! 
hash ni atode henkou { pos2 = gaplen2j[l].relend; if ( pos1 == 0 && pos2 == 0 && gaplen1i[k].len == gaplen2j[l].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq; else if( pos1 == 0 && pos2 != 0 && gaplen2j[l].len - (pos2-1) > gaplen1i[k].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq; else if( pos1 != 0 && pos2 == 0 && gaplen1i[k].len - (pos1-1) > gaplen2j[l].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq; } } #endif #endif #if DEBUG reporterr( "\n\nFinal pfac1,pfac2 (%c%d,%c%d, straight) = %f\n\n", seq1[i], i, seq2[j], j, pfac ); #endif return( pfac ); } static void extendgaplencompactx( Gaplen **cpy, Gaplen **orig, int start ) { Gaplen *opt, *cpt; int l, id; #if DEBUG Gaplen cpybk; #endif // if( start < 0 ) start = 0; if( orig[start] == NULL ) { if( cpy[start] ) { free( cpy[start] ); cpy[start] = NULL; } return; } #if DEBUG reporterr( "At first, cpy -> \n" ); showgaplen( cpy, 100 ); reporterr( "Look at %d \n", start ); #endif if( cpy[start] == NULL ) { l = orig[start][0].npat; cpy[start] = realloc( cpy[start], (l+2) * sizeof( Gaplen ) ); #if 0 for( l=0; (gl=orig[start][l].idatend)!=-1; l++ ) cpy[start][l] = orig[start][l]; // freq ha pointer de copy cpy[start][l] = orig[start][l]; // dekiru? #else for( opt = orig[start],cpt = cpy[start]; opt->idatnext!=-1; ) *cpt++ = *opt++; *cpt = *opt; #endif } #if DEBUG cpybk = cpy[start][0]; #endif #if 0 for( l=0; (opt=orig[start]+l)->idatend!=-1; l++ ) { if( (pos=opt->relend) == 0 ) continue; if( cpy[posplus=start+pos] != NULL ) { id = opt->idatend; // reporterr( "cpy[%d][%d].len: %d -> %d (relend=%d)\n", start, l, cpy[start][l].len, cpy[posplus][id].len, pos ); cpy[start][l].len = cpy[posplus][id].len; // Ato de posplus wo tsukawanaiyouni henkou. continue; // HITSUYOU!!! 
} else { // reporterr( "cpy[%d][%d].len: %d (relend=%d)\n", start, l, cpy[start][l].len, pos ); } #if 0 for( k=0; orig[start+pos][k].idatend!=-1; k++ ) ; #else optplus = orig[posplus]; k = optplus->npat; #endif cptplus = cpy[posplus] = realloc( cpy[posplus], (k+2) * sizeof( Gaplen ) ); // cptplus = realloc( cptplus, (k+2) * sizeof( Gaplen ) ); #if 0 for( k=0; optplus[k].idatend!=-1; k++ ) { cptplus[k] = optplus[k]; // dekiru? } cptplus[k] = optplus[k]; // dekiru? #else while( optplus->idatend!=-1 ) *cptplus++ = *optplus++; *cptplus = *optplus; #endif } #endif if( start == 0 ) return; if( cpy[start-1] == NULL ) return; #if DEBUG reporterr( "cpy -> \n" ); showgaplen( cpy, 100 ); reporterr( "Look at %d \n", start ); #endif for( l=0; orig[start][l].idatnext!=-1; l++ ) { if( (id=orig[start][l].idatprev) == -1 ) continue; // if( cpy[start][l].relend != 0 ) cpy[start][l].len = cpy[start-1][id].len; // Shinchou ni cpy[start][l].len = cpy[start-1][id].len; // Shinchou ni // if( cpy[start][l].len != cpy[start-1][id].len ) #if DEBUG if( 1 || cpy[start][l].len != cpy[start-1][id].len ) { reporterr( "Check!! cpy[%d][%d].len=%d, but [start-1][].len=%d, relend=%d\n", start, l, cpy[start][l].len, cpy[start-1][id].len, cpy[start][l].relend ); reporterr( "orig[%d][%d].len=%d, relend=%d\n", start, l, orig[start][l].len, orig[start][l].relend ); reporterr( "cpybk.len=%d, relend=%d\n", cpybk.len, cpybk.relend ); } else { // reporterr( "OK, cpy[%d][%d].len=%d, relend=%d\n", start, l, cpy[start][l].len, cpy[start][l].relend ); } #endif } } #if USEGAPLENHALFORMTX static void extendgaplenpartly( Gaplen **cpy, Gaplen **orig, int start, int end ) { int i, l, gl, extrascope; Gaplen *pt; if( start < 0 ) start = 0; // for( i=start; i<=end; i++ ) // { // if( cpy[i] == (Gaplen *)1 ) // { // end = i-1; // break; //// reporterr( "Okashii! 
i=%d\n", i ); //// exit( 1 ); // } // if( cpy[i] ) free( cpy[i] ); // cpy[i] = NULL; // } extrascope = 0; #if 0 for( i=start; i<=end; i++ ) if( orig[i] ) { for( pt=orig[i]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = i+gl-end+1; } // extrascope = 10; // Kinji } #else if( orig[end] ) { for( pt=orig[end]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = gl; } // extrascope = 10; // Kinji } #endif end += extrascope; for( i=start; i<=end; i++ ) { if( cpy[i] != NULL ) continue; if( orig[i] == NULL ) { if( cpy[i] ) free( cpy[i] ); // muda dakedo cpy[i] = NULL; continue; } for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) ; cpy[i] = realloc( cpy[i], (l+2) * sizeof( Gaplen ) ); // cpy[i] = calloc( sizeof( Gaplen ), l+2 ); for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) { #if 1 cpy[i][l] = orig[i][l]; // freq ha pointer de copy #else cpy[i][l].len = gl; cpy[i][l].relend = orig[i][l].relend; cpy[i][l].freq = orig[i][l].freq; cpy[i][l].gapidatend = orig[i][l].gapidatend; #endif // reporterr( "i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } cpy[i][l] = orig[i][l]; // dekiru? // cpy[i][l].relend = -1; // cpy[i][l].len = -1; } } #endif static void duplicategaplencompactx( Gaplen **cpy, Gaplen **orig, int maxlen, int start, int end ) { int i, l; if( start < 0 ) start = 0; for( i=start; i<=end; i++ ) { // reporterr( "i=%d / %d\n", i, maxlen ); if( cpy[i] == (Gaplen *)1 ) { end = i-1; break; // reporterr( "Okashii! 
i=%d\n", i ); // exit( 1 ); } if( cpy[i] ) free( cpy[i] ); cpy[i] = NULL; } for( i=start; i<=end; i++ ) { if( orig[i] == NULL ) { if( cpy[i] ) free( cpy[i] ); // muda dakedo cpy[i] = NULL; continue; } #if 0 for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) ; #else l = orig[i][0].npat; #endif cpy[i] = realloc( cpy[i], (l+2) * sizeof( Gaplen ) ); // cpy[i] = calloc( sizeof( Gaplen ), l+2 ); for( l=0; orig[i][l].idatnext!=-1; l++ ) { cpy[i][l] = orig[i][l]; // freq ha pointer de copy // reporterr( "i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } cpy[i][l] = orig[i][l]; // dekiru? // cpy[i][l].relend = -1; // cpy[i][l].len = -1; } return; } #if USEGAPLENHALFORMTX static void duplicategaplenpartly( Gaplen **cpy, Gaplen **orig, int start, int end ) { int i, l, gl, extrascope; Gaplen *pt; if( start < 0 ) start = 0; for( i=start; i<=end; i++ ) { if( cpy[i] == (Gaplen *)1 ) { end = i-1; break; // reporterr( "Okashii! i=%d\n", i ); // exit( 1 ); } if( cpy[i] ) free( cpy[i] ); cpy[i] = NULL; } extrascope = 0; #if 0 for( i=start; i<=end; i++ ) if( orig[i] ) { for( pt=orig[i]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = i+gl-end+1; } // extrascope = 10; // Kinji } #else if( orig[end] ) { for( pt=orig[end]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = gl; } // extrascope = 10; // Kinji } #endif end += extrascope; for( i=start; i<=end; i++ ) { if( orig[i] == NULL ) { if( cpy[i] ) free( cpy[i] ); // muda dakedo cpy[i] = NULL; continue; } for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) ; cpy[i] = realloc( cpy[i], (l+2) * sizeof( Gaplen ) ); // cpy[i] = calloc( sizeof( Gaplen ), l+2 ); for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) { #if 1 cpy[i][l] = orig[i][l]; // freq ha pointer de copy #else cpy[i][l].len = gl; cpy[i][l].relend = orig[i][l].relend; cpy[i][l].freq = orig[i][l].freq; cpy[i][l].gapidatend = orig[i][l].gapidatend; #endif // reporterr( "i=%d, l=%d, len=%d, freq=%f, 
relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } cpy[i][l] = orig[i][l]; // dekiru? // cpy[i][l].relend = -1; // cpy[i][l].len = -1; } } #endif static void gaplenextendnoidatend( Gaplen **cpy, int gapstartpos, int insertionlen ) { int l, id, idn, pos, len; #if 0 // reporterr( "inserting %d gaps at position %d\n", insertionlen, gapstartpos ); for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatend) !=-1; l++ ) { pos = cpy[gapstartpos][l].relend; cpy[gapstartpos+pos][id].len += insertionlen; } #endif #if 1 for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatnext) !=-1; l++ ) { len = cpy[gapstartpos][l].len + insertionlen; // reporterr( "ext\n" ); for( pos=gapstartpos, idn=l; cpy[pos] != NULL && cpy[pos][idn].relend != 0; pos++ ) { // reporterr( "%d, plus %d %d->%d\n", pos, insertionlen, cpy[pos][idn].len, cpy[pos][idn].len+insertionlen ); cpy[pos][idn].len = len; idn = cpy[pos][idn].idatnext; // if( pos == gapstartpos + 1 ) break; break; } // reporterr( "end\n" ); idn = cpy[gapstartpos][l].idatprev; if( gapstartpos != 0 && idn != -1 && cpy[gapstartpos-1] ) cpy[gapstartpos-1][idn].len = len; } #endif } #if USEGAPLENHALFORMTX static void gaplenextend( Gaplen **cpy, int gapstartpos, int insertionlen ) { int l, id, idn, pos, len; #if 1 // reporterr( "inserting %d gaps at position %d\n", insertionlen, gapstartpos ); for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatend) !=-1; l++ ) { pos = cpy[gapstartpos][l].relend; cpy[gapstartpos+pos][id].len += insertionlen; } #endif #if 1 for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatend) !=-1; l++ ) { len = cpy[gapstartpos][l].len + insertionlen; // reporterr( "ext\n" ); for( pos=gapstartpos, idn=l; cpy[pos] != NULL && cpy[pos][idn].relend != 0; pos++ ) { // reporterr( "%d, plus %d %d->%d\n", pos, insertionlen, cpy[pos][idn].len, cpy[pos][idn].len+insertionlen ); cpy[pos][idn].len = len; idn = cpy[pos][idn].idatnext; // if( pos == gapstartpos + 1 ) break; // break; } // 
reporterr( "end\n" ); idn = cpy[gapstartpos][l].idatprev; if( gapstartpos != 0 && idn != -1 && cpy[gapstartpos-1] ) cpy[gapstartpos-1][idn].len = len; } #endif } #endif static void copygaplencompactx( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int posincopy, int posinori ) { Gaplen *pt, *cpt; #if DEBUG reporterr( "At the head of copygaplencompactx, cpy=\n" ); showgaplen( cpy+posincopy, 100 ); reporterr( "At the head of copygaplencompactx, orig=\n" ); showgaplen( orig+posinori, 100 ); reporterr( "posinori=%d\n", posinori ); #endif if( orig[posinori] == NULL ) return; // for( pt=orig[posinori],cpt=cpy[posincopy]; pt->relend==0; ) // zenhan ni relend=0 ga matomatteirukara. for( pt=orig[posinori],cpt=cpy[posincopy]; pt->idatnext!=-1; ) // kouhan mo copy { cpt++->len = pt++->len; } #if 0 for( l=0; (id=orig[posinori][l].idatend)!=-1; l++ ) { pos = orig[posinori][l].relend; if( pos == 0 ) continue; if( orig[posinori+pos] == NULL ) { reporterr( "Okashii\n" ); PFACERROR = 1; continue; } #if 0 for( k=0; orig[posinori+pos][k].relend==0; k++ ) // zenhan dake { cpy[posincopy+pos][k].len = orig[posinori+pos][k].len; // dekiru? } #else cpy[posincopy+pos][id].len = orig[posinori+pos][id].len; // dekiru? #endif } #endif if( gapstartpos == -1 ) gapstartpos = posincopy; gaplenextendnoidatend( cpy, gapstartpos, insertionlen ); #if DEBUG reporterr( "At the end of copygaplencompactx, cpy=\n" ); showgaplen( cpy+posincopy, 100 ); #endif } #if USEGAPLENHALF static void copygaplenrestricted_zurasu( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int startincopy, int endincopy, int startinori, int endinori ) { int i, extrascope, gl, j; int zure, newend; Gaplen *pt, *cpt; // int ncopied = 0; #if 0 // mae houkou nimo renzoku gap de enchou suru hitsuyou ga aru to omou. 
for( i=startinori-1; 0<=i&&i<=seqlen; i-- ) { // reporterr( "i=%d\n", i ); if( orig[i] == NULL ) break; for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl; } #endif zure = startincopy - startinori; // end ha check shinai // int ncopied = 0; if( orig[endinori] ) { extrascope = 0; for( pt=orig[endinori]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = gl; } // extrascope = 10; // Kinji newend = endinori + extrascope; } else newend = endinori; // reporterr( "ncopy = %d\n", newend - startinori ); // #if 0 // extra end wo shizen ni kimereba iranai if( newend > seqlen ) newend = seqlen; // if( startinori < 0 ) startinori = 0; #endif for( i=startinori, j=startincopy; i<=newend; i++, j++ ) { if( orig[i] == NULL ) continue; // ncopied += 1; #if 0 for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl; #else for( pt=orig[i],cpt=cpy[j]; pt->relend==0; ) // zenhan ni relend=0 ga matomatteirukara. // int k; // for( k=0; orig[i][k].relend==0; k++ ) // zenhan ni relend=0 ga matomatteirukara. 
{ cpt++->len = pt++->len; // reporterr( "i=%d, k=%d\n", i, k ); // cpy[i][k].len = orig[i][k].len; } #endif } #if 0 for( i=0; i<=seqlen; i++ ) { for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ ) reporterr( "after copy, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } #endif if( gapstartpos < 0 ) return; gaplenextend( cpy, gapstartpos, insertionlen ); // return; // TEST // for( i=endinori+1; i<=newend; i++ ) for( i=endincopy+1; i<=newend+zure; i++ ) { if( cpy[i] == NULL ) continue; for( j=0; cpy[i][j].idatend!=-1; j++ ) { if( cpy[i][j].relend == 0 ) { break; } } if( cpy[i][j].idatend == -1 ) { free( cpy[i] ); cpy[i] = NULL; } } #if 0 reporterr( "\n" ); for( i=0; i<=seqlen; i++ ) { for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ ) reporterr( "after add, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } #endif } #endif #if USEGAPLENHALFORMTX static void copygaplenrestricted( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int scopestart, int scopeend ) { int i, j, extrascope, gl, endinori, newend; Gaplen *pt, *cpt; // int ncopied = 0; #if 0 // mae houkou nimo renzoku gap de enchou suru hitsuyou ga aru to omou. 
for( i=scopestart-1; 0<=i&&i<=seqlen; i-- ) { // reporterr( "i=%d\n", i ); if( orig[i] == NULL ) break; for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl; } #endif // int ncopied = 0; endinori = scopeend; if( orig[scopeend] ) { extrascope = 0; for( pt=orig[scopeend]; (pt->idatend)!=-1; ) { if( (gl=pt++->relend) > extrascope ) extrascope = gl; } // extrascope = 10; // Kinji scopeend += extrascope; } newend = scopeend; // reporterr( "ncopy = %d\n", scopeend - scopestart ); // #if 0 // extra end wo shizen ni kimereba iranai if( scopeend > seqlen ) scopeend = seqlen; // if( scopestart < 0 ) scopestart = 0; #endif if( scopestart < 0 ) scopestart = 0; for( i=scopestart; i<=scopeend; i++ ) { if( orig[i] == NULL ) continue; // ncopied += 1; #if 0 for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl; #else for( pt=orig[i],cpt=cpy[i]; pt->relend==0; ) // zenhan ni relend=0 ga matomatteirukara. // int k; // for( k=0; orig[i][k].relend==0; k++ ) // zenhan ni relend=0 ga matomatteirukara. { cpt++->len = pt++->len; // reporterr( "i=%d, k=%d\n", i, k ); // cpy[i][k].len = orig[i][k].len; } #endif } #if 0 for( i=0; i<=seqlen; i++ ) { for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ ) reporterr( "after copy, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } #endif if( gapstartpos < 0 ) return; gaplenextend( cpy, gapstartpos, insertionlen ); return; // TEST extra scope de tsukaunoha end dake? 
for( i=endinori+1; i<=newend; i++ ) { if( cpy[i] == NULL ) continue; for( j=0; cpy[i][j].idatend!=-1; j++ ) { if( cpy[i][j].relend == 0 ) break; } if( cpy[i][j].idatend == -1 ) { free( cpy[i] ); cpy[i] = NULL; } } #if 0 reporterr( "\n" ); for( i=0; i<=seqlen; i++ ) { for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ ) reporterr( "after add, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend ); } #endif } #endif #if 1 static void freegaplenpartly( Gaplen **mtx, int startpos, int endpos ) { int i; Gaplen **pt; if( startpos < 0 ) startpos = 0; for( i=startpos; i<=endpos; i++ ) { if( *(pt=mtx+i) == (Gaplen *)1 ) break; if( *pt ) free( *pt ); *pt = NULL; } } #else static void freegaplenpartly( Gaplen **mtx, int startpos, int endpos ) { int i; if( startpos < 0 ) startpos = 0; for( i=startpos; i<=endpos; i++ ) { if( mtx[i] == (Gaplen *)1 ) break; if( mtx[i] ) free( mtx[i] ); mtx[i] = NULL; } } #endif double D__align( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; register int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double g; double *currentw, *previousw; // double fpenalty = (double)penalty; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double **cpmx1; static TLS double **cpmx2; static TLS int **intwork; static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; #if USEGAPLENHALF Gaplen ****gaplen1half = NULL; // NULL ga iru to omou. Gaplen ****gaplen2half = NULL; // NULL ga iru to omou. #endif #if USEGAPLENMTX Gaplen ****gaplen1mtx = NULL; // NULL ga iru to omou. Gaplen ****gaplen2mtx = NULL; // NULL ga iru to omou. #endif static TLS Gaplen **gaplen1 = NULL; // NULL ga iru to omou. static TLS Gaplen **gaplen2 = NULL; // NULL ga iru to omou. 
static TLS Gaplen ***gaplen1jprev = NULL; static TLS Gaplen ***gaplen2jprev = NULL; static TLS Gaplen ***gaplen1jcurr = NULL; static TLS Gaplen ***gaplen2jcurr = NULL; static TLS Gaplen ***gaplen1icurr = NULL; static TLS Gaplen ***gaplen2icurr = NULL; static TLS Gaplen ***gaplen1jbestkamo = NULL; static TLS Gaplen ***gaplen2jbestkamo = NULL; static TLS Gaplen ***gaplen1ibestkamo = NULL; static TLS Gaplen ***gaplen2ibestkamo = NULL; static TLS Gaplen ***gaplen1jbest = NULL; static TLS Gaplen ***gaplen2jbest = NULL; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; static TLS Gaplen ****gaplens = NULL; Gaplen ***gaplentmp = NULL; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; int k; double pfac, pfactmp; int newgaplen; // for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); free( gaplens ); if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL; if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL; if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL; if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL; if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL; if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL; if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL; if( gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = 
NULL; if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL; if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL; if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL; if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL; if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL; if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL; } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); #if FASTMATCHCALC doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; // reporterr( "Allocating gaplen1 and gaplen2\n" ); gaplen1 = (Gaplen ** )calloc( ll1+2, sizeof( Gaplen * ) ); gaplen1[ll1+1] = (Gaplen *)1; gaplen2 = (Gaplen ** )calloc( ll2+2, sizeof( Gaplen * ) ); gaplen2[ll2+1] = (Gaplen *)1; // reporterr( "Allocating gaplen*\n" ); gaplen1ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen2ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen1icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen2icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen1jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); 
gaplen2jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen1jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen1jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen1jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplens = calloc( sizeof( Gaplen ***), 12 ); gaplens[0] = gaplen1ibestkamo; gaplens[1] = gaplen2ibestkamo; gaplens[2] = gaplen1icurr; gaplens[3] = gaplen2icurr; gaplens[4] = gaplen1jbestkamo; gaplens[5] = gaplen2jbestkamo; gaplens[6] = gaplen1jbest; gaplens[7] = gaplen2jbest; gaplens[8] = gaplen1jcurr; gaplens[9] = gaplen2jcurr; gaplens[10] = gaplen1jprev; gaplens[11] = gaplen2jprev; // reporterr( "Allocation end\n" ); } { int ll1 = lgth1; int ll2 = lgth2; // reporterr( "Allocating gaplen*i\n" ); for(i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { double t = 0.0; for( i=0; i zurasu -> error? // duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, 0, 1 ); // duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); // half duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 0 ); // half // duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 ); duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 ); // copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error? 
// copygaplenrestricted_zurasu( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, lgth2, 0, lgth2 ); // -> zurasu -> error? copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error? } // reporterr( "Duplicating gaplen*j*curr \n" ); // int nduplicated = 0; for( j=0; j tbfast.c // impossible if( localhom ) imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif for( j=1; j ", seq1[0][0], seq2[0][j], j, currentw[j] ); currentw[j] += fpenalty * pfac; // tekitou // reporterr( " %f\n", currentw[j] ); } for( i=1; ifull, 2j->half #if USEGAPLENMTX //reporterr( "#### FULL, i,j=%d,%d\n", i, j ); pfactmp = calcpfac( gaplen1mtx[i-1][j-1], gaplen2mtx[i-1][j-1], i, j, seq1[0], seq2[0], one ); #endif #if USEGAPLENHALF //reporterr( "#### HALF, i,j=%d/%d,%d/%d\n", i, lgth1, j, lgth2 ); // showgaplen( gaplen2half[i-1][j-1], lgth2-j ); pfactmp = calcpfac( gaplen1half[i-1][j-1], gaplen2half[i-1][j-1], 1, 1, seq1[0]+i, seq2[0]+j, zero ); #endif #if USEGAPLENMTX + USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(straight) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; exit( 1 ); } #endif //if( i==50 && j==135 ) exit( 1 ); // reporterr( "i,j=%d,%d, *prept = %f\n", i, j, *prept ); #if ALGZSTRAIGHT wm = *prept; // Machigai!! 
#else wm = *prept + fpenalty * pfac; #endif *ijppt = 0; #if DEBUG if( i == j ) { fprintf( stderr, "\n i=%d, j=%d %c, %c ", i, j, seq1[0][i], seq2[0][j] ); fprintf( stderr, "%5.0f, pfac for straight =%f\n", wm, pfac ); } #endif newgaplen = j-mpi-1; //if( i == 53 && j == 93 ) exit( 1 ); // pfac = calcpfac_gap_incomplete( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, i, j, seq1[0], seq2[0], 0 ); // i-1 pfac = calcpfac_gap_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, 1, j, seq1[0]+i-1, seq2[0], 0 ); // i-1 #if USEGAPLENMTX pfactmp = calcpfac_gap_incomplete( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], newgaplen, i, j, seq1[0], seq2[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gap_incomplete( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], newgaplen, 1, j-mpi, seq1[0]+i-1, seq2[0]+mpi, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(igap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; } #endif #if DEBUG reporterr( "%c-%c pfac for igap end incomplete = %f\n", seq1[0][i], seq2[0][j], pfac ); reporterr( "mi when igap end checking = %f\n", mi ); reporterr( "wm = %f, mi+fpenalty*pfac=%f\n", wm, mi+fpenalty*pfac ); #endif #if ALGZGAP if( (g=mi+*fgcp2pt*gf1va) > wm ) #else if( (g=mi+fpenalty*pfac) > wm ) #endif { wm = g; *ijppt = -( j - mpi ); #if DEBUG80 reporterr( "Selected as igap end! wm = %f, mi = %f\n", wm, mi ); fprintf( stderr, "Jump from %d-%d (%c-%c) to %d (%c-%c)!\n", i, j, seq1[0][i], seq2[0][j], mpi, seq1[0][i-1], seq2[0][mpi] ); #endif } #if 0 fprintf( stderr, "%5.0f->", wm ); #endif // if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) #if 0 // reporterr( "Checking %c, (current pos = %c)\n", seq2[0][j+1], seq2[0][j] ); sfac = 1.0; for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! 
hash ni atode henkou { // reporterr( ".len = %d, .relend = %d\n", gaplen2[j+1][k].len, gaplen2[j+1][k].relend ); if( gl - 1 == gaplen2[j+1][k].relend ) { sfac -= gaplen2[j+1][k].freq; // reporterr( "Hit! sfac = %f\n", sfac ); } } sfac2 = 1.0; for( k=0; gaplen1[i+1]&&(gl=gaplen1[i+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou if( gaplen1[i+1][k].relend != -1 ) sfac2 -= gaplen1[i+1][k].freq; sfac *= sfac2; #else // sfac = 0.0; #endif #if ALGZGAP if( (g=*prept+*ogcp2pt*gf1vapre) >= mi ) #else // if( (g=*prept + fpenalty * sfac ) >= mi ) if( (g=*prept ) >= mi ) #endif { // mpibk = mpi; // mi = g - fpenalty * sfac; mi = g; mpi = j-1; #if DEBUG80 reporterr( "Selected as igap start! %c%d-%c%d, mi=%f, g=%f\n", seq1[0][i-1], i-1, seq2[0][mpi], mpi, mi, g ); #endif #if FREEFREQUENTLY // freegaplenpartly( gaplen1ibestkamo[i-1], 0, i-1 ); freegaplenpartly( gaplen2ibestkamo[i-1], j-3, j-2 ); #endif // freegaplenpartly( gaplen1jprev[mpibk], 0, lgth2 ); // full // freegaplenpartly( gaplen2jprev[mpibk], 0, lgth2-mpibk ); // half // if( gaplen1jprev[mpibk] ) FreeGaplenMtx( gaplen1jprev[mpibk], 0 ); // gaplen1jprev[mpibk] = NULL; // if( gaplen2jprev[mpibk] ) FreeGaplenMtx( gaplen2jprev[mpibk], 0 ); // gaplen2jprev[mpibk] = NULL; // addnewgaplen( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); // copygaplenrestricted( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // i-1, i copygaplencompactx( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, 1, i ); // half // copygaplenrestricted( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // mpi, j copygaplencompactx( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); //half } // reporterr( "g=%f, *prept=%f, mi=%f\n", g, *prept, mi ); #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if ALGZGAP pfac = 0.0; // CHUUI! 
#else // pfac = calcpfac_gapex( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], i, j, j-mpi, seq1[0], seq2[0], 1 ); // i-1 pfac = calcpfac_gapex_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], 1, j, j-mpi, seq1[0]+i, seq2[0], 1 ); // 1ibest->half, 2ibest->full #if USEGAPLENMTX pfactmp = calcpfac_gapex( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], i, j, j-mpi, seq1[0], seq2[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gapex( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], 1, j-mpi, j-mpi, seq1[0]+i, seq2[0]+mpi, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(igapex) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; } #endif #if DEBUG reporterr( "%c-%c, igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac ); #endif #endif // reporterr( "mi = %f -> ", mi ); mi += fpenalty * pfac; // reporterr( "mi = %f\n", mi ); // reporterr( "using %d-%d, %d, %d\n", *mpjpt, j-1, i, j ); newgaplen = i-*mpjpt-1; // pfac = calcpfac_gap_incomplete( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, j, i, seq2[0], seq1[0], 0 ); // j-1 deha??? 
pfac = calcpfac_gap_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, 1, i, seq2[0]+j-1, seq1[0], 1 ); // 2jbestkamo->half, 1jbestkamo->full #if USEGAPLENMTX pfactmp = calcpfac_gap_incomplete( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], newgaplen, j, i, seq2[0], seq1[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gap_incomplete( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], newgaplen, 1, i-*mpjpt, seq2[0]+j-1, seq1[0]+*mpjpt, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(jgap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); // exit( 1 ); PFACERROR = 1; } #endif #if ALGZGAP if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) #else if( (g=*mjpt + fpenalty*pfac) > wm ) #endif { wm = g; *ijppt = +( i - *mpjpt ); #if FREEFREQUENTLY freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); #endif #if DEBUG reporterr( "Selected as jgap end!, pfac = %f\n", pfac ); fprintf( stderr, "Jump from %d (%c) to %d (%c)!\n", j, seq1[0][j], *mpjpt, seq1[0][*mpjpt] ); #endif // addnewgaplen( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], gaplen2, lgth2, -1, 0 ); copygaplencompactx( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], lgth1, -1, 0, i, i );// *mpjpt, i // copygaplenrestricted( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, j, j ); // j-1, j copygaplencompactx( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, 1, 1 ); // half! } // extendgaplenpartly( gaplen1jbest[j-1], gaplen1, i, i ); // tmptmptmp // extendgaplenpartly( gaplen2jbest[j-1], gaplen2, 0, 0 ); // tmptmptmp #if 0 sfac = 1.0; for( l=0; gaplen1[i+1]&&(gl=gaplen1[i+1][l].len)!=-1; l++ ) // ososugi! hash ni atode henkou if( gl - 1 == gaplen1[i+1][l].relend ) sfac -= gaplen1[i+1][l].freq; sfac2 = 1.0; for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! 
hash ni atode henkou if( gaplen2[j+1][k].relend != -1 ) sfac2 -= gaplen2[j+1][k].freq; sfac *= sfac2; #else // sfac = 0.0; #endif #if DEBUG reporterr( " (jgap start check i=%d) -> *prept=%f, *mjpt=%f\n", i, seq1[0][i], seq2[0][j], *prept, *mjpt ); #endif #if ALGZGAP if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt ) #else // if( (g=*prept + fpenalty * sfac ) >= *mjpt ) if( (g=*prept ) >= *mjpt ) #endif { // *mjpt = g - fpenalty * sfac; *mjpt = g; *mpjpt = i-1; #if DEBUG reporterr( "Selected as jgap start!\n" ); #endif #if FREEFREQUENTLY freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); #endif // addnewgaplen( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); // reporterr( "copying gaplen1jbestkamo[%d-1] from galpen1jprev, j=%d, i=%d\n", j, j, i ); copygaplencompactx( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // *mpjpt, i // copygaplenrestricted( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // j-1, j // copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); // half! // reporterr( "copying gaplen2jbestkamo[%d-1] from galpen2jprev\n", j ); copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2-j, -1, 0, 1, 1 ); // ryouhou half! 
// if( j==2 && i==1 ) exit( 1 ); } // extendgaplenpartly( gaplen1ibestkamo[i-1], gaplen1, 0, 0 ); // tmptmptmp // extendgaplenpartly( gaplen2ibestkamo[i-1], gaplen2, j, j ); // tmptmptmp // extendgaplenpartly( gaplen1jbestkamo[j-1], gaplen1, i, i ); // tmptmptmp // extendgaplenpartly( gaplen2jbestkamo[j-1], gaplen2, 0, 0 ); // tmptmptmp #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if ALGZGAP pfac = 0.0; #else // pfactmp = calcpfac_gapex( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); // j-1 pfactmp = calcpfac_gapex_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], 1, i, i-*mpjpt, seq2[0]+j, seq1[0], 0 ); // 2jbestkamo->half, 1jbestkamo->full #if USEGAPLENMTX pfac = calcpfac_gapex( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); #endif #if USEGAPLENHALF pfac = calcpfac_gapex( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], 1, i-*mpjpt, i-*mpjpt, seq2[0]+j, seq1[0]+*mpjpt, 0 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(jgapex) pfac=%f, but pfactmp=%f (i,j=%d,%d) diff=%f\n", pfac, pfactmp, i, j, pfac-pfactmp ); // exit( 1 ); PFACERROR = 1; } #endif pfac = pfactmp; #if DEBUG reporterr( "%c-%c, jgap extension check (j=%d), pfac = %f\n", seq1[0][i], '=', j, pfac ); #endif #endif m[j] += fpenalty * pfac; if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { // fprintf( stderr, "WARP!!\n" ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = 
g; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; *warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #if DEBUG reporterr( "extention-x 1j???, before extention-x, j=%d\n", j ); showgaplen( gaplen1jcurr[j], 100 ); #endif extendgaplencompactx( gaplen1jcurr[j], gaplen1, i ); #if DEBUG reporterr( "after extention-x\n" ); showgaplen( gaplen1jcurr[j], 100 ); reporterr( "extention-x 2j???\n" ); #endif extendgaplencompactx( gaplen2jcurr[j], gaplen2+j, 0 ); #if 1 if( *ijppt < 0 ) { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j+*ijppt, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Inserting %d gaps to gaplen1 and copying gaplen2 (%c%d-%c%d)\n", -*ijppt-1, seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], gaplen2, lgth2, -1, 0 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], lgth2, -1, 0, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j+*ijppt], lgth1, 0, -*ijppt-1, 0, 0, 1, 1 ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j+*ijppt], lgth2, -1, 0, 0, 0, -*ijppt, -*ijppt ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], gaplen2, lgth2, -1, 0 ); // reporterr( "copying gaplen1jcurr from gaplen1jbest, with a %d insertion\n", -*ijppt-1 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], lgth1, i, 
-*ijppt-1, i, i ); // scope: i+*ijppt+1, i ? // reporterr( "copy end\n" ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, j, j ); copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, 0, -*ijppt ); // half! ryouho zureteru } else if( *ijppt > 0 ) { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-*ijppt, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Copying gaplen1 inserting %d gaps to gaplen2 (%c%d-%c%d)\n", *ijppt-1, seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], gaplen2, lgth2, j, *ijppt-1 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], lgth1, -1, 0, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], lgth2, j, *ijppt-1, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-*ijppt][j-1], lgth1, -1, 0, 0, 0, *ijppt, *ijppt ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-*ijppt][j-1], lgth2, 0, *ijppt-1, 0, 0, 1, 1 ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jbest[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jbest[j-1], gaplen2, lgth2, j, *ijppt-1 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jbest[j-1], lgth1, -1, 0, i, i ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, j, *ijppt-1, j, j ); // j-*ijppt+1? 
// copygaplenrestricted_zurasu( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 0, j, j ); // 2jcurr->half, but 2jbest->full, imanotokoro copygaplencompactx( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 1 ); //ryouhou half } else #endif { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Copying gaplen1 and gaplen2 (%c%d-%c%d)\n", seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], gaplen2, lgth2, -1, 0 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], lgth1, -1, 0, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], lgth2, -1, 0, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j-1], lgth1, -1, 0, 0, 0, 1, 1 ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j-1], lgth2, -1, 0, 0, 0, 1, 1 ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, 0, 1 ); // half } #if DEBUG reporterr( "at the end of j loop, gaplen1jcurr[%d] = \n", j ); showgaplen( gaplen1jcurr[j], 100 ); reporterr( "at the end of j loop, gaplen1prev[%d] = \n", j ); showgaplen( gaplen1jprev[j], 100 ); #endif #if 1 freegaplenpartly( gaplen1jcurr[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half! 
freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2?? #else freegaplenpartly( gaplen1jprev[j-1], 0, i-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jcurr[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half! freegaplenpartly( gaplen1jbestkamo[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jbest[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2?? #endif #if USEGAPLENMTX // freegaplenpartly( gaplen1mtx[i-1][j-1], 0, i-2 ); // freegaplenpartly( gaplen2mtx[i-1][j-1], 0, j-2 ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; #if 1 // freegaplenpartly( gaplen1icurr[i-1], i-1, i-1 ); freegaplenpartly( gaplen1icurr[i-1], 0, lgth1-i ); freegaplenpartly( gaplen2icurr[i-1], 0, lgth2 ); // freegaplenpartly( gaplen1ibestkamo[i-1], i-1, i-1 ); freegaplenpartly( gaplen1ibestkamo[i-1], 0, lgth1-i ); freegaplenpartly( gaplen2ibestkamo[i-1], 0, lgth2 ); #endif if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } #if 0 fprintf( stderr, "i=%d, %15.5f \n", i, wm ); #endif //if( i == 2 ) exit( 1 ); } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( 
"LENGTH OVER!\n" ); } for( i=0; i %f\n", kenzan, (double)kenzan /( icyc*jcyc ) ); double pairscore, nogappairscore; char **pseq; pseq = AllocateCharMtx( 2, strlen( seq1[0] ) + 1 ); pairscore = nogappairscore = 0.0; for( i=0; i 0.01 ) || PFACERROR ) // abs() -> fabs(), 2019/Jan/25 // if( abs( pairscore - wm +*impmatch ) > 0.01 ) #else if( abs( pairscore - wm +*impmatch ) > 0.01 ) #endif // if( abs( pairscore - wm +*impmatch ) > 0.01 ) { for( i=0; igroup1\n%s\n", seq1[i] ); for( j=0; jgroup2\n%s\n", seq2[j] ); exit( 1 ); } #else reporterr( "\n" ); #endif #if 0 // if( strlen( seq1[0] ) - lgth1 > 100 && icyc > 1 || strlen( seq2[0] ) - lgth2 > 100 & jcyc > 1 ) if( strstr( seq1[0], "LNDDAT" ) && icyc == 1 || strstr( seq2[0], "LNDDAT" ) && jcyc==1) { for( i=0; igroup1\n%s\n", seq1[i] ); for( j=0; jgroup2\n%s\n", seq2[j] ); exit( 1 ); } #endif return( wm ); } double D__align_ls( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp ) { int v1, v2; double val; #if 1 v1 = gapvariety( icyc, strlen( seq1[0] ), seq1 ); v2 = gapvariety( jcyc, strlen( seq2[0] ), seq2 ); #else v1 = icyc; v2 = jcyc; #endif // reporterr( "\nicyc,jcyc = %d,%d\n", icyc, jcyc ); reporterr( " v1,v2 = %d,%d\n", v1, v2 ); if( v1 >= v2 ) { val = D__align( n_dynamicmtx, seq1, seq2, eff1, eff2, icyc, jcyc, alloclen, constraint, impmatch, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp ); } else { val = D__align( n_dynamicmtx, seq2, seq1, eff2, eff1, jcyc, icyc, alloclen, constraint, impmatch, sgap2, sgap1, egap2, egap1, chudanpt, chudanref, chudanres, headgp, tailgp ); } return val; } double D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, int *gapmap1, int *gapmap2 ) /* score no 
keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { fprintf( stderr, "Unexpected error. Please contact katoh@ifrec.osaka-u.ac.jp\n" ); exit( 1 ); } double D__align_variousdist( int **which, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; register int i, j, c; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? */ double g; double *currentw, *previousw; // double fpenalty = (double)penalty; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double ***cpmx1s; static TLS double ***cpmx2s; static TLS int ***intwork; static TLS double ***doublework; static TLS int orlgth1 = 0, orlgth2 = 0; #if USEGAPLENHALF Gaplen ****gaplen1half = NULL; // NULL ga iru to omou. Gaplen ****gaplen2half = NULL; // NULL ga iru to omou. #endif #if USEGAPLENMTX Gaplen ****gaplen1mtx = NULL; // NULL ga iru to omou. Gaplen ****gaplen2mtx = NULL; // NULL ga iru to omou. #endif static TLS Gaplen **gaplen1 = NULL; // NULL ga iru to omou. static TLS Gaplen **gaplen2 = NULL; // NULL ga iru to omou. 
static TLS Gaplen ***gaplen1jprev = NULL; static TLS Gaplen ***gaplen2jprev = NULL; static TLS Gaplen ***gaplen1jcurr = NULL; static TLS Gaplen ***gaplen2jcurr = NULL; static TLS Gaplen ***gaplen1icurr = NULL; static TLS Gaplen ***gaplen2icurr = NULL; static TLS Gaplen ***gaplen1jbestkamo = NULL; static TLS Gaplen ***gaplen2jbestkamo = NULL; static TLS Gaplen ***gaplen1ibestkamo = NULL; static TLS Gaplen ***gaplen2ibestkamo = NULL; static TLS Gaplen ***gaplen1jbest = NULL; static TLS Gaplen ***gaplen2jbest = NULL; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; static TLS Gaplen ****gaplens = NULL; Gaplen ***gaplentmp = NULL; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; int k; double pfac, pfactmp; int newgaplen; int **masklist1 = NULL, **masklist2 = NULL; int *nmask; // for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatCub( cpmx1s ); FreeFloatCub( cpmx2s ); FreeFloatCub( doublework ); FreeIntCub( intwork ); free( gaplens ); if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL; if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL; if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL; if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL; if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL; if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL; if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL; if( 
gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = NULL; if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL; if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL; if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL; if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL; if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL; if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL; } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 ); cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 ); doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; // reporterr( "Allocating gaplen1 and gaplen2\n" ); gaplen1 = (Gaplen ** )calloc( ll1+2, sizeof( Gaplen * ) ); gaplen1[ll1+1] = (Gaplen *)1; gaplen2 = (Gaplen ** )calloc( ll2+2, sizeof( Gaplen * ) ); gaplen2[ll2+1] = (Gaplen *)1; // reporterr( "Allocating gaplen*\n" ); gaplen1ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen2ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen1icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen2icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) ); gaplen1jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jbestkamo = (Gaplen ***)calloc( (ll2+2), 
sizeof( Gaplen **) ); gaplen1jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen1jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen1jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplen2jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) ); gaplens = calloc( sizeof( Gaplen ***), 12 ); gaplens[0] = gaplen1ibestkamo; gaplens[1] = gaplen2ibestkamo; gaplens[2] = gaplen1icurr; gaplens[3] = gaplen2icurr; gaplens[4] = gaplen1jbestkamo; gaplens[5] = gaplen2jbestkamo; gaplens[6] = gaplen1jbest; gaplens[7] = gaplen2jbest; gaplens[8] = gaplen1jcurr; gaplens[9] = gaplen2jcurr; gaplens[10] = gaplen1jprev; gaplens[11] = gaplen2jprev; // reporterr( "Allocation end\n" ); } { int ll1 = lgth1; int ll2 = lgth2; // reporterr( "Allocating gaplen*i\n" ); for(i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { double t = 0.0; for( i=0; i zurasu -> error? // duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, 0, 1 ); // duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); // half duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 0 ); // half // duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 ); duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 ); // copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error? 
// copygaplenrestricted_zurasu( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, lgth2, 0, lgth2 ); // -> zurasu -> error? copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error? } // reporterr( "Duplicating gaplen*j*curr \n" ); // int nduplicated = 0; for( j=0; j tbfast.c // impossible if( localhom ) imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif for( j=1; j ", seq1[0][0], seq2[0][j], j, currentw[j] ); currentw[j] += fpenalty * pfac; // tekitou // reporterr( " %f\n", currentw[j] ); } for( i=1; ifull, 2j->half #if USEGAPLENMTX //reporterr( "#### FULL, i,j=%d,%d\n", i, j ); pfactmp = calcpfac( gaplen1mtx[i-1][j-1], gaplen2mtx[i-1][j-1], i, j, seq1[0], seq2[0], one ); #endif #if USEGAPLENHALF //reporterr( "#### HALF, i,j=%d/%d,%d/%d\n", i, lgth1, j, lgth2 ); // showgaplen( gaplen2half[i-1][j-1], lgth2-j ); pfactmp = calcpfac( gaplen1half[i-1][j-1], gaplen2half[i-1][j-1], 1, 1, seq1[0]+i, seq2[0]+j, zero ); #endif #if USEGAPLENMTX + USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(straight) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; exit( 1 ); } #endif //if( i==50 && j==135 ) exit( 1 ); // reporterr( "i,j=%d,%d, *prept = %f\n", i, j, *prept ); #if ALGZSTRAIGHT wm = *prept; // Machigai!! 
#else wm = *prept + fpenalty * pfac; #endif *ijppt = 0; #if DEBUG if( i == j ) { fprintf( stderr, "\n i=%d, j=%d %c, %c ", i, j, seq1[0][i], seq2[0][j] ); fprintf( stderr, "%5.0f, pfac for straight =%f\n", wm, pfac ); } #endif newgaplen = j-mpi-1; //if( i == 53 && j == 93 ) exit( 1 ); // pfac = calcpfac_gap_incomplete( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, i, j, seq1[0], seq2[0], 0 ); // i-1 pfac = calcpfac_gap_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, 1, j, seq1[0]+i-1, seq2[0], 0 ); // i-1 #if USEGAPLENMTX pfactmp = calcpfac_gap_incomplete( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], newgaplen, i, j, seq1[0], seq2[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gap_incomplete( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], newgaplen, 1, j-mpi, seq1[0]+i-1, seq2[0]+mpi, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(igap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; } #endif #if DEBUG reporterr( "%c-%c pfac for igap end incomplete = %f\n", seq1[0][i], seq2[0][j], pfac ); reporterr( "mi when igap end checking = %f\n", mi ); reporterr( "wm = %f, mi+fpenalty*pfac=%f\n", wm, mi+fpenalty*pfac ); #endif #if ALGZGAP if( (g=mi+*fgcp2pt*gf1va) > wm ) #else if( (g=mi+fpenalty*pfac) > wm ) #endif { wm = g; *ijppt = -( j - mpi ); #if DEBUG80 reporterr( "Selected as igap end! wm = %f, mi = %f\n", wm, mi ); fprintf( stderr, "Jump from %d-%d (%c-%c) to %d (%c-%c)!\n", i, j, seq1[0][i], seq2[0][j], mpi, seq1[0][i-1], seq2[0][mpi] ); #endif } #if 0 fprintf( stderr, "%5.0f->", wm ); #endif // if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) #if 0 // reporterr( "Checking %c, (current pos = %c)\n", seq2[0][j+1], seq2[0][j] ); sfac = 1.0; for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! 
hash ni atode henkou { // reporterr( ".len = %d, .relend = %d\n", gaplen2[j+1][k].len, gaplen2[j+1][k].relend ); if( gl - 1 == gaplen2[j+1][k].relend ) { sfac -= gaplen2[j+1][k].freq; // reporterr( "Hit! sfac = %f\n", sfac ); } } sfac2 = 1.0; for( k=0; gaplen1[i+1]&&(gl=gaplen1[i+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou if( gaplen1[i+1][k].relend != -1 ) sfac2 -= gaplen1[i+1][k].freq; sfac *= sfac2; #else // sfac = 0.0; #endif #if ALGZGAP if( (g=*prept+*ogcp2pt*gf1vapre) >= mi ) #else // if( (g=*prept + fpenalty * sfac ) >= mi ) if( (g=*prept ) >= mi ) #endif { // mpibk = mpi; // mi = g - fpenalty * sfac; mi = g; mpi = j-1; #if DEBUG80 reporterr( "Selected as igap start! %c%d-%c%d, mi=%f, g=%f\n", seq1[0][i-1], i-1, seq2[0][mpi], mpi, mi, g ); #endif #if FREEFREQUENTLY // freegaplenpartly( gaplen1ibestkamo[i-1], 0, i-1 ); freegaplenpartly( gaplen2ibestkamo[i-1], j-3, j-2 ); #endif // freegaplenpartly( gaplen1jprev[mpibk], 0, lgth2 ); // full // freegaplenpartly( gaplen2jprev[mpibk], 0, lgth2-mpibk ); // half // if( gaplen1jprev[mpibk] ) FreeGaplenMtx( gaplen1jprev[mpibk], 0 ); // gaplen1jprev[mpibk] = NULL; // if( gaplen2jprev[mpibk] ) FreeGaplenMtx( gaplen2jprev[mpibk], 0 ); // gaplen2jprev[mpibk] = NULL; // addnewgaplen( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); // copygaplenrestricted( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // i-1, i copygaplencompactx( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, 1, i ); // half // copygaplenrestricted( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // mpi, j copygaplencompactx( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); //half } // reporterr( "g=%f, *prept=%f, mi=%f\n", g, *prept, mi ); #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if ALGZGAP pfac = 0.0; // CHUUI! 
#else // pfac = calcpfac_gapex( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], i, j, j-mpi, seq1[0], seq2[0], 1 ); // i-1 pfac = calcpfac_gapex_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], 1, j, j-mpi, seq1[0]+i, seq2[0], 1 ); // 1ibest->half, 2ibest->full #if USEGAPLENMTX pfactmp = calcpfac_gapex( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], i, j, j-mpi, seq1[0], seq2[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gapex( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], 1, j-mpi, j-mpi, seq1[0]+i, seq2[0]+mpi, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(igapex) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); PFACERROR = 1; } #endif #if DEBUG reporterr( "%c-%c, igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac ); #endif #endif // reporterr( "mi = %f -> ", mi ); mi += fpenalty * pfac; // reporterr( "mi = %f\n", mi ); // reporterr( "using %d-%d, %d, %d\n", *mpjpt, j-1, i, j ); newgaplen = i-*mpjpt-1; // pfac = calcpfac_gap_incomplete( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, j, i, seq2[0], seq1[0], 0 ); // j-1 deha??? 
pfac = calcpfac_gap_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, 1, i, seq2[0]+j-1, seq1[0], 1 ); // 2jbestkamo->half, 1jbestkamo->full #if USEGAPLENMTX pfactmp = calcpfac_gap_incomplete( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], newgaplen, j, i, seq2[0], seq1[0], 1 ); #endif #if USEGAPLENHALF pfactmp = calcpfac_gap_incomplete( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], newgaplen, 1, i-*mpjpt, seq2[0]+j-1, seq1[0]+*mpjpt, 1 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(jgap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j ); // exit( 1 ); PFACERROR = 1; } #endif #if ALGZGAP if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) #else if( (g=*mjpt + fpenalty*pfac) > wm ) #endif { wm = g; *ijppt = +( i - *mpjpt ); #if FREEFREQUENTLY freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); #endif #if DEBUG reporterr( "Selected as jgap end!, pfac = %f\n", pfac ); fprintf( stderr, "Jump from %d (%c) to %d (%c)!\n", j, seq1[0][j], *mpjpt, seq1[0][*mpjpt] ); #endif // addnewgaplen( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], gaplen2, lgth2, -1, 0 ); copygaplencompactx( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], lgth1, -1, 0, i, i );// *mpjpt, i // copygaplenrestricted( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, j, j ); // j-1, j copygaplencompactx( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, 1, 1 ); // half! } // extendgaplenpartly( gaplen1jbest[j-1], gaplen1, i, i ); // tmptmptmp // extendgaplenpartly( gaplen2jbest[j-1], gaplen2, 0, 0 ); // tmptmptmp #if 0 sfac = 1.0; for( l=0; gaplen1[i+1]&&(gl=gaplen1[i+1][l].len)!=-1; l++ ) // ososugi! hash ni atode henkou if( gl - 1 == gaplen1[i+1][l].relend ) sfac -= gaplen1[i+1][l].freq; sfac2 = 1.0; for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! 
hash ni atode henkou if( gaplen2[j+1][k].relend != -1 ) sfac2 -= gaplen2[j+1][k].freq; sfac *= sfac2; #else // sfac = 0.0; #endif #if DEBUG reporterr( " (jgap start check i=%d) -> *prept=%f, *mjpt=%f\n", i, seq1[0][i], seq2[0][j], *prept, *mjpt ); #endif #if ALGZGAP if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt ) #else // if( (g=*prept + fpenalty * sfac ) >= *mjpt ) if( (g=*prept ) >= *mjpt ) #endif { // *mjpt = g - fpenalty * sfac; *mjpt = g; *mpjpt = i-1; #if DEBUG reporterr( "Selected as jgap start!\n" ); #endif #if FREEFREQUENTLY freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); #endif // addnewgaplen( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); // reporterr( "copying gaplen1jbestkamo[%d-1] from galpen1jprev, j=%d, i=%d\n", j, j, i ); copygaplencompactx( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // *mpjpt, i // copygaplenrestricted( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // j-1, j // copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); // half! // reporterr( "copying gaplen2jbestkamo[%d-1] from galpen2jprev\n", j ); copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2-j, -1, 0, 1, 1 ); // ryouhou half! 
// if( j==2 && i==1 ) exit( 1 ); } // extendgaplenpartly( gaplen1ibestkamo[i-1], gaplen1, 0, 0 ); // tmptmptmp // extendgaplenpartly( gaplen2ibestkamo[i-1], gaplen2, j, j ); // tmptmptmp // extendgaplenpartly( gaplen1jbestkamo[j-1], gaplen1, i, i ); // tmptmptmp // extendgaplenpartly( gaplen2jbestkamo[j-1], gaplen2, 0, 0 ); // tmptmptmp #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if ALGZGAP pfac = 0.0; #else // pfactmp = calcpfac_gapex( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); // j-1 pfactmp = calcpfac_gapex_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], 1, i, i-*mpjpt, seq2[0]+j, seq1[0], 0 ); // 2jbestkamo->half, 1jbestkamo->full #if USEGAPLENMTX pfac = calcpfac_gapex( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); #endif #if USEGAPLENHALF pfac = calcpfac_gapex( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], 1, i-*mpjpt, i-*mpjpt, seq2[0]+j, seq1[0]+*mpjpt, 0 ); #endif #if USEGAPLENMTX || USEGAPLENHALF if( pfac != pfactmp ) { reporterr( "(jgapex) pfac=%f, but pfactmp=%f (i,j=%d,%d) diff=%f\n", pfac, pfactmp, i, j, pfac-pfactmp ); // exit( 1 ); PFACERROR = 1; } #endif pfac = pfactmp; #if DEBUG reporterr( "%c-%c, jgap extension check (j=%d), pfac = %f\n", seq1[0][i], '=', j, pfac ); #endif #endif m[j] += fpenalty * pfac; if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { // fprintf( stderr, "WARP!!\n" ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = 
g; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; *warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #if DEBUG reporterr( "extention-x 1j???, before extention-x, j=%d\n", j ); showgaplen( gaplen1jcurr[j], 100 ); #endif extendgaplencompactx( gaplen1jcurr[j], gaplen1, i ); #if DEBUG reporterr( "after extention-x\n" ); showgaplen( gaplen1jcurr[j], 100 ); reporterr( "extention-x 2j???\n" ); #endif extendgaplencompactx( gaplen2jcurr[j], gaplen2+j, 0 ); #if 1 if( *ijppt < 0 ) { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j+*ijppt, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Inserting %d gaps to gaplen1 and copying gaplen2 (%c%d-%c%d)\n", -*ijppt-1, seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], gaplen2, lgth2, -1, 0 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], lgth2, -1, 0, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j+*ijppt], lgth1, 0, -*ijppt-1, 0, 0, 1, 1 ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j+*ijppt], lgth2, -1, 0, 0, 0, -*ijppt, -*ijppt ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], gaplen2, lgth2, -1, 0 ); // reporterr( "copying gaplen1jcurr from gaplen1jbest, with a %d insertion\n", -*ijppt-1 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], lgth1, i, 
-*ijppt-1, i, i ); // scope: i+*ijppt+1, i ? // reporterr( "copy end\n" ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, j, j ); copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, 0, -*ijppt ); // half! ryouho zureteru } else if( *ijppt > 0 ) { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-*ijppt, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Copying gaplen1 inserting %d gaps to gaplen2 (%c%d-%c%d)\n", *ijppt-1, seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], gaplen2, lgth2, j, *ijppt-1 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], lgth1, -1, 0, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], lgth2, j, *ijppt-1, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-*ijppt][j-1], lgth1, -1, 0, 0, 0, *ijppt, *ijppt ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-*ijppt][j-1], lgth2, 0, *ijppt-1, 0, 0, 1, 1 ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jbest[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jbest[j-1], gaplen2, lgth2, j, *ijppt-1 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jbest[j-1], lgth1, -1, 0, i, i ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, j, *ijppt-1, j, j ); // j-*ijppt+1? 
// copygaplenrestricted_zurasu( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 0, j, j ); // 2jcurr->half, but 2jbest->full, imanotokoro copygaplencompactx( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 1 ); //ryouhou half } else #endif { #if DEBUG reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt ); reporterr( "Copying gaplen1 and gaplen2 (%c%d-%c%d)\n", seq1[0][i], i, seq2[0][j], j ); #endif #if USEGAPLENMTX // addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], gaplen2, lgth2, -1, 0 ); copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], lgth1, -1, 0, i, i ); // 0, lgth1 copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], lgth2, -1, 0, j, j ); // 0, lgth2 #endif #if USEGAPLENHALF copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j-1], lgth1, -1, 0, 0, 0, 1, 1 ); // 0, lgth1 copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j-1], lgth2, -1, 0, 0, 0, 1, 1 ); // 0, lgth2 #endif // addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 ); // addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 ); copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, 0, 1 ); // half } #if DEBUG reporterr( "at the end of j loop, gaplen1jcurr[%d] = \n", j ); showgaplen( gaplen1jcurr[j], 100 ); reporterr( "at the end of j loop, gaplen1prev[%d] = \n", j ); showgaplen( gaplen1jprev[j], 100 ); #endif #if 1 freegaplenpartly( gaplen1jcurr[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half! 
freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2?? #else freegaplenpartly( gaplen1jprev[j-1], 0, i-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jcurr[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half! freegaplenpartly( gaplen1jbestkamo[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2?? freegaplenpartly( gaplen1jbest[j-1], 0, i-2 ); // -1 dehanaku -2?? // freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2?? #endif #if USEGAPLENMTX // freegaplenpartly( gaplen1mtx[i-1][j-1], 0, i-2 ); // freegaplenpartly( gaplen2mtx[i-1][j-1], 0, j-2 ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; } lastverticalw[i] = currentw[lgth2-1]; #if 1 // freegaplenpartly( gaplen1icurr[i-1], i-1, i-1 ); freegaplenpartly( gaplen1icurr[i-1], 0, lgth1-i ); freegaplenpartly( gaplen2icurr[i-1], 0, lgth2 ); // freegaplenpartly( gaplen1ibestkamo[i-1], i-1, i-1 ); freegaplenpartly( gaplen1ibestkamo[i-1], 0, lgth1-i ); freegaplenpartly( gaplen2ibestkamo[i-1], 0, lgth2 ); #endif if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } #if 0 fprintf( stderr, "i=%d, %15.5f \n", i, wm ); #endif //if( i == 2 ) exit( 1 ); } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( 
"LENGTH OVER!\n" ); } for( i=0; i %f\n", kenzan, (double)kenzan /( icyc*jcyc ) ); double pairscore, nogappairscore, diff; char **pseq; pseq = AllocateCharMtx( 2, strlen( seq1[0] ) + 1 ); pairscore = nogappairscore = 0.0; #if 1 for( i=0; i 0.01 ) || PFACERROR ) // if( abs( pairscore - wm +*impmatch ) > 0.01 ) #else if( abs( pairscore - wm +*impmatch ) > 0.01 ) #endif // if( abs( pairscore - wm +*impmatch ) > 0.01 ) { for( i=0; igroup1\n%s\n", seq1[i] ); for( j=0; jgroup2\n%s\n", seq2[j] ); exit( 1 ); } #else reporterr( "\n" ); #endif #if 0 // if( strlen( seq1[0] ) - lgth1 > 100 && icyc > 1 || strlen( seq2[0] ) - lgth2 > 100 & jcyc > 1 ) if( strstr( seq1[0], "LNDDAT" ) && icyc == 1 || strstr( seq2[0], "LNDDAT" ) && jcyc==1) { for( i=0; igroup1\n%s\n", seq1[i] ); for( j=0; jgroup2\n%s\n", seq2[j] ); exit( 1 ); } #endif return( wm ); } mafft-7.505-without-extensions/core/constants.c0000644000175000017500000013047014224501721021166 0ustar nileshnilesh#include "mltaln.h" #include "miyata.h" #include "miyata5.h" #include "DNA.h" #include "JTT.c" #include "blosum.c" #define DEBUG 0 #define TEST 0 #define NORMALIZE1 1 static int shishagonyuu( double in ) { int out; if ( in > 0.0 ) out = ( (int)( in + 0.5 ) ); else if( in == 0.0 ) out = ( 0 ); else if( in < 0.0 ) out = ( (int)( in - 0.5 ) ); else out = 0; return( out ); } static void nscore( int *amino_n, int **n_dis ) { int i; for( i=0; i<26; i++ ) { // reporterr( "i=%d (%c), n_dis[%d][%d] = %d\n", i, amino[i], i, amino_n['n'], n_dis[i][amino_n['n']] ); n_dis[i][amino_n['n']] = shishagonyuu( (double)0.25 * n_dis[i][i] ); // reporterr( "-> i=%d, n_dis[%d][%d] = %d\n", i, i, amino_n['n'], n_dis[i][amino_n['n']] ); n_dis[amino_n['n']][i] = n_dis[i][amino_n['n']]; } // n_dis[amino_n['n']][amino_n['n']] = shishagonyuu( (double)0.25 * 0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); n_dis[amino_n['n']][amino_n['n']] = shishagonyuu( (double)0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // 
2017/Jan/2 #if 0 // Ato de kakunin for( i=0; i<26; i++ ) { n_dis[i][amino_n['-']] = shishagonyuu( (double)0.25 * n_dis[i][i] ); n_dis[amino_n['-']][i] = n_dis[i][amino_n['-']]; } // n_dis[amino_n['-']][amino_n['-']] = shishagonyuu( (double)0.25 * 0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // DAME! #endif } static void ambiguousscore( int *amino_n, int **n_dis ) { int i; for( i=0; i<26; i++ ) { n_dis[i][amino_n['r']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['g']][i] ) ); n_dis[i][amino_n['y']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['c']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['k']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['m']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] ) ); n_dis[i][amino_n['s']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][i] + n_dis[amino_n['c']][i] ) ); n_dis[i][amino_n['w']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['b']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['c']][i] + n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['d']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['h']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] + n_dis[amino_n['t']][i] ) ); n_dis[i][amino_n['v']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] + n_dis[amino_n['g']][i] ) ); n_dis[amino_n['r']][i] = n_dis[i][amino_n['r']]; n_dis[amino_n['y']][i] = n_dis[i][amino_n['y']]; n_dis[amino_n['k']][i] = n_dis[i][amino_n['k']]; n_dis[amino_n['m']][i] = n_dis[i][amino_n['m']]; n_dis[amino_n['s']][i] = n_dis[i][amino_n['s']]; n_dis[amino_n['w']][i] = n_dis[i][amino_n['w']]; n_dis[amino_n['b']][i] = n_dis[i][amino_n['b']]; n_dis[amino_n['d']][i] = n_dis[i][amino_n['d']]; 
n_dis[amino_n['h']][i] = n_dis[i][amino_n['h']]; n_dis[amino_n['v']][i] = n_dis[i][amino_n['v']]; } i = amino_n['r']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['g']][amino_n['g']] ) ); i = amino_n['y']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['k']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['m']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] ) ); i = amino_n['s']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['c']][amino_n['c']] ) ); i = amino_n['w']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['b']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['d']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['h']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['t']][amino_n['t']] ) ); i = amino_n['v']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['g']][amino_n['g']] ) ); } static void calcfreq_nuc( int nseq, char **seq, double *datafreq ) { int i, j, l; int aan; double total; for( i=0; i<4; i++ ) datafreq[i] = 0.0; total = 0.0; for( i=0; i= 0 && aan < 4 ) { datafreq[aan] += 1.0; total += 1.0; } } } total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i]; for( i=0; i<4; i++ ) datafreq[i] /= (double)total; for( i=0; i<4; i++ ) if( datafreq[i] < 0.0001 
) datafreq[i] = 0.0001; total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i]; // reporterr( "total = %f\n", total ); for( i=0; i<4; i++ ) datafreq[i] /= (double)total; #if 0 reporterr( "\ndatafreq = " ); for( i=0; i<4; i++ ) reporterr( "%10.5f ", datafreq[i] ); reporterr( "\n" ); exit( 1 ); #endif } static void calcfreq( int nseq, char **seq, double *datafreq ) { int i, j, l; int aan; double total; for( i=0; i= 0 && aan < nscoredalphabets && seq[i][j] != '-' ) { datafreq[aan] += 1.0; total += 1.0; } } } total = 0.0; for( i=0; i= 0 && aan < nscoredalphabets && seq[i][j] != '-' ) { datafreq[aan] += 1.0; total += 1.0; } } } total = 0.0; for( i=0; i= 10 ) trywarp = 0; else trywarp = 1; if( dorp == 'd' ) /* DNA */ { int k, m; double average; double **pamx = AllocateDoubleMtx( 11,11 ); double **pam1 = AllocateDoubleMtx( 4, 4 ); double *freq = AllocateDoubleVec( 4 ); nalphabets = 26; nscoredalphabets = 10; charsize = 0x80; n_dis = AllocateIntMtx( nalphabets, nalphabets ); n_disLN = AllocateDoubleMtx( nalphabets, nalphabets ); scoremtx = -1; if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N; if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N; if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N; if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty; if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N; if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N; if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N; if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_N; if( RNApthr == NOTSPECIFIED ) RNApthr = DEFAULTRNATHR_N; if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN; if( kimuraR == NOTSPECIFIED ) kimuraR = 2; RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 ); RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 ); // reporterr( "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N ); // reporterr( "RNAppenalty = %d\n", RNAppenalty ); // reporterr( "RNApenalty = %d\n", RNApenalty ); RNAthr = (int)( 3 
* 600.0 / 1000.0 * RNApthr + 0.5 ); penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5); penalty_dist = (int)( 3 * 600.0 / 1000.0 * ppenalty_dist + 0.5); penalty_shift = (int)( penalty_shift_factor * penalty ); penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5); penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5); penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5); offset = (int)( 1 * 600.0 / 1000.0 * poffset + 0.5); offsetFFT = (int)( 1 * 600.0 / 1000.0 * (-0) + 0.5); offsetLN = (int)( 1 * 600.0 / 1000.0 * 100 + 0.5); penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5); penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5); if( trywarp ) sprintf( shiftmodel, "%4.2f (%4.2f)", -(double)penalty_shift/1800, -(double)penalty_shift/600 ); else sprintf( shiftmodel, "noshift" ); sprintf( modelname, "%s%d (%d), %4.2f (%4.2f), %4.2f (%4.2f), %s", rnakozo?"RNA":"DNA", pamN, kimuraR, -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003, shiftmodel ); for( i=0; i<26; i++ ) amino[i] = locaminon[i]; for( i=0; i<0x80; i++ ) amino_n[i] = -1; for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i; if( fmodel == 1 ) { calcfreq_nuc( nseq, seq, freq ); reporterr( "a, freq[0] = %f\n", freq[0] ); reporterr( "g, freq[1] = %f\n", freq[1] ); reporterr( "c, freq[2] = %f\n", freq[2] ); reporterr( "t, freq[3] = %f\n", freq[3] ); } else { freq[0] = 0.25; freq[1] = 0.25; freq[2] = 0.25; freq[3] = 0.25; } if( kimuraR == 9999 ) { for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] = (double)locn_disn[i][j]; #if NORMALIZE1 average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += pamx[i][j]; average /= 16.0; if( disp ) reporterr( "average = %f\n", average ); for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] *= 600.0 / average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= offset; #endif } else { #if 0 double f = 0.99; double s = 
(double)kimuraR / ( 2 + kimuraR ) * 0.01; double v = (double)1 / ( 2 + kimuraR ) * 0.01; pam1[0][0] = f; pam1[0][1] = s; pam1[0][2] = v; pam1[0][3] = v; pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v; pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s; pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f; #else generatenuc1pam( pam1, kimuraR, freq ); #endif reporterr( "generating a scoring matrix for nucleotide (dist=%d) ... ", pamN ); if( disp ) { reporterr( " TPM \n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", pam1[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } MtxuntDouble( pamx, 4 ); for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 ); if( disp ) { reporterr( " TPM \n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", pamx[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] /= freq[j]; // pamx[i][j] /= 0.25; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) { if( pamx[i][j] == 0.0 ) { reporterr( "WARNING: pamx[i][j] = 0.0 ?\n" ); pamx[i][j] = 0.00001; /* by J. Thompson */ } pamx[i][j] = log10( pamx[i][j] ) * 1000.0; } if( disp ) { reporterr( " after log\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+10.6f ", pamx[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } // ????? 
average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += pamx[i][j] * freq[i] * freq[j]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) average += pamx[i][i] * 1.0 / 4.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] *= 600.0 / average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= offset; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] = shishagonyuu( pamx[i][j] ); if( disp ) { reporterr( " after shishagonyuu\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", pamx[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } reporterr( "done\n" ); } for( i=0; i<5; i++ ) { pamx[4][i] = pamx[3][i]; pamx[i][4] = pamx[i][3]; } for( i=5; i<10; i++ ) for( j=5; j<10; j++ ) { pamx[i][j] = pamx[i-5][j-5]; } if( disp ) { reporterr( " before dis\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", pamx[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } if( disp ) { reporterr( " score matrix \n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", pamx[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); exit( 1 ); } for( i=0; i<26; i++ ) amino[i] = locaminon[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i]; for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0; for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] ); ambiguousscore( amino_n, n_dis ); if( nwildcard ) nscore( amino_n, n_dis ); if( disp ) { reporterr( " score matrix \n" ); for( i=0; i<26; i++ ) { for( j=0; j<26; j++ ) reporterr( "%+6d", n_dis[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); reporterr( "penalty = %d, penalty_ex = %d\n", penalty, penalty_ex ); //exit( 1 ); } // RIBOSUM #if 1 average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += ribosum4[i][j] * freq[i] * freq[j]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) for( k=0; k<4; k++ 
) for( m=0; m<4; m++ ) { // if( i%4==0&&j%4==3 || i%4==3&&j%4==0 || i%4==1&&j%4==2 || i%4==2&&j%4==1 || i%4==1&&j%4==3 || i%4==3&&j%4==1 ) // if( k%4==0&&m%4==3 || k%4==3&&m%4==0 || k%4==1&&m%4==2 || k%4==2&&m%4==1 || k%4==1&&m%4==3 || k%4==3&&m%4==1 ) average += ribosum16[i*4+j][k*4+m] * freq[i] * freq[j] * freq[k] * freq[m]; } for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) average += ribosum4[i][i] * freq[i]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] *= 600.0 / average; average = 0.0; average += ribosum16[0*4+3][0*4+3] * freq[0] * freq[3]; // AU average += ribosum16[3*4+0][3*4+0] * freq[3] * freq[0]; // UA average += ribosum16[1*4+2][1*4+2] * freq[1] * freq[2]; // CG average += ribosum16[2*4+1][2*4+1] * freq[2] * freq[1]; // GC average += ribosum16[1*4+3][1*4+3] * freq[1] * freq[3]; // GU average += ribosum16[3*4+1][3*4+1] * freq[3] * freq[1]; // UG for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] *= 600.0 / average; #if 1 for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] -= offset; /* extending gap cost ?????*/ for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] -= offset; /* extending gap cost ?????*/ #endif for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] = shishagonyuu( ribosum4[i][j] ); for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] = shishagonyuu( ribosum16[i][j] ); if( disp ) { reporterr( "ribosum after shishagonyuu\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) reporterr( "%+#6.10f", ribosum4[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); reporterr( "ribosum16 after shishagonyuu\n" ); for( i=0; i<16; i++ ) { for( j=0; j<16; j++ ) reporterr( "%+#7.0f", ribosum16[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } // reporterr( "done\n" ); #if 1 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] 
= ribosum4[i][j]; // loop-loop // for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+4][j+4] = ribosum16[i][j]; // stem5-stem5 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+20][j+20] = ribosum16[i][j]; // stem5-stem5 #else // do not use ribosum for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop #endif if( disp ) { reporterr( "ribosumdis\n" ); for( i=0; i<37; i++ ) { for( j=0; j<37; j++ ) reporterr( "%+5d", ribosumdis[i][j] ); reporterr( "\n" ); } reporterr( "\n" ); } // reporterr( "done\n" ); #endif FreeDoubleMtx( pam1 ); FreeDoubleMtx( pamx ); free( freq ); } else if( dorp == 'p' && scoremtx == 1 && nblosum == -2 ) /* extended */ { double *freq; double *freq1; double *datafreq; double average; // double tmp; double **n_distmp; nalphabets = 0x100; nscoredalphabets = 0x100; charsize = 0x100; reporterr( "nalphabets = %d\n", nalphabets ); n_dis = AllocateIntMtx( nalphabets, nalphabets ); n_disLN = AllocateDoubleMtx( nalphabets, nalphabets ); n_distmp = AllocateDoubleMtx( nalphabets, nalphabets ); datafreq = AllocateDoubleVec( nalphabets ); freq = AllocateDoubleVec( nalphabets ); if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B; if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty; if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B; if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B; if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B; if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B; if( pamN == NOTSPECIFIED ) pamN = 0; if( kimuraR == NOTSPECIFIED ) kimuraR = 1; penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 ); penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 ); penalty_shift = (int)( penalty_shift_factor * penalty ); penalty_OP = 
(int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 ); penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 ); penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 ); offset = (int)( 600.0 / 1000.0 * poffset + 0.5 ); offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5); offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5); penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5); penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5); extendedmtx( n_distmp, freq, amino, amino_grp ); if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 ); else sprintf( shiftmodel, "noshift" ); sprintf( modelname, "Extended, %4.2f, %+4.2f, %+4.2f, %s", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel ); #if 0 for( i=0; i<26; i++ ) amino[i] = locaminod[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i]; for( i=0; i<0x80; i++ ) amino_n[i] = 0; for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i; #endif for( i=0; i<0x100; i++ )amino_n[i] = -1; for( i=0; i #include #include #include int main( int ac, char **av ) { unsigned int c; FILE *fp; unsigned char buf[10000]; unsigned char *bpt; char format; if( ac == 1 || ( ac == 2 && av[1][0] == '-' ) ) { fp = stdin; } else if( ac == 2 ) { fp = fopen( av[1], "rb" ); if( fp == NULL ) { fprintf( stderr, "%s: Cannot open %s.\n", av[0], av[1] ); exit( 1 ); } } else { fprintf( stderr, "Usage %s input > output\n", av[0] ); exit( 1 ); } #if mingw setmode( fileno( fp ), O_BINARY ); #endif #if 1 format = 'f'; c = fgetc( fp ); if( c == 'C' ) format = 'c'; ungetc( c, fp ); #else fgets( (char *)buf, 999, fp ); rewind( fp ); if( !strncmp( (char *)buf, "CLUSTAL", 7 ) ) format = 'c'; else format = 'f'; #endif if( format == 'c' ) // clustal { int ln = 0; int titlelen = -1; while( 1 ) { fgets( (char *)buf, 999, fp ); if( feof( fp ) ) break; if( ln == 0 ) { ln = 1; printf( "%s", buf ); continue; } bpt = (unsigned char *)strchr( (char *)buf, ' ' ); if( bpt == NULL ) { printf( "\n" ); continue; } if( titlelen == -1 ) { while( *++bpt 
== ' ' ) ; titlelen = bpt - buf; } else { bpt = buf + titlelen; } *(bpt-1) = 0; printf( "%s ", buf ); while( (c=(unsigned int)*bpt++)!='\n' ) { if( c == '-' ) printf( "-- " ); else if( c == '=' ) printf( "== " ); else if( c == '*' ) printf( "** " ); else if( c == ' ' ) printf( " " ); else printf( "%02x ", c ); } printf( "\n" ); } } else // fasta { while( 1 ) { c = fgetc( fp ); if( c == EOF ) break; else if( c == '\n' ) printf( "\n" ); else if( c == '\r' ) printf( "\r" ); // nai else if( c == '-' ) printf( "-- " ); else if( c == '=' ) printf( "== " ); // nai else if( c == ' ' ) printf( " " ); // nai else if( c == '>' || c == '<' ) { printf( "%c", c ); while( 1 ) { c = fgetc( fp ); printf( "%c", c ); if( c == '\n' ) break; } } else printf( "%02x ", c ); } } fclose( fp ); return( 0 ); } mafft-7.505-without-extensions/core/share.h0000644000175000017500000000050314224501721020252 0ustar nileshnilesh#if 0 #include #include #include #endif #define IMA_YONDERU 'x' /* iranai */ #define IMA_KAITERU 0 /* iranai */ #define KAKIOWATTA 'w' #define YOMIOWATTA 'r' #define OSHIMAI 'd' #define ISRUNNING 0 #define SEMAPHORE 1 #define STATUS 2 #define IPC_ALLOC 0100000 mafft-7.505-without-extensions/core/pairash.c0000644000175000017500000007362414224501721020610 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static int usecache; static char *whereispairalign; static char *odir; static char *pdir; static double scale; static int *alreadyoutput; static int equivthreshold; static int equivwinsize; static int equivshortestlen; static void cutpath( char *s ) { char *pos; pos = s + strlen( s ); while( --pos >= s ) { if( *pos == '/' ) break; } strcpy( s, pos+1 ); } static char getchainid( char *s ) { s += strlen( s ) - 2; if( isspace( s[0] ) && isalnum( s[1] ) ) return( s[1] ); else return( 'A' ); } static void extractfirstword( char *s ) { while( *s ) { if( isspace( *s ) ) break; s++; } *s = 0; } static char *strip( char *s ) { char *v; while( *s 
) { if( !isspace( *s ) ) break; s++; } v = s; s += strlen( v ) - 1; while( s>=v ) { if( !isspace( *s ) ) { *(s+1) = 0; break; } s--; } return( v ); } #if 0 static void makeequivdouble( double *d, char *c ) { while( *c ) { *d++ = (double)( *c++ - '0' ); } } static void maskequiv( double *d, int n ) { int halfwin; int ok; int i, j; halfwin = (int)( equivwinsize / 2 ); for( i=0; i 0.0 ) { len += 1; d[i] = 0.0; } else { d[i] = 0.0; if( len >= equivshortestlen ) { len++; while( len-- ) d[i-len] = dbk[i-len]; } len = 0; } } if( len >= equivshortestlen ) { len++; while( len-- ) d[n-len] = dbk[n-len]; } free( dbk ); } #endif static void makeequivdouble_tmalign( double *d, char *c, int n ) { double tmpd; double *dbk; int tmpi; char s; dbk = d; while( *c ) { if( ( s=*c++ ) == ':' ) tmpi = 9; else if( s == '.' ) tmpi = 4; else tmpi = 0; // tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0; // if( tmpd < 0.0 ) tmpd = 0.0; tmpd = (double)( tmpi ); // *d++ = (int)tmpd; *d++ = tmpd; } d = dbk; // maskequiv( d, n ); } static void makeequivdouble_threshold( double *d, char *c, int n ) { double tmpd; double *dbk; int tmpi; dbk = d; while( *c ) { tmpi = (int)( *c++ - '0' ); tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0; if( tmpd < 0.0 ) tmpd = 0.0; // *d++ = (int)tmpd; *d++ = tmpd; } d = dbk; maskequiv( d, n ); } static void readtmalign( FILE *fp, char *seq1, char *seq2, double *equiv ) { static char *line = NULL; static char *equivchar = NULL; int n; if( equivchar == NULL ) { equivchar = calloc( nlenmax * 2 + 1, sizeof( char ) ); line = calloc( nlenmax * 2 + 1, sizeof( char ) ); } seq1[0] = 0; seq2[0] = 0; equivchar[0] = 0; // system( "vi _tmalignout" ); while( 1 ) { if( feof( fp ) ) { fprintf( stderr, "Error in TMalign\n" ); exit( 1 ); } fgets( line, 999, fp ); // fprintf( stdout, "line = :%s:\n", line ); if( !strncmp( line+5, "denotes the residue pairs", 20 ) ) break; } fgets( line, nlenmax*2, fp ); strcat( seq1, strip( 
line ) ); fgets( line, nlenmax*2, fp ); strcat( equivchar, strip( line ) ); fgets( line, nlenmax*2, fp ); strcat( seq2, strip( line ) ); #if 0 printf( "seq1=%s\n", seq1 ); printf( "seq2=%s\n", seq2 ); printf( "equi=%s\n", equivchar ); exit( 1 ); #endif n = strlen( seq1 ); makeequivdouble_tmalign( equiv, equivchar, n ); #if 0 fprintf( stdout, "\n" ); for( i=0; i_dum", fname1, fname1 ); res = system( com ); sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 ); res = system( com ); #endif sprintf( com, "\"%s/TMalign\" %s.pdb %s.pdb > _tmalignout 2>_dum", whereispairalign, fname1, fname2 ); fprintf( stderr, "command = %s\n", com ); res = system( com ); if( res ) { fprintf( stderr, "Error in TMalign\n" ); exit( 1 ); } } else { fprintf( stderr, "Cache is not supported!\n" ); exit( 1 ); } fp = fopen( "_tmalignout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _tmalignout\n" ); exit( 1 ); } readtmalign( fp, *mseq1, *mseq2, equiv ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } static double callrash( int mem1, int mem2, char **mseq1, char **mseq2, double *equiv, char *fname1, char *fname2, int alloclen ) { FILE *fp; // int res; static char com[10000]; double value; char cachedir[10000]; char cachefile[10000]; int runnow; char pairid[1000]; sprintf( pairid, "%d-%d", mem1, mem2 ); // fprintf( stderr, "pairid = %s\n", pairid ); if( usecache ) { // sprintf( cachedir, "tmp" ); sprintf( cachedir, "%s", pdir ); sprintf( cachefile, "%s/%s.%s.rash", cachedir, fname1, fname2 ); runnow = 0; fp = fopen( cachefile, "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open %s\n", cachefile ); exit( 1 ); } else { fclose( fp ); } } else { fprintf( stderr, "Not supported!\n" ); exit( 1 ); } #if 0 if( 0 ) { #if 0 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 ); res = system( com ); sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 
); res = system( com ); #endif #if 0 // 091127, pdp nai! sprintf( com, "env PATH=%s PDP_ASH.pl --qf %s.pdb --qc %s --tf %s.pdb --tc %s > _rashout 2>_dum", whereispairalign, fname1, chain1, fname2, chain2 ); #else sprintf( com, "\"%s/rash\" --qf %s.pdb --qc %s --tf %s.pdb --tc %s --of %s.pdbpair > %s.rashout 2>%s.dum", whereispairalign, fname1, chain1, fname2, chain2, pairid, pairid, pairid ); #endif fprintf( stderr, "command = %s\n", com ); res = system( com ); if( res ) { fprintf( stderr, "Error in structural alignment\n" ); exit( 1 ); } sprintf( com, "awk '/^REMARK/,/^TER/' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname1, chain1, fname2, chain2 ); res = system( com ); sprintf( com, "awk '/^REMARK/,/^TER/{next} 1' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname2, chain2, fname1, chain1 ); res = system( com ); sprintf( com, "rm %s.pdbpair", pairid ); res = system( com ); } else #endif { fprintf( stderr, "Use cache! cachefile = %s\n", cachefile ); sprintf( com, "cat %s > %s.rashout", cachefile, pairid ); system( com ); } if( usecache && runnow ) { fprintf( stderr, "Okashii! 
usechache=%d, runnow=%d\n", usecache, runnow ); exit( 1 ); } sprintf( com, "%s.rashout", pairid ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s\n", com ); exit( 1 ); } readrash( fp, *mseq1, *mseq2, equiv ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } static void preparetmalign( FILE *fp, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt ) { int i, res; char *dumseq; char line[1000]; char fname[1000]; char command[1000]; int linenum, istr, nstr; FILE *checkfp; char *sline; int use[10000]; linenum = 0; nstr = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); use[linenum] = 1; if( sline[0] == '#' || strlen( sline ) < 2 ) { use[linenum] = 0; linenum++; continue; } extractfirstword( sline ); checkfp = fopen( sline, "r" ); if( checkfp == NULL ) { fprintf( stderr, "Cannot open %s.\n", sline ); exit( 1 ); } #if 0 fgets( linec, 999, checkfp ); if( strncmp( "HEADER ", linec, 7 ) ) { fprintf( stderr, "Check the format of %s.\n", sline ); exit( 1 ); } #endif if( checkcbeta( checkfp ) ) { fprintf( stderr, "%s has no C-beta atoms.\n", sline ); exit( 1 ); } else nstr++; fclose( checkfp ); linenum++; } njob = nstr; fprintf( stderr, "nstr = %d\n", nstr ); *strfiles = AllocateCharMtx( nstr, 1000 ); *chainids = AllocateCharMtx( nstr, 2 ); rewind( fp ); istr = 0; linenum = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); if( use[linenum++] ) { (*chainids)[istr][0] = getchainid( sline ); (*chainids)[istr][1] = 0; extractfirstword( sline ); sprintf( fname, "%s", sline ); cutpath( fname ); sprintf( command, "cp %s %s.pdb", sline, fname ); system( command ); sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname ); res = system( command ); if( res ) { fprintf( 
stderr, "error: Install clean.pl\n" ); exit( 1 ); } strcpy( (*strfiles)[istr++], fname ); } } *seqpt = AllocateCharMtx( njob, nlenmax*2+1 ); *mseq1pt = AllocateCharMtx( njob, 0 ); *mseq2pt = AllocateCharMtx( njob, 0 ); *equivpt = AllocateDoubleVec( nlenmax*2+1 ); *alloclenpt = nlenmax*2; dumseq = AllocateCharVec( nlenmax*2+1 ); alreadyoutput = AllocateIntVec( njob ); for( i=0; i%d_%s-%s\n%s\n", i+1, (*strfiles)[i], (*chainids)[i], (*seqpt)[i] ); alreadyoutput[i] = 1; } } static void prepareash( FILE *fp, char *inputfile, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt ) { int i, res; char *dumseq; char line[1000]; char fname[1000]; char command[1000]; int linenum, istr, nstr; // FILE *checkfp; char *sline; int use[10000]; linenum = 0; nstr = 0; fprintf( stderr, "inputfile = %s\n", inputfile ); while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); use[linenum] = 1; if( sline[0] == '#' || strlen( sline ) < 2 ) { use[linenum] = 0; linenum++; continue; } extractfirstword( sline ); #if 0 checkfp = fopen( sline, "r" ); if( checkfp == NULL ) { fprintf( stderr, "Cannot open %s.\n", sline ); exit( 1 ); } if( checkcbeta( checkfp ) ) { fprintf( stderr, "%s has no C-beta atoms.\n", sline ); exit( 1 ); } else nstr++; fclose( checkfp ); #else nstr++; #endif linenum++; } njob = nstr; fprintf( stderr, "nstr = %d\n", nstr ); *strfiles = AllocateCharMtx( nstr, 1000 ); *chainids = AllocateCharMtx( nstr, 2 ); rewind( fp ); istr = 0; linenum = 0; while( 1 ) { fgets( line, 999, fp ); if( feof( fp ) ) break; sline = strip( line ); fprintf( stderr, "sline = %s\n", sline ); if( use[linenum++] ) { (*chainids)[istr][0] = getchainid( sline ); (*chainids)[istr][1] = 0; extractfirstword( sline ); sprintf( fname, "%s", sline ); cutpath( fname ); #if 0 sprintf( command, "cp %s %s.pdb", sline, fname ); system( command ); sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname ); 
res = system( command ); if( res ) { fprintf( stderr, "error: Install clean.pl\n" ); exit( 1 ); } #endif strcpy( (*strfiles)[istr++], fname ); } } *seqpt = AllocateCharMtx( njob, nlenmax*2+1 ); *mseq1pt = AllocateCharMtx( njob, 0 ); *mseq2pt = AllocateCharMtx( njob, 0 ); *equivpt = AllocateDoubleVec( nlenmax*2+1 ); *alloclenpt = nlenmax*2; dumseq = AllocateCharVec( nlenmax*2+1 ); alreadyoutput = AllocateIntVec( njob ); for( i=0; i%d_%s\n%s\n", i+1, (*strfiles)[i], (*seqpt)[i] ); alreadyoutput[i] = 1; } } void arguments( int argc, char *argv[] ) { int c; usecache = 0; scale = 1.0; equivthreshold = 5; equivwinsize = 5; equivshortestlen = 1; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'R'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; odir = ""; pdir = ""; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset 
= (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'o': odir = *++argv; fprintf( stderr, "odir = %s\n", odir ); --argc; goto nextoption; case 'p': pdir = *++argv; fprintf( stderr, "pdir = %s\n", pdir ); --argc; goto nextoption; case 't': equivthreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': equivwinsize = myatoi( *++argv ); --argc; goto nextoption; case 'l': equivshortestlen = myatoi( *++argv ); --argc; goto nextoption; case 's': scale = atof( *++argv ); --argc; goto nextoption; case 'c': usecache = 1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; #if 0 case 'O': fftNoAnchStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'K': alg = 'K'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* 
modification end. */ #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; #endif default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen ) { int i, j, ilim; int clus1, clus2; int off1, off2; double pscore = 0.0; // by D.Mathog static char *indication1, *indication2; FILE *hat2p, *hat3p; static double **distancemtx; static double *effarr1 = NULL; static double *effarr2 = NULL; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable, *tmpptr; static char **pair; // int intdum; double bunbo; char **checkseq; localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i%d_%s\n%s\n", i+1, strfiles[i], seq[i] ); strcpy( checkseq[i], seq[i] ); } else { gappick0( seq[i], mseq1[0] ); fprintf( stderr, "checking seq%d\n", i ); // fprintf( stderr, " seq=%s\n", seq[i] ); // fprintf( stderr, "checkseq=%s\n", checkseq[i] ); if( strcmp( checkseq[i], seq[i] ) ) { fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" ); fprintf( stderr, "i=%d\n", i ); fprintf( stderr, " seq=%s\n", seq[i] ); fprintf( stderr, "checkseq=%s\n", checkseq[i] ); exit( 1 ); } } if( alreadyoutput[j] == 0 ) { alreadyoutput[j] = 1; gappick0( seq[j], mseq2[0] ); fprintf( stdout, ">%d_%s-%s\n%s\n", j+1, strfiles[j], chainids[j], 
seq[j] ); strcpy( checkseq[j], seq[j] ); } else { gappick0( seq[j], mseq2[0] ); fprintf( stderr, "checking seq%d\n", j ); if( strcmp( checkseq[j], seq[j] ) ) { fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" ); fprintf( stderr, "j=%d\n", j ); fprintf( stderr, " seq=%s\n", seq[j] ); fprintf( stderr, "checkseq=%s\n", checkseq[j] ); exit( 1 ); } } #endif } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d k\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); } } } fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown 
algorithm\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; static double *equiv; char **strfiles; char **chainids; int i; FILE *infp; char c; int alloclen; arguments( argc, argv ); if( equivthreshold < 1 || 9 < equivthreshold ) { fprintf( stderr, "-t n, n must be 1..9\n" ); exit( 1 ); } if( ( equivwinsize + 1 ) % 2 != 0 ) { fprintf( stderr, "equivwinsize = %d\n", equivwinsize ); fprintf( stderr, "It must be an odd number.\n" ); exit( 1 ); } if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; nlenmax = 10000; // tekitou if( alg == 'R' ) prepareash( infp, inputfile, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen ); else if( alg == 'T' ) preparetmalign( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen ); fclose( infp ); name = AllocateCharMtx( njob, B+1 ); aseq = AllocateCharMtx( njob, nlenmax*2+1 ); bseq = AllocateCharMtx( njob, nlenmax*2+1 ); eff = AllocateDoubleVec( njob ); for( i=0; i M ) { fprintf( stderr, "The number of structures must be < %d\n", M ); fprintf( stderr, "Please try sequence-based methods for such large data.\n" ); exit( 1 ); } #if 0 readData( infp, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal 
character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i #include #include static void fgetstilspace( unsigned char *b, FILE *fp ) { unsigned int c; int alreadyread = 0; while( 1 ) { c = fgetc( fp ); if( c == ' ' && alreadyread == 0 ) continue; alreadyread = 1; if( c == ' ' || c == '\n' || c == '\r' || c == EOF ) { ungetc( c, fp ); break; } *b++ = (unsigned char)c; } *b = 0; } int main( int ac, char **av ) { unsigned int c; unsigned char buf[100]; FILE *fp; int res; if( ac == 1 || ( ac == 2 && av[1][0] == '-' ) ) { fp = stdin; } else if( ac == 2 ) { fp = fopen( av[1], "rb" ); if( fp == NULL ) { fprintf( stderr, "%s: Cannot open %s.\n", av[0], av[1] ); exit( 1 ); } } else { fprintf( stderr, "Usage %s input > output\n", av[0] ); exit( 1 ); } { while( 1 ) { c = fgetc( fp ); if( c == EOF ) break; else if( c == '\n' ) printf( "\n" ); else if( c == '\r' ) printf( "\r" ); #if 0 else if( c == '-' ) { printf( "-" ); fprintf( stderr, "Warning: '-' will be removed\n" ); } else if( c == '=' ) { printf( "=" ); fprintf( stderr, "Warning: '=' will be removed\n" ); } // else if( c == ' ' ) printf( " " ); // nai #endif else if( c == '>' || c == '<' ) { printf( "%c", c ); while( 1 ) { c = fgetc( fp ); printf( "%c", c ); if( c == '\n' ) break; } } else { buf[0] = (unsigned char)c; fgetstilspace( buf+1, fp ); //fprintf( stderr, "buf=%s\n", buf ); if( strchr( (const char *)buf, '-' ) ) // added cast, 2019/Jan/25 { printf( "-" ); continue; } //res = sscanf( buf, " %x ", &c ); res = sscanf( (const char *)buf, " %x ", &c ); // added cast, 2019/Jan/25 if( res == EOF ) { //fprintf( stderr, "%s was ignored.\n", buf ); } else if( res != 1 ) { fprintf( stderr, "Error in reading %s\n", buf ); exit( 1 ); } else if( c <= 0 || c > 0xff ) { fprintf( stderr, "Out of range: 0x%x\n", c ); //exit( 1 ); } else if( c == 0x0d || c == 0x0a ) { fprintf( stderr, "Warning: skipped 0x%x (CR or LF) that cannot be used in mafft --text.\n", c ); //printf( "%c", c ); } else if( c == 
0x20 || c == 0x3E || c == 0x3C || c == 0x3D ) { fprintf( stderr, "Warning: skipped 0x%x (%c) that cannot be used in mafft --text.\n", c, c ); //printf( "%c", c ); } else if( c == 0x2D ) { fprintf( stderr, "Warning: put 0x%x (%c) that is interpreted as gap in mafft --text.\n", c, c ); printf( "%c", c ); } else printf( "%c", c ); } } } fclose( fp ); return( 0 ); } mafft-7.505-without-extensions/core/dndfast7.c0000644000175000017500000001630314224501721020662 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define TEST 0 int howmanyx( char *s ) { int val = 0; if( scoremtx == -1 ) { do { if( !strchr( "atgcuATGCU", *s ) ) val++; } while( *++s ); } else { do { if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; } while( *++s ); } return( val ); } void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; disopt = 0; divpairscore = 0; swopt = ""; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': disopt = 1; break; case 'A': swopt = "-A"; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *hat2p; FILE *hat3p; FILE *infp; char **seq = NULL; // by D.Mathog char **seq1; char **name; char **name1; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; static char b[B]; double max; char com[1000]; int opt[M]; int res; char *home; char *fastapath; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); LocalHom **localhomtable, *tmpptr; #if 0 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = NULL; #endif fastapath = getenv( "FASTA_4_MAFFT" ); if( !fastapath ) fastapath = "fasta34"; #if DEBUG if( home ) fprintf( stderr, "home = 
%s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d", home, pid ); sprintf( datafile, "%s/tmp/data-%d", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); sprintf( hat2file, "hat2-%d", pid ); arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else dorp = NOTSPECIFIED; getnumlen( infp ); #endif if( dorp == 'd' ) { scoremtx = -1; pamN = NOTSPECIFIED; } else { nblosum = 62; scoremtx = 1; } constants( njob, seq ); rewind( infp ); name = AllocateCharMtx( njob, B+1 ); name1 = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "%s %s -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); if( scoremtx == -1 ) res = ReadFasta34m10_nuc( hat2p, mtx[i], i, name1, localhomtable[i] ); else res = ReadFasta34m10( hat2p, mtx[i], i, name1, localhomtable[i] ); fclose( hat2p ); if( res < njob - i ) { fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); exit( 1 ); } if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } fclose( hat3p ); #endif sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile 
); system( com ); #if 0 sprintf( com, ALNDIR "/supgsdl < %s", hat2file ); res = system( com ); if( res ) ErrorExit( "error in spgsdl" ); #endif sprintf( com, "mv %s hat2", hat2file ); res = system( com ); if( res ) ErrorExit( "error in mv" ); SHOWVERSION; exit( 0 ); } mafft-7.505-without-extensions/core/treeOperation.c0000644000175000017500000004442014224501721021771 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define EF_THREEWAY 1.0 #define MAXBW 1.0 #define MINBW 0.01 #define MINLEN 0.001 #if DEBUG Node *stopol_g; #endif void checkMinusLength( int nseq, double **len ) { int i, j; for( i=0; i= 0 ) { free( tmpintvec[numintvec] ); } free( tmpintvec ); numintvec = 0; #endif } void treeCnv( Node *stopol, int locnseq, int ***topol, double **len, double **bw ) { static int **tmpintvec = NULL; static int numintvec = 0; if( stopol == NULL ) { while( --numintvec >= 0 ) { free( tmpintvec[numintvec] ); } free( tmpintvec ); numintvec = 0; tmpintvec = NULL; return; } int i; NodeInCub parent; int *count; int ccount; int rep; int tmpint; count = AllocateIntVec( 2 * locnseq ); /* oome */ if( !count ) ErrorExit( "Cannot allocate count.\n" ); checkMinusLength( locnseq, len ); /* uwagaki */ stopolInit( locnseq * 2, stopol ); for( i=0; ilength[0] ); } for( i=0, count=0; i<3; i++ ) { #if DEBUG fprintf( stderr, "ob->tmpChildren[%d] = %d\n", i, ob->tmpChildren[i] ); #endif if( oppositeNode != ob->children[i] ) dir_ch[count++] = i; else dir_pa = i; } #if DEBUG fprintf( stderr, "\n" ); #endif if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, oppositeNode-stopol_g ); #endif ErrorExit( "Invalid call\n" ); } tmpvalue0 = syntheticLength( ob->children[dir_ch[0]], ob ); tmpvalue1 = syntheticLength( ob->children[dir_ch[1]], ob ); #if DEBUG fprintf( stderr, "tmpvalue0 = %f\n", tmpvalue0 ); fprintf( stderr, "tmpvalue1 = %f\n", tmpvalue1 ); #endif if( tmpvalue0 ) tmpvalue0 = 1.0 / tmpvalue0; else nanflag = 1; if( tmpvalue1 ) tmpvalue1 = 
1.0 / tmpvalue1; else nanflag = 1; if( nanflag ) value = 0.0; else { value = tmpvalue0 + tmpvalue1; value = 1.0 / value; } value += ob->length[dir_pa]; #if DEBUG fprintf( stderr, "value = %f\n", value ); #endif return( value ); } double calcW( Node *ob, Node *op ) { int i, count; int dir_ch[3]; int dir_pa = -10; // by katoh double a, b, c, f, s; double value; if( isLeaf( *ob ) ) return( 1.0 ); for( i=0, count=0; i<3; i++ ) { if( op != ob->children[i] ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) ErrorExit( "Invalid call of calcW\n" ); #if DEBUG fprintf( stderr, "In calcW\n" ); fprintf( stderr, "ob = %d\n", ob - stopol_g ); fprintf( stderr, "op = %d\n", op - stopol_g ); fprintf( stderr, "ob->children[c1] = %d\n", ob->children[dir_ch[0]] - stopol_g ); fprintf( stderr, "ob->children[c2] = %d\n", ob->children[dir_ch[1]] - stopol_g ); fprintf( stderr, "ob->children[pa] = %d\n", ob->children[dir_pa] - stopol_g ); fprintf( stderr, "\n" ); #endif a = syntheticLength( ob->children[dir_ch[0]], ob ); b = syntheticLength( ob->children[dir_ch[1]], ob ); c = syntheticLength( ob->children[dir_pa], ob ); #if DEBUG fprintf( stderr, "a = %f\n", a ); fprintf( stderr, "b = %f\n", b ); fprintf( stderr, "c = %f\n", c ); #endif if( !c ) return( MAXBW ); if ( !a || !b ) return( MINBW ); /* ? 
*/ f = EF_THREEWAY; s = ( b*c + c*a + a*b ); value = a*b*(c+a)*(c+b) / ( c*(a+b) * f * s ); value = sqrt( value ); return( value ); } void calcBranchWeight( double **bw, int locnseq, Node *stopol, int ***topol, double **len ) { NodeInCub parent; int i; int rep; Node *topNode, *btmNode; double topW, btmW; for( i=locnseq; ichildren[i] != op ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g ); #endif ErrorExit( "Incorrect call of distFromABranch_rec" ); } for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) { result[n] += ob->length[dir_ch[0]]; } distFromABranch_rec( result, ob->children[dir_ch[0]], ob ); for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) { result[n] += ob->length[dir_ch[1]]; } distFromABranch_rec( result, ob->children[dir_ch[1]], ob ); } void distFromABranch( int nseq, double *result, Node *stopol, int ***topol, double **len, int step, int LorR ) { Node *topNode, *btmNode; int i; if( nseq == 2 ) { result[0] = len[0][0]; result[1] = len[0][1]; // reporterr( "result[0] = %f\n", result[0] ); // reporterr( "result[1] = %f\n", result[1] ); return; } if( step == nseq - 2 ) { topNode = stopol[nseq-2].children[0]; btmNode = stopol + nseq-3; #if DEBUG fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); #endif } else { for( i=0; i<3; i++ ) { if( stopol[step].members[i][0] == topol[step][LorR][0] ) break; } if( i== 3 ) ErrorExit( "Incorrect call of distFromABranch." 
); btmNode = stopol[step].children[i]; topNode = stopol+step; } for( i=0; ichildren[i] != op ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g ); #endif ErrorExit( "Incorrect call of weightFromABranch_rec" ); } for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) result[n] *= *ob->weightptr[dir_ch[0]]; weightFromABranch_rec( result, ob->children[dir_ch[0]], ob ); for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) result[n] *= *ob->weightptr[dir_ch[1]]; weightFromABranch_rec( result, ob->children[dir_ch[1]], ob ); } void weightFromABranch( int nseq, double *result, Node *stopol, int ***topol, int step, int LorR ) { Node *topNode, *btmNode; int i; if( step == nseq - 2 ) { topNode = stopol[nseq-2].children[0]; btmNode = stopol + nseq-3; #if DEBUG fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); #endif } else { for( i=0; i<3; i++ ) { if( stopol[step].members[i][0] == topol[step][LorR][0] ) break; } if( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." ); btmNode = stopol[step].children[i]; topNode = stopol+step; } for( i=0; ichildren[i] != op ) dir_ch[count++] = i; else dir_pa = i; } if( count != 2 ) { #if DEBUG fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g ); #endif ErrorExit( "Incorrect call of weightFromABranch_rec" ); } // fprintf( stderr, "\n" ); sumweight = 0.0; count = 0; lastkozo = -1; for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) { // fprintf( stderr, "member1! n=%d\n", n ); sumweight += seqweight[n]; if( kozoari[n] ) { count++; lastkozo = n; } } for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) { // fprintf( stderr, "member2! 
n=%d\n", n ); sumweight += seqweight[n]; if( kozoari[n] ) { count++; lastkozo = n; } } // fprintf( stderr, "count = %d\n", count ); if( count == 1 ) strweight[lastkozo] = sumweight; else if( count > 1 ) { assignstrweight_rec( strweight, ob->children[dir_ch[0]], ob, kozoari, seqweight ); assignstrweight_rec( strweight, ob->children[dir_ch[1]], ob, kozoari, seqweight ); } } void assignstrweight( int nseq, double *strweight, Node *stopol, int ***topol, int step, int LorR, char *kozoari, double *seqweight ) { Node *topNode, *btmNode; int i; if( step == nseq - 2 ) { topNode = stopol[nseq-2].children[0]; btmNode = stopol + nseq-3; #if DEBUG fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); #endif } else { for( i=0; i<3; i++ ) { if( stopol[step].members[i][0] == topol[step][LorR][0] ) break; } if( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." ); btmNode = stopol[step].children[i]; topNode = stopol+step; } for( i=0; i-1; i++ ) fprintf( stderr, "%3d ", topol[step][0][i] ); fprintf( stderr, "\n" ); for( i=0; topol[step][1][i]>-1; i++ ) fprintf( stderr, "%3d ", topol[step][1][i] ); fprintf( stderr, "\n" ); for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( mseq1[0] == '-' || mseq2[0] == '-' ) { for( k=0; k 3 ) code = 36; else code = code1; #else code1 = amino_n[(unsigned char)*seqpt]; code2 = amino_n[(unsigned char)*seqrpt]; if( code1 > 3 ) { code = 36; } else if( code2 > 3 ) { code = code1; } else if( *dirpt == '5' ) { code = 4 + code2 * 4 + code1; } else if( *dirpt == '3' ) { code = 20 + code2 * 4 + code1; } else // if( *dirpt == 'o' ) // nai { code = code1; } #endif // fprintf( stderr, "%c -> code=%d toa=%d, tog=%d, toc=%d, tot=%d, ton=%d, efee=%f\n", *seqpt, code%4, ribosumdis[code][4+0], ribosumdis[code][4+1], ribosumdis[code][4+2], ribosumdis[code][20+3], ribosumdis[code][36], feff ); 
seqpt++; seqrpt++; dirpt++; (*cpmxptpt++)[code] += feff; } } } void mseqcat( char **seq1, char **seq2, double **eff, double *effarr1, double *effarr2, char name1[M][B], char name2[M][B], int clus1, int clus2 ) { int i, j; for( i=0; i 0.0 ) peff_kozo[m] += peff[m]; } } else //iranai { for( m=0; m-1; j++ ) if( s1 == topol[step][branch][0] ) value++; for( j=0; (s2=topol[i][1][j])>-1; j++ ) if( s2 == topol[step][branch][0] ) value++; } return( value ); } void BranchLeafNode( int nseq, int ***topol, int *node, int step, int branch ) { int i, j, k, s; for( i=0; i-1; j++ ) node[s]++; for( k=0; k-1; j++ ) node[s]++; } void RootLeafNode( int nseq, int ***topol, int *node ) { int i, j, k, s; for( i=0; i-1; j++ ) node[s]++; } void nodeFromABranch( int nseq, int *result, int **pairwisenode, int ***topol, double **len, int step, int num ) { int i, s, count; int *innergroup; int *outergroup1; #if 0 int outergroup2[nseq]; int table[nseq]; #else static int *outergroup2 = NULL; static int *table = NULL; if( outergroup2 == NULL ) { outergroup2 = AllocateIntVec( nseq ); table = AllocateIntVec( nseq ); } #endif innergroup = topol[step][num]; outergroup1 = topol[step][!num]; for( i=0; i-1; i++ ) table[s] = 0; for( i=0; (s=outergroup1[i])>-1; i++ ) table[s] = 0; for( i=0, count=0; i-1; i++ ) { result[s] = pairwisenode[s][outergroup1[0]] + pairwisenode[s][outergroup2[0]] - pairwisenode[outergroup1[0]][outergroup2[0]] - 1; result[s] /= 2; } for( i=0; (s=outergroup1[i])>-1; i++ ) { result[s] = pairwisenode[s][outergroup2[0]] + pairwisenode[s][innergroup[0]] - pairwisenode[innergroup[0]][outergroup2[0]] + 1; result[s] /= 2; } for( i=0; (s=outergroup2[i])>-1; i++ ) { result[s] = pairwisenode[s][outergroup1[0]] + pairwisenode[s][innergroup[0]] - pairwisenode[innergroup[0]][outergroup1[0]] + 1; result[s] /= 2; } #if 0 for( i=0; i-1; i++ ) { pair[r1] = 1; memlist1[k++] = r1; } memlist1[k] = -1; for( i=0, k=0; i 10 && jm > 10 ) exit( 1 ); #endif } } void makeEffMtx( int nseq, double **mtx, 
double *vec ) { int i, j; for( i=0; i start2, end1 <-> end2 i2 = m1; if( localhom[t1][i2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[t1]+i2; } } else { swaplist[k1] = 0; for( k2=0; (m2=memlist2[k2])!=-1; k2++ ) { t1 = targetmap[m1]; i2 = m2; if( localhom[t1][i2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[t1]+i2; } } } return( 0 ); } int msshrinklocalhom_fast_half( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink ) { int m1, k1, m2, k2; for( k1=0; (m1=memlist1[k1])!=-1; k1++ ) { for( k2=0; (m2=memlist2[k2])!=-1; k2++ ) { if( m1 < m2 ) { if( localhom[m1][m2-m1].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[m1]+m2-m1; } else { if( localhom[m2][m1-m2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[m2]+m1-m2; } } } return( 0 ); } int msshrinklocalhom_fast( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink ) { int m1, k1, m2, k2; for( k1=0; (m1=memlist1[k1])!=-1; k1++ ) { for( k2=0; (m2=memlist2[k2])!=-1; k2++ ) { if( localhom[m1][m2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[m1]+m2; } } return( 0 ); } int fastshrinklocalhom_one( int *mem1, int *mem2, int norg, LocalHom **localhom, LocalHom ***localhomshrink ) { int k1, k2; int *intpt1, *intpt2; for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ ) { for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { if( *intpt2 != norg ) { fprintf( stderr, "ERROR! 
*intpt2 = %d\n", *intpt2 ); exit( 1 ); } if( localhom[*intpt1][0].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[*intpt1]; } } return( 0 ); } int fastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink ) { int k1, k2; int *intpt1, *intpt2; for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ ) { for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { if( localhom[*intpt1][*intpt2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[*intpt1]+*intpt2; // if( localhomshrink[k1][k2] != NULL ) // printf( "ori localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt ); } } return( 0 ); } int fastshrinklocalhom_half_seed( int *mem1, int *mem2, int nseed, int *posinlsh1, int *posinlsh2, LocalHom **localhom, LocalHom ***localhomshrink ) { int k1, k2, sk1, sk2; int *intpt1, *intpt2; for( intpt1=mem1, k1=0, sk1=0; *intpt1!=-1; intpt1++, k1++ ) { if( *intpt1>=nseed ) posinlsh1[k1] = -1; else posinlsh1[k1] = sk1++; } for( intpt2=mem2, k2=0, sk2=0; *intpt2!=-1; intpt2++, k2++ ) { if( *intpt2>=nseed ) posinlsh2[k2] = -1; else posinlsh2[k2] = sk2++; } for( intpt1=mem1, sk1=0; *intpt1!=-1; intpt1++ ) { if( *intpt1>=nseed ) continue; for( intpt2=mem2, sk2=0; *intpt2!=-1; intpt2++ ) { if( *intpt2>=nseed ) continue; if( *intpt1 < *intpt2 ) { if( localhom[*intpt1][*intpt2-*intpt1].opt == -1 ) localhomshrink[sk1][sk2] = NULL; else localhomshrink[sk1][sk2] = localhom[*intpt1]+*intpt2-*intpt1; } else { if( localhom[*intpt2][*intpt1-*intpt2].opt == -1 ) localhomshrink[sk1][sk2] = NULL; else localhomshrink[sk1][sk2] = localhom[*intpt2]+*intpt1-*intpt2; } // if( localhomshrink[k1][k2] != NULL ) // printf( "ori localhomshrink[%d][%d].opt = %f, .importance=%f\n", k1, k2, localhomshrink[k1][k2]->opt, localhomshrink[k1][k2]->importance ); sk2++; } sk1++; } return( 0 ); } int fastshrinklocalhom_half( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink ) { int k1, k2; 
int *intpt1, *intpt2; for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ ) { for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { if( *intpt1 < *intpt2 ) { if( localhom[*intpt1][*intpt2-*intpt1].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[*intpt1]+*intpt2-*intpt1; } else { if( localhom[*intpt2][*intpt1-*intpt2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[*intpt2]+*intpt1-*intpt2; } // if( localhomshrink[k1][k2] != NULL ) // printf( "ori localhomshrink[%d][%d].opt = %f, .importance=%f\n", k1, k2, localhomshrink[k1][k2]->opt, localhomshrink[k1][k2]->importance ); } } return( 0 ); } int fastshrinklocalhom_target( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink, char *swaplist, int *targetmap ) { int k1, k2; int *intpt1, *intpt2; int t1, i2; for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ ) { if( targetmap[*intpt1] == -1 ) { swaplist[k1] = 1; // swaplist[k1] = 0; // DAME!!! for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { if( targetmap[*intpt2] == -1 ) { localhomshrink[k1][k2] = NULL; continue; } t1 = targetmap[*intpt2]; // end1<->end2, start1<->start2 i2 = *intpt1; if( localhom[t1][i2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[t1]+i2; // if( localhomshrink[k1][k2] != NULL ) // printf( "localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt ); // else // printf( "localhomshrink[%d][%d] = NULL\n", k1, k2 ); } } else { swaplist[k1] = 0; for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { t1 = targetmap[*intpt1]; i2 = *intpt2; if( localhom[t1][i2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[t1]+i2; // if( localhomshrink[k1][k2] != NULL ) // printf( "localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt ); // else // printf( "localhomshrink[%d][%d] = NULL\n", k1, k2 ); } } } return( 0 ); } int msfastshrinklocalhom( int *mem1, int *mem2, LocalHom 
**localhom, LocalHom ***localhomshrink ) { int k1, k2; int *intpt1, *intpt2; int m1, m2; for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ ) { for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ ) { m1 = MIN(*intpt1,*intpt2); m2 = MAX(*intpt1,*intpt2); if( localhom[m1][m2].opt == -1 ) localhomshrink[k1][k2] = NULL; else localhomshrink[k1][k2] = localhom[m1]+m2; } } return( 0 ); } mafft-7.505-without-extensions/core/version.c0000644000175000017500000000011714224501721020631 0ustar nileshnilesh#include "mltaln.h" int main() { fprintf( stdout, VERSION ); return( 0 ); } mafft-7.505-without-extensions/core/mafft.bat0000644000175000017500000000165314224501721020573 0ustar nileshnilesh@echo off setlocal enabledelayedexpansion cls; 1>&2 chcp 65001 1>&2 for /f "usebackq tokens=*" %%i IN (`cd`) DO @set current_dir=%%i if /i "%current_dir%" == "%systemroot%" ( set mafft_working_dir="%~dp0" ) else ( set mafft_working_dir="%current_dir%" ) pushd "%mafft_working_dir%" echo; 1>&2 echo Preparing environment to run MAFFT on Windows. 1>&2 echo This may take a while, if real-time scanning by anti-virus software is on. 1>&2 set ROOTDIR=%~d0%~p0 set PATH=/usr/bin/:%PATH% set MAFFT_BINARIES=/usr/lib/mafft set TMPDIR=%TMP% set MAFFT_TMPDIR=%TMPDIR% REM set TMPDIR=%ROOTDIR%/tmp REM set MAFFT_TMPDIR=%TMPDIR% REM If you do not have write permission for Windows temporary folder REM (typically C:\Users\username\AppData\Local\Temp\), then REM uncomment (remove REM) the above two lines to use an alternative REM temporary folder. 
"%ROOTDIR%\usr\bin\bash" "/usr/bin/mafft" %* popd exit /b mafft-7.505-without-extensions/core/mltaln.h0000644000175000017500000002033014224501721020437 0ustar nileshnilesh#define USE_XCED 0 #define _XOPEN_SOURCE #if USE_XCED #include "config.h" #include "share.h" #else #endif #include "mafft.h" #include #include #include #include #include #include #include #include "mtxutl.h" //#include #include #include #ifdef enablemultithread #include #include #include #endif #ifndef _MSC_VER #include #endif #if !defined(mingw) && !defined(_MSC_VER) #include // for setstacksize, 2016/Jun #include // shared memory #include // shm_open #endif #define VERSION "7.505" #define SHOWVERSION reporterr( "%s (%s) Version " VERSION "\nalg=%c, model=%s, amax=%3.1f\n%d thread(s)\n\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread ) #define FFT_THRESHOLD 80 #define FFT_WINSIZE_P 20 #define FFT_WINSIZE_D 100 #define DISPSEQF 60 #define DISPSITEI 0 #define MAXITERATION 500 #define M 500000 /* njob no saidaiti */ #define N 5000000 /* nlen no saidaiti */ #define MAXSEG 100000 #define B 256 #define C 60 /* 1 gyou no mojisuu */ #define D 6 #define DFORMAT "%#6.3f" #define rnd() ( ( 1.0 / ( RAND_MAX + 1.0 ) ) * rand() ) #define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) #define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) #define G(X) ( ((X)>(0))?(X):(0) ) #define BEFF 1.0 /* 0.6 ni suruto zureru */ #define WIN 3 #define SGAPP -1000 #define GETA2 0.001 #define GETA3 0.001 #define NOTSPECIFIED 100009 #define SUEFF 0.1 /* upg/(spg+upg) -> sueff.sed */ #define DIVLOCAL 0 #define INTMTXSCALE 1000000.0 #define JTT 201 #define TM 202 extern char modelname[500]; extern int njob, nlenmax; extern int amino_n[0x100]; extern char amino_grp[0x100]; //extern int amino_dis[0x100][0x100]; extern int **amino_dis; extern double **n_disLN; //extern double amino_dis_consweight_multi[0x100][0x100]; extern double **amino_dis_consweight_multi; extern int **n_dis; extern int 
**n_disFFT; extern double **n_dis_consweight_multi; extern unsigned char amino[0x100]; extern double polarity[0x100]; extern double volume[0x100]; extern int ribosumdis[37][37]; extern int ppid; extern double thrinter; extern double fastathreshold; extern int pslocal, ppslocal; extern int constraint; extern int divpairscore; extern int fmodel; // 1-> fmodel 0->default -1->raw extern int nblosum; // 45, 50, 62, 80 extern int kobetsubunkatsu; extern int bunkatsu; extern int dorp; extern int niter; extern int contin; extern int calledByXced; extern int devide; extern int scmtd; extern int weight; extern int utree; extern int tbutree; extern int refine; extern int check; extern double cut; extern int cooling; extern int trywarp; extern int penalty, ppenalty, penaltyLN; extern int penalty_dist, ppenalty_dist; extern int RNApenalty, RNAppenalty; extern int RNApenalty_ex, RNAppenalty_ex; extern int penalty_ex, ppenalty_ex, penalty_exLN; extern int penalty_EX, ppenalty_EX; extern int penalty_OP, ppenalty_OP; extern int penalty_shift; extern double penalty_shift_factor; extern int offset, poffset, offsetLN, offsetFFT; extern int RNAthr, RNApthr; extern int scoremtx; extern int TMorJTT; extern char use_fft; extern char force_fft; extern int nevermemsave; extern int fftscore; extern int fftWinSize; extern int fftThreshold; extern int fftRepeatStop; extern int fftNoAnchStop; extern int divWinSize; extern int divThreshold; extern int disp; extern int outgap; extern char alg; extern int cnst; extern int mix; extern int tbitr; extern int tbweight; extern int tbrweight; extern int disopt; extern int pamN; extern int checkC; extern double geta2; extern int treemethod; extern int kimuraR; extern char *swopt; extern int fftkeika; extern int score_check; extern char *inputfile; extern char *addfile; extern int addprofile; extern double consweight_multi; extern double consweight_rna; extern char RNAscoremtx; extern char *signalSM; extern FILE *prep_g; extern FILE *trap_g; extern char 
**seq_g; extern char **res_g; extern int rnakozo; extern char rnaprediction; /* sengen no ichi ha koko dake de ha nai */ extern void constants(); extern char **Calignm1(); extern char **Dalignm1(); extern char **align0(); extern double Cscore_m_1( char **, int, int, double ** ); extern double score_m_1( char **, int, int, double ** ); extern double score_calc0( char **, int, double **, int ); extern char seqcheck( char ** ); extern double substitution( char *, char * ); extern double substitution_score( char *, char * ); extern double substitution_nid( char *, char * ); extern double substitution_hosei( char *, char * ); extern double ipower( double, int ); extern double translate_and_Calign(); extern double A__align(); extern double A__align11(); extern double A__align_gapmap(); extern double partA__align(); extern double L__align11( double **scoringmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ); extern double G__align11(); extern double Falign(); extern double Falign_localhom(); extern double Conalign(); extern double Aalign(); extern double imp_match_out_sc( int, int ); extern double part_imp_match_out_sc( int, int ); extern void ErrorExit(); extern void cpmx_calc(); extern void intergroup_score( char **, char **, double *, double *, int, int, int, double * ); extern int conjuctionfortbfast(); extern int fastconjuction(); extern char seqcheck( char ** ); typedef struct _LocalHom { struct _LocalHom *next; struct _LocalHom *last; int start1; int end1; int start2; int end2; double opt; int overlapaa; int extended; double importance; double rimportance; // double fimportance; // double wimportance; char korh; int nokori; } LocalHom; typedef struct _NodeInCub { int step; int LorR; } NodeInCub; typedef struct _Node { struct _Node *children[3]; int tmpChildren[3]; double length[3]; double *weightptr[3]; int top[3]; int *members[3]; } Node; typedef struct _Segment { int start; int end; int center; double score; int 
skipForeward; int skipBackward; struct _Segment *pair; int number; } Segment; typedef struct _Segments { Segment group1; Segment group2; int number1; int number2; } Segments; typedef struct _Bchain { struct _Bchain *next; struct _Bchain *prev; int pos; } Bchain; typedef struct _Achain { int next; int prev; // int curr; } Achain; typedef struct _Fukusosuu { double R; double I; } Fukusosuu; typedef struct _Gappattern { int len; double freq; } Gappat; typedef struct _RNApair { int uppos; double upscore; int downpos; double downscore; int bestpos; double bestscore; } RNApair; typedef struct _Treedep { int child0; int child1; int done; double distfromtip; } Treedep; typedef struct _Addtree { int nearest; double dist1; char *neighbors; double dist2; } Addtree; typedef struct _lennum { int len; int num; } Lennum; typedef struct _pairnum { unsigned long long npairs; int num; int n0; int n1; } Pairnum; typedef struct _extanch { int i; int j; int starti; int endi; int startj; int endj; int score; } ExtAnch; typedef struct _gappos { int pos; int len; } GapPos; #include "fft.h" #include "dp.h" #include "functions.h" #ifdef enablemultithread #define TLS __thread #else #define TLS #endif extern TLS int commonAlloc1; extern TLS int commonAlloc2; extern TLS int **commonIP; extern TLS int **commonJP; extern int nthread; extern int nthreadreadlh; extern int nthreadpair; extern int randomseed; extern int parallelizationstrategy; #define BESTFIRST 0 #define BAATARI0 1 #define BAATARI1 2 #define BAATARI2 3 extern int scoreout; extern int spscoreout; extern int outnumber; extern int legacygapcost; extern double minimumweight; extern int nwildcard; extern TLS char *newgapstr; extern int nalphabets; extern int nscoredalphabets; extern double specificityconsideration; extern int ndistclass, maxdistclass; extern int gmsg; extern double sueff_global; extern double lenfaca, lenfacb, lenfacc, lenfacd; extern int maxl, tsize; extern char codonpos; extern char codonscore; /* for --large */ 
extern int compacttree; extern int lhlimit; extern int specifictarget; #define HAT3NODEBLOCK 500 extern int nadd; extern int usenaivescoreinsteadofalignmentscore; #define MYBUFSIZE 1000 * 1000 * 100 // 100MB mafft-7.505-without-extensions/core/SAalignmm.c0000644000175000017500000002031614224501721021017 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define DEBUG 0 static void match_calc( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; int count = 0; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { for( j=0; j -1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); } static double Atracking( double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, double **cpmx1, double **cpmx2, int **ijp, int icyc, int jcyc ) { int i, j, k, l, iin, jin, ifi, jfi, lgth1, lgth2; // char gap[] = "-"; char *gap; double wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if DEBUG for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double g; double x; static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *currentw; static TLS double *previousw; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double **cpmx1; static TLS double **cpmx2; static TLS int **intwork; static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; #if DEBUG fprintf( stderr, "eff in SA+++align\n" ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( currentw ); FreeFloatVec( previousw ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.1*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.1*lgth2), orlgth2 ) + 100; fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); currentw = AllocateFloatVec( ll2+2 ); previousw = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); fprintf( stderr, "succeeded\n" ); orlgth1 = ll1; orlgth2 = ll2; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); commonIP = AllocateIntMtx( ll1+10, ll2+10 ); fprintf( stderr, "succeeded\n\n" ); commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; cpmx_calc( seq1, cpmx1, eff1, strlen( seq1[0] ), icyc ); cpmx_calc( seq2, cpmx2, eff2, strlen( seq2[0] ), jcyc ); match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 ); match_calc( currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 ); if( outgap == 1 ) { for( i=1; i wm ) { wm = x; ijp[i][j] = -( j - mpi ); } g = penalty * 0.5; x = previousw[j-1] + g; if( mi <= x ) { mi = x; mpi = j-1; } g = penalty * 0.5; x = m[j] + g; if( x > wm ) { wm = x; ijp[i][j] = +( i - mp[j] ); } g = penalty * 0.5; x = previousw[j-1] + g; if( m[j] <= x ) { m[j] = x; mp[j] = i-1; } currentw[j] += wm; } lastverticalw[i] = currentw[lgth2-1]; } /* fprintf( stderr, "\n" ); for( i=0; i" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'I': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } void seq_grp_nuc( int *grp, char *seq ) { int tmp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); } *grp = END_OF_VEC; } void seq_grp( int *grp, char *seq ) { int tmp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); } *grp = END_OF_VEC; } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } static int 
localcommonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize ); } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } int main( int argc, char **argv ) { int i, j; FILE *fp, *infp; char **seq; int *grpseq; char *tmpseq; int **pointt; static char **name; static int nlen[M]; double **mtx; double **mtx2; double score, score0; static short *table1; char b[B]; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } name = 
AllocateCharMtx( njob, B+1 ); tmpseq = AllocateCharVec( nlenmax+1 ); seq = AllocateCharMtx( njob, nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); pamN = NOTSPECIFIED; #if 0 FRead( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); else tsize = (int)pow( 6, 6 ); maxl = 0; for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } for( i=0; ibestpos!=-1; pt++ ) { if( pt->bestpos > i ) fprintf( fp, "%d %d %50.40f\n", i, pt->bestpos, pt->bestscore ); } } #if 1 static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length ) { char gett[1000]; int *pairnum; int i; int left, right; double prob; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i' ) continue; sscanf( gett, "%d %d %lf", &left, &right, &prob ); if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou //fprintf( stderr, "gett = %s\n", gett ); if( left != right && prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); 
} } free( pairnum ); } #endif #ifdef enablemultithread static void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int thread_no = targ->thread_no; int njob = targ->njob; int *jobpospt = targ->jobpospt; int **gapmap = targ->gapmap; char **nogap = targ->nogap; int nlenmax = targ->nlenmax; RNApair ***pairprob = targ->pairprob; int i, res; FILE *infp; char *com; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); while( 1 ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob ) { pthread_mutex_unlock( targ->mutex ); // return( NULL ); break; } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); commongappick_record( 1, nogap+i, gapmap[i] ); if( strlen( nogap[i] ) == 0 ) continue; sprintf( dirname, "_%d", i ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no ); sprintf( com, "%s/_mccaskillinorg", dirname ); infp = fopen( com, "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); // sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname ); sprintf( com, "cd %s && tr -d '\\r' < _mccaskillinorg > _mccaskillin && cd ..", dirname ); system( com ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea ); else // sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); sprintf( com, "cd %s && env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); // mingw no tame, dirname/bin no kawari ni env PATH res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } sprintf( com, "%s/_mccaskillout", dirname ); infp = fopen( com, "r" ); 
readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); // sprintf( com, "rm -rf \"%s\" > \"/dev/null\" 2>&1", dirname ); sprintf( com, "rm -rf \"%s\"", dirname ); // for windows, not use /dev/null if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); // nanosleep( 100000 ); sleep( 1 ); system( com ); } } free( dirname ); free( com ); return( NULL ); } #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; inputfile = NULL; dorp = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; whereismccaskillmea = NULL; alg = 's'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'd': whereismccaskillmea = *++argv; fprintf( stderr, "whereismccaskillmea = %s\n", whereismccaskillmea ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 's': alg = 's'; // use scarna; default break; case 'G': alg = 'G'; // use dafs, instead of scarna break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static char com[10000]; static int *nlen; int left, right; int res; static char **name, **seq, **nogap; static int **gapmap; static int *order; int i, j; FILE *infp; RNApair ***pairprob; RNApair **alnpairprob; RNApair *pairprobpt; RNApair *pt; int *alnpairnum; double prob; int adpos; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !whereismccaskillmea ) whereismccaskillmea = ""; getnumlen( infp ); rewind( infp ); if( dorp != 'd' ) { fprintf( stderr, "nuc only\n" ); exit( 1 ); } seq = 
AllocateCharMtx( njob, nlenmax*2+1 ); nogap = AllocateCharMtx( njob, nlenmax*2+1 ); gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); alnpairnum = AllocateIntVec( nlenmax ); for( i=0; i 0 ) { int jobpos; pthread_t *handle; pthread_mutex_t mutex; thread_arg_t *targ; jobpos = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } else #endif { for( i=0; i%d\n", i ); continue; } infp = fopen( "_mccaskillinorg", "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea ); else sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea ); res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } infp = fopen( "_mccaskillout", "r" ); readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); fprintf( stdout, ">%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } for( i=0; ibestpos!=-1; pairprobpt++ ) { left = gapmap[i][j]; right = gapmap[i][pairprobpt->bestpos]; prob = pairprobpt->bestscore; for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) if( pt->bestpos == right ) break; if( pt->bestpos == -1 ) { alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); adpos = alnpairnum[left]; alnpairnum[left]++; alnpairprob[left][adpos].bestscore = 0.0; alnpairprob[left][adpos].bestpos = right; 
alnpairprob[left][adpos+1].bestscore = -1.0; alnpairprob[left][adpos+1].bestpos = -1; pt = alnpairprob[left]+adpos; } else adpos = pt-alnpairprob[left]; pt->bestscore += prob; if( pt->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); } } for( i=0; ibestpos!=-1; pairprobpt++ ) { pairprobpt->bestscore /= (double)njob; left = i; right = pairprobpt->bestpos; prob = pairprobpt->bestscore; fprintf( stdout, "%d-%d, %f\n", left, right, prob ); } return( 0 ); #endif } mafft-7.505-without-extensions/core/iteration.c0000644000175000017500000002215714224501721021152 0ustar nileshnilesh /* iteration ( algorithm C ) */ #include "mltaln.h" #define DEBUG 0 static void Writeoptions( FILE *fp ) { if( scoremtx == 1 ) fprintf( fp, "Dayhoff( ... )\n" ); else if( scoremtx == -1 ) fprintf( fp, "DNA\n" ); else if( scoremtx == 2 ) fprintf( fp, "Miyata-Yasunaga\n" ); else fprintf( fp, "JTT %dPAM\n", pamN ); if( scoremtx == 0 ) fprintf( fp, "Gap Penalty = %+d, %+d\n", penalty, offset ); else fprintf( fp, "Gap Penalty = %+d\n", penalty ); fprintf( fp, "marginal score to search : best - %f\n", cut ); if( scmtd == 3 ) fprintf( fp, "score of rnd or sco\n" ); else if( scmtd == 4 ) fprintf( fp, "score = sigma( score for a pair of homologous amino acids ) / ( number of amino acids pairs )\n" ); else if( scmtd == 5 ) fprintf( fp, "score : SP\n" ); if( mix ) fprintf( fp, "?\n" ); else { if( weight == 2 ) fprintf( fp, "weighted, geta2 = %f\n", geta2 ); else if( weight == 3 ) { if( scmtd == 4 ) fprintf( fp, "reversely weighted in function 'align', unweighted in function 'score_calc'\n" ); else fprintf( fp, "weighted like ClustalW," ); } else fprintf( fp, "unweighted\n" ); } if( weight && utree ) { fprintf( fp, "using tree defined by the file hat2 with simplified UPG method\n" ); } if( weight && !utree ) fprintf( fp, "using temporary tree by simplified UPG method\n" ); fprintf( fp, "Algorithm %c\n", alg ); } char **align0( 
double *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex ) { char **result; if( alg == 'B' ) { ErrorExit( "Sorry!" ); /* if( outgap == 0 ) { result = alignm1_o( wm, aseq, seq, scmx, effarr, icyc, ex ); } if( outgap == 1 ) { result = alignm1( wm, aseq, seq, scmx, effarr, icyc, ex ); } */ } else if( alg == 'C' ) { result = Calignm1( wm, aseq, seq, effarr, icyc, ex ); } return( result ); } double score_m_1_0( char **aseq, int locnjob, int s, double **eff, double effarr[M] ) { double x; if( alg == 'B' ) { ErrorExit( "Sorry!" ); } if( alg == 'C' ) { x = Cscore_m_1( aseq, locnjob, s, eff ); } fprintf( stderr, "in score_m_1_0 %f\n", x ); return( x ); } int iteration( int locnjob, char name[M][B], int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff ) { double tscore, mscore; int identity; static char *mseq1, **mseq2 = NULL; static char **result; int i, l; static double effarr[M]; int s; int sss[2]; char ou; int alloclen; int resultlen; int nlenmax0 = nlenmax; FILE *prep; char sai[M]; char sai1[M]; char sai2[M]; #if 0 double his[2][M][MAXITERATION/locnjob+1]; #else double ***his; #endif int cyc[2]; char shindou = 0; double wm; int returnvalue; for( i=0; i nlenmax ) nlenmax = strlen( aseq[0] ); /* s = ( int )( rnd() * locnjob ); s++; if( s == locnjob ) s = 0; ou = 0; */ if( ou == 0 ) { ou = 1; s = sss[0]; /* sss[0]++; if( sss[0] == locnjob ) { sss[0] = 0; cyc[0]++; } */ sss[0]--; if( sss[0] == -1 ) { sss[0] = locnjob-1; cyc[0]++; } } else { ou = 0; s = sss[1]; sss[1]++; if( sss[1] == locnjob ) { sss[1] = 0; cyc[1]++; } } fprintf( trap_g, "%d ", weight ); /* for( i=0, count=0; i alloclen ) { if( resultlen > nlenmax0*3 || resultlen > N ) { fprintf(stderr, "Error in main1\n"); exit( 1 ); } FreeTmpSeqs( mseq2, mseq1 ); alloclen = strlen( result[0] ) * 2.0; fprintf( stderr, "\n\ntrying to allocate TmpSeqs\n\n" ); AllocateTmpSeqs( &mseq2, &mseq1, alloclen ); } for( i=0; i=s; i-- ) strcpy( mseq2[i+1], mseq2[i] ); strcpy( mseq2[s], mseq1 ); 
if( checkC ) { tmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr ); fprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore ); fprintf( stderr, "pick up %d, after ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore1 ); if( tmpscore1 < tmpscore ) { fprintf( stderr, "\7" ); fprintf( trap_g, ">>>>>>>n\n" ); } if( fabs( wm - tmpscore1 ) / wm > 0.001 ) { fprintf( stderr, "\7sorry\n" ); exit( 1 ); } } identity = !strcmp( mseq2[s], aseq[s] ); if( s == locnjob - 1 ) ss = 0; else ss=s+1; identity *= !strcmp( mseq2[ss], aseq[ss] ); if( !identity ) { tmpscore = score_calc0( mseq2, locnjob, eff, s ); } else tmpscore = tscore; if( disp ) { fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); display( mseq2, locnjob ); } fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); fprintf( stderr, "score = %f mscore = %f ", tmpscore, mscore ); fprintf( trap_g, "%#4d %#4d / the rest ", l+1, s+1 ); fprintf( trap_g, "score = %f mscore = %f ", tmpscore, mscore ); if( identity ) { fprintf( stderr, "( identical )\n" ); fprintf( trap_g, "( identical )\n" ); sai[s] = 2; } else if( tmpscore > mscore - cut ) { fprintf( stderr, "accepted\n" ); fprintf( trap_g, "accepted\n" ); for( i=0; i mscore ) { for( i=0; i %f\n", mscore ); strcpy( sai, sai1 ); /* kokoka ? 
*/ #if 0 if( !tbitr && !tbweight ) { prep = fopen( "best", "w" ); Write( prep, locnjob, name, nlen, bseq ); fclose( prep ); } #endif } } else { if( tmpscore == tscore ) { fprintf( stderr, "occational coincidence \n" ); fprintf( trap_g, "occational coincidence\n" ); } else { fprintf( stderr, "rejected\n" ); fprintf( trap_g, "rejected\n" ); } for( i=0; i0; i-- ) { if( tmpscore == his[ou][s][i] ) { shindou = 1; break; } } fprintf( stderr, "\n" ); if( shindou == 1 ) { returnvalue = -1; fprintf( trap_g, "oscillating\n" ); break; } } if( l == MAXITERATION ) returnvalue = -2; FreeDoubleCub( his ); return( returnvalue ); } mafft-7.505-without-extensions/core/partSalignmm.c0000644000175000017500000014330114224501721021605 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 1 #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 #if 0 static void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len ) { int i, j, gc, gb; double feff; for( i=0; i impmtx=%f\n", i1, j1, impmtx[i1][j1] ); return( impmtx[i1][j1] ); #if 0 if( i1 == l1 || j1 == l2 ) return( 0.0 ); return( impmtx[i1+start1][j1+start2] ); #endif } static void part_imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int start2, int *gapmap2 ) { #if FASTMACHCALC double *pt = imp; int *gapmappt = gapmap2; while( lgth2-- ) *pt++ += impmtx[i1][start2+*gapmappt++]; #else int j; for( j=0; j-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } #endif } static void fillzero( double *s, int l ) { while( l-- ) *s++ = 0.0; } static void match_calc_del( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, 
int n2, char **seq2, double *eff2, int i1, int lgth2, int mid, int nmask, int *mask1, int *mask2 ) { // osoi! int i, j, k, m; int c1, c2; // fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 ); // fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] ); // fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] ); // for( i=0; i ", match[k], mid ); match[k] -= matrices[mid][c1][c2] * eff1[i] * eff2[j]; // fprintf( stderr, "match[k] = %f (mid=%d)\n", match[k], mid ); } } // fprintf( stderr, "done\n" ); return; } static void match_calc_add( double **scoreingmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { #if FASTMATCHCALC // fprintf( stderr, "\nmatch_calc... %d", i1 ); int j, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *matchpt, *cpmxpdpt, **cpmxpdptpt; int *cpmxpdnpt, **cpmxpdnptpt; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { int count = 0; for( j=0; j-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, int *warpis, int *warpjs, int warpbase ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk; // char gap[] = "-"; char *gap; double wm; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( 
j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { for( i=0; i= 0 ) { for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { for( i=0; i= 0 ) { for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double g; double *currentw, *previousw; #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double *ogcp1; static TLS double *ogcp2; static TLS double *fgcp1; static TLS double *fgcp2; static TLS double **cpmx1; static TLS double **cpmx2; static TLS double *gapfreq1; static TLS double *gapfreq2; static TLS int **intwork; static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; #endif double *fgcp2pt; double *ogcp2pt; double fgcp1va; double ogcp1va; double *gf2pt; double *gf2ptpre; double gf1va; double gf1vapre; double headgapfreq1; double headgapfreq2; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; if( seq1 == NULL ) { if( orlgth1 ) { // fprintf( stderr, "## Freeing local arrays in A__align\n" ); orlgth1 = 0; orlgth2 = 0; part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL ); free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 
); FreeFloatVec( gapfreq2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } else { // fprintf( stderr, "## Not allocated\n" ); } return( 0.0 ); } // fprintf( stderr, "IN partA__align\n" ); lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 1 // if( lgth1 == 0 ) fprintf( stderr, "WARNING: lgth1=0 in partA__align\n" ); // if( lgth2 == 0 ) fprintf( stderr, "WARNING: lgth2=0 in partA__align\n" ); if( lgth1 == 0 && lgth2 == 0 ) return( 0.0 ); if( lgth1 == 0 ) { for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); #if FASTMATCHCALC doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets ); #else doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc ); cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc ); if( sgap1 ) { new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 ); new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 ); new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 ); new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 ); outgapcount( &headgapfreq1, icyc, sgap1, eff1 ); outgapcount( &headgapfreq2, jcyc, sgap2, eff2 ); outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 ); outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 ); } else { st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 ); st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 ); st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 ); st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 ); headgapfreq1 = 0.0; headgapfreq2 = 0.0; gapfreq1[lgth1] = 0.0; gapfreq2[lgth2] = 0.0; } if( legacygapcost == 0 ) { gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 ); gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 ); for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { for( i=1; i", wm ); #endif // g = mi + *fgcp2pt * gapfreq1[i]; if( (g = mi + *fgcp2pt * gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); } // g = *prept + *ogcp2pt * gapfreq1[i-1]; // if( (g = *prept + *ogcp2pt * gf1vapre) >= mi ) if( (g = *prept + *ogcp2pt * gf1vapre) > mi ) // 2018/Apr { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // g = *mjpt + fgcp1va * gapfreq2[j]; if( (g = *mjpt + fgcp1va * *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } // g = *prept + ogcp1va * gapfreq2[j-1]; // if( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt ) if( (g = *prept + ogcp1va * *gf2ptpre) > *mjpt ) // 2018/Apr { *mjpt = g; *mpjpt = i-1; } #if 
USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { // fprintf( stderr, "WARP in partA__align\n" ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; *warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; curpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double g; double *currentw, *previousw; #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double *ogcp1; static TLS double *ogcp2; static TLS double *fgcp1; static TLS double *fgcp2; static TLS double ***cpmx1s; static TLS double ***cpmx2s; static TLS double *gapfreq1; static TLS double *gapfreq2; static TLS int ***intwork; static TLS double ***doublework; static TLS int orlgth1 = 0, orlgth2 = 0; double fpenalty = (double)penalty; double fpenalty_shift = (double)penalty_shift; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; #endif double *fgcp2pt; double *ogcp2pt; double fgcp1va; double ogcp1va; double *gf2pt; double *gf2ptpre; double gf1va; double gf1vapre; double headgapfreq1; double headgapfreq2; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; int *nmask, **masklist1, **masklist2; if( seq1 == NULL ) { if( orlgth1 ) { // fprintf( stderr, "## Freeing local arrays in A__align\n" ); orlgth1 = 0; orlgth2 = 0; part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL ); free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatCub( cpmx1s ); 
FreeFloatCub( cpmx2s ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatCub( doublework ); FreeIntCub( intwork ); } else { // fprintf( stderr, "## Not allocated\n" ); } return( 0.0 ); } masklist1 = AllocateIntMtx( maxdistclass, 0 ); masklist2 = AllocateIntMtx( maxdistclass, 0 ); nmask = calloc( maxdistclass, sizeof( int ) ); for( c=0; c orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatCub( cpmx1s ); FreeFloatCub( cpmx2s ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatCub( doublework ); FreeIntCub( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 ); cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 ); gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; // cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc ); // cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc ); for( c=0; c tbfast.c, localhom ga hitsuyou if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { for( i=1; i", wm ); #endif // g = mi + *fgcp2pt * gapfreq1[i]; if( (g = mi + *fgcp2pt * gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); } // g = *prept + *ogcp2pt * gapfreq1[i-1]; // if( (g = *prept + *ogcp2pt * gf1vapre) >= mi ) if( (g = *prept + *ogcp2pt * gf1vapre) > mi ) // 2018/Apr { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif // g = *mjpt + fgcp1va * gapfreq2[j]; if( (g = *mjpt + fgcp1va * *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } // g = *prept + ogcp1va * gapfreq2[j-1]; // if( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt ) if( (g = *prept + ogcp1va * *gf2ptpre) > *mjpt ) // 2018/Apr { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; 
*warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; ijppt++; mjpt++; prept++; mpjpt++; curpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i #include #include #include "mafft.h" int report( int status, int progress, char *message ) { fprintf( stderr, "status = %d\n", status ); fprintf( stderr, "progress = %d / 100\n", progress ); fprintf( stderr, "message = %s\n", message ); return( 0 ); } int chudan( int status, int progress, char *message ) { fprintf( stderr, "status = %d\n", status ); fprintf( stderr, "progress = %d / 100\n", progress ); fprintf( stderr, "message = %s\n", message ); return( 2 ); } int main( void ) { int i; int argc; char **argv; char **seq; char **name; char *message; int res; int n, l, mlen; // printf( "This is interface.\n" ); l = 10000; n = 130; seq = (char **)calloc( n, sizeof( char * ) ); name = (char **)calloc( n, sizeof( char * ) ); for( i=0; i -D; Protein -> -P strcpy( argv[18], "-I" ); // --add strcpy( argv[19], "2" ); // --add #if 0 mlen = 5000; message = (char *)calloc( mlen+1, sizeof( char ) ); fprintf( stderr, "first run\n" ); res = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report ); fprintf( stderr, "second run\n" ); res = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report ); fprintf( stderr, "third run\n" ); res = disttbfast( n, l, mlen, name, seq, &message, 
argc, argv, report ); fprintf( stderr, "\n\n\nmessage in interface = :%s:\n", message ); free( message ); #else fprintf( stderr, "first run\n" ); res = disttbfast( n, l, name, seq, argc, argv, report ); fprintf( stderr, "second run\n" ); res = disttbfast( n, l, name, seq, argc, argv, chudan ); // fprintf( stderr, "third run\n" ); // res = disttbfast( n, l, name, seq, argc, argv, report ); #endif if( res == GUI_LENGTHOVER ) { fprintf( stderr, "length over!" ); } else { fprintf( stderr, "res = %d\n", res ); fprintf( stdout, "Output:\n" ); for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'D': scoremtx = -1; break; case 'P': scoremtx = 0; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } } int main( int ac, char **av ) { int *nlen; static char **name, **seq; double score; extern double score_calc_for_score( int, char ** ); arguments( ac, av ); getnumlen( stdin ); rewind( stdin ); nlen = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); seq = AllocateCharMtx( njob, nlenmax+2 ); readData_pointer( stdin, name, nlen, seq ); if( !isaligned( njob, seq ) ) ErrorExit( "Not aligned." 
); constants( njob, seq ); score = score_calc_for_score( njob, seq ); if( scoremtx == 0 ) score += offset; fprintf( stdout, "score = %f\n", score ); if ( scoremtx == 0 ) fprintf( stdout, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( stdout, "Dayhoff( machigai ga aru )\n" ); else if( scoremtx == 2 ) fprintf( stdout, "M-Y\n" ); else if( scoremtx == -1 ) fprintf( stdout, "DNA 1:%d\n", kimuraR ); fprintf( stdout, "gap penalty = %+6.2f, %+6.2f, %+6.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); exit( 0 ); } mafft-7.505-without-extensions/core/rnatest.c0000644000175000017500000002353514224501721020635 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 void arguments( int argc, char *argv[] ) { int c; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; 
goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': disp = 1; break; case 'p': treemethod = 'p'; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA 
(3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int *nlen; static char **name, **seq, **useq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; int i; FILE *infp; char c; int alloclen; RNApair **pair1; RNApair **pair2; double **map; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob > M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); seq = AllocateCharMtx( njob, nlenmax*5+1 ); useq = AllocateCharMtx( njob, nlenmax*5+1 ); aseq = AllocateCharMtx( njob, nlenmax*5+1 ); bseq = AllocateCharMtx( njob, nlenmax*5+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*5; pair1 = calloc( nlenmax*5+1, sizeof( RNApair *) ); pair2 = calloc( nlenmax*5+1, sizeof( RNApair *) ); map = AllocateFloatMtx( nlenmax+1, nlenmax ); eff = AllocateDoubleVec( njob ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); for( i=0; i %c ", seq[0][pair1[i].pos], seq[1][map12[pair1[i].pos].pos] ); if( pair2[map12[pair1[i].pos].pos].pos == -1 ) continue; fprintf( stderr, "%d:%d (%c)", 
map12[pair1[i].pos].pos, pair2[map12[pair1[i].pos].pos].pos, seq[1][pair2[map12[pair1[i].pos].pos].pos] ); } #endif exit( 1 ); pairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen ); fprintf( trap_g, "done.\n" ); #if DEBUG fprintf( stderr, "closing trap_g\n" ); #endif fclose( trap_g ); // writePre( njob, name, nlen, aseq, !contin ); #if 0 writeData( stdout, njob, name, nlen, aseq ); #endif #if IODEBUG fprintf( stderr, "OSHIMAI\n" ); #endif SHOWVERSION; return( 0 ); #endif } mafft-7.505-without-extensions/core/splittbfast.c0000644000175000017500000024233114224501721021511 0ustar nileshnilesh#include "mltaln.h" #define TREE 1 #define PICKSIZE 50 // must be >= 3 #define WEIGHT 0 #define TOKYORIPARA 0.70 // 0.70 #define TOKYORIPARA_A 0.70 // changed #define LENFAC 1 #define HUKINTOTREE 1 #define DIANA 0 #define MAX6DIST 10.0 // kouzoutai ni sasareru pointer ha static #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define END_OF_VEC -1 static char *fastapath; static int doalign; static int fromaln; static int uselongest; static int treeout; static int classsize; static int picksize; static int reorder; static int pid; static int maxdepth = 0; static double tokyoripara; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define DLENFACA 0.01 #define DLENFACB 2500 #define DLENFACC 2500 #define DLENFACD 0.1 static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; typedef struct _scores { double score; int selfscore; int orilen; int *pointt; int numinseq; char *name; // char *seq; // reallo // char **seqpt; int shimon; } Scores; int intcompare( const int *a, const int *b ) { return( *a - *b ); } int lcompare( const Scores *a, const Scores *b ) { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } int dcompare( const Scores *a, const Scores *b ) { if( a->score > b->score ) return 1; else if( a->score < b->score ) return -1; else { if( 
a->selfscore < b->selfscore ) return 1; else if( a->selfscore > b->selfscore ) return -1; else { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } } } static void getfastascoremtx( int **tmpaminodis ) { FILE *qfp; FILE *dfp; FILE *rfp; int i, j; char aa; int slen; int res; char com[10000]; static char *tmpseq; static char *tmpname; double *resvec; if( scoremtx == -1 ) { tmpaminodis['a']['a'] = 5; tmpaminodis['g']['g'] = 5; tmpaminodis['c']['c'] = 5; tmpaminodis['t']['t'] = 5; tmpaminodis['n']['n'] = -1; return; } tmpseq = calloc( 2000, sizeof( char ) ); tmpname = calloc( B, sizeof( char ) ); resvec = calloc( 1, sizeof( double ) ); // fprintf( stderr, "xformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); sprintf( tmpname, ">+===========+%d ", 0 ); strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; // fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; sprintf( tmpname, ">+===========+%d ", 0 ); for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." 
); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadFasta34m10_scoreonly( rfp, resvec, 1 ); fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 ); // fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] ); } tmpaminodis['X']['X'] = -1; free( tmpname ); free( tmpseq ); free( resvec ); } #if 0 static void getblastscoremtx( int **tmpaminodis ) { FILE *qfp; FILE *dfp; FILE *rfp; int i, j; char aa; int slen; int res; char com[10000]; static char *tmpseq; static char *tmpname; double *resvec; if( scoremtx == -1 ) { tmpaminodis['a']['a'] = 1; tmpaminodis['g']['g'] = 1; tmpaminodis['c']['c'] = 1; tmpaminodis['t']['t'] = 1; return; } tmpseq = calloc( 2000, sizeof( char ) ); tmpname = calloc( B, sizeof( char ) ); resvec = calloc( 1, sizeof( double ) ); // fprintf( stderr, "xformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); sprintf( tmpname, "\0", i ); // BUG!! 
strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); if( scoremtx == -1 ) sprintf( com, "formatdb -p f -i %s -o F", datafile ); else sprintf( com, "formatdb -i %s -o F", datafile ); system( com ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." 
); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", "blastall" ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, resvec, 1 ); fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[aa][aa] = (int)( *resvec / 6 ); } tmpaminodis['X']['X'] = 0; free( tmpname ); free( tmpseq ); free( resvec ); } #endif static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata ) { double *val; FILE *qfp; FILE *dfp; FILE *rfp; int i; char com[10000]; static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; static int pid; static char *tmpseq; static char *tmpname; int slen; int res; static Scores *scoresbk = NULL; static int ninbk = 0; if( pid == 0 ) { pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); tmpname = calloc( B+1, sizeof( char ) ); } val = calloc( nin, sizeof( double ) ); // fprintf( stderr, "nin=%d, q=%d\n", nin, query ); if( rewritedata ) { scoresbk = scores; ninbk = nin; // fprintf( stderr, "\nformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." 
); if( picks == NULL ) for( i=0; i+===========+%d ", i ); slen = scores[i].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } else for( i=0; i+===========+%d ", i ); slen = scores[picks[i]].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } fclose( dfp ); } gappick0( tmpseq, seq[scores[query].numinseq] ); sprintf( tmpname, ">+==========+%d ", 0 ); slen = scores[query].orilen; qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." ); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); // fprintf( stderr, "calling fasta, nin=%d\n", nin ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } // fprintf( stderr, "fasta done\n" ); //exit( 1 ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); // fprintf( stderr, "reading fasta\n" ); if( scoremtx == -1 ) res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin ); else res = ReadFasta34m10_scoreonly( rfp, val, nin ); // fprintf( stderr, "done. 
val[0] = %f\n", val[0] ); fclose( rfp ); #if 0 for( i=0; i %s\0", nin, queryfile, datafile, resultfile ); else sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile ); res = system( com ); if( res ) ErrorExit( "error in blast" ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, val, nin ); fclose( rfp ); #if 0 for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'p': picksize = myatoi( *++argv ); fprintf( stderr, "picksize = %d\n", picksize ); --argc; goto nextoption; case 's': classsize = myatoi( *++argv ); fprintf( stderr, "groupsize = %d\n", classsize ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'T': tokyoripara = (double)atof( *++argv ); --argc; goto nextoption; case 'l': uselongest = 0; break; case 'n' : outnumber = 1; break; 
#if 1 case 'a': fmodel = 1; break; #endif case 'S': doalign = 'f'; break; case 'Z': fromaln = 1; break; case 'U': doalign = 1; break; case 'x': reorder = 0; break; case 't': treeout = 1; break; case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'L': legacygapcost = 1; break; #if 0 case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 'a': alg = 'a'; break; case 'R': alg = 'R'; break; case 'Q': alg = 'Q'; break; #endif case 'A': alg = 'A'; break; case 'N': nevermemsave = 1; break; case 'M': alg = 'M'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'G': use_fft = 1; force_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; case 'J': tbutree = 0; break; case 'X': treemethod = 'X'; // tsukawareteiru ???? sueff_global = atof( *++argv ); fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': treemethod = 'E'; // upg (average) break; case 'q': treemethod = 'q'; // minimum break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case ':': nwildcard = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } }

/* Number of residues skipped so far because their group code was out of
   range; incremented by seq_grp_nuc() and seq_grp(). */
static int nunknown = 0;

/*
 * Translate seq into group codes looked up in amino_grp[].
 * Codes below ngrp are appended to grp; every other residue is skipped and
 * counted in nunknown.  grp is terminated with END_OF_VEC.
 * Returns the number of codes stored (terminator not counted).
 */
static int seq_grp_below( int *grp, const char *seq, int ngrp )
{
	int *out = grp;
	int code;

	for( ; *seq; seq++ )
	{
		/* Subscript through unsigned char: where plain char is signed, a
		   byte >= 0x80 would otherwise become a negative index. */
		code = amino_grp[(unsigned char)*seq];
		if( code < ngrp ) *out++ = code;
		else nunknown++;
	}
	*out = END_OF_VEC;
	return( (int)( out - grp ) );
}

/*
 * Group-code translation keeping codes 0..3.
 * grp must have room for strlen( seq ) + 1 ints.
 * Returns the number of codes stored.
 */
int seq_grp_nuc( int *grp, char *seq )
{
	return( seq_grp_below( grp, seq, 4 ) );
}

/*
 * Group-code translation keeping codes 0..5.
 * grp must have room for strlen( seq ) + 1 ints.
 * Returns the number of codes stored.
 */
int seq_grp( int *grp, char *seq )
{
	return( seq_grp_below( grp, seq, 6 ) );
}

/*
 * Add one to table[p] for every entry p of the END_OF_VEC-terminated pointt.
 * table is expected to start out zeroed (callers allocate it with calloc).
 */
void makecompositiontable_p( short *table, int *pointt )
{
	for( ; *pointt != END_OF_VEC; pointt++ )
		table[*pointt]++;
}

/*
 * Count how many entries of pointt are matched by the composition in table,
 * each table slot being usable at most table[p] times.
 *
 * Two scratch arrays are allocated on the first call and kept for the life
 * of the process: memo (tsize counters) and ct (the list of slots touched in
 * this call, so that only those are reset before returning).
 */
static int localcommonsextet_p( short *table, int *pointt )
{
	static short *memo = NULL;
	static int *ct = NULL;
	int *cp;
	int value = 0;
	int point;
	short seen;

	if( !memo )
	{
		memo = (short *)calloc( tsize, sizeof( short ) );
		if( !memo ) ErrorExit( "Cannot allocate memo\n" );
		ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
		if( !ct ) ErrorExit( "Cannot allocate ct\n" );
	}

	cp = ct;
	for( ; ( point = *pointt ) != END_OF_VEC; pointt++ )
	{
		seen = memo[point]++;
		if( seen < table[point] ) value++;
		if( seen == 0 ) *cp++ = point;	/* first visit: remember for reset */
	}
	*cp = END_OF_VEC;

	/* Leave memo all-zero again for the next call. */
	for( cp=ct; *cp != END_OF_VEC; cp++ )
		memo[*cp] = 0;

	return( value );
}

/*
 * Encode every window of six consecutive codes in n (END_OF_VEC-terminated)
 * as one integer in the given base and store the values, END_OF_VEC-
 * terminated, in pointt.  The value is updated from window to window by
 * removing the leading digit and appending the new trailing one.
 * If n holds fewer than six codes, pointt receives only the terminator.
 */
static void kmer6_pointtable( int *pointt, int *n, int base )
{
	int *drop = n;
	int top = base * base * base * base * base;	/* weight of the leading digit */
	int point = 0;
	int k;

	for( k=0; k<6; k++ )
	{
		if( n[k] == END_OF_VEC )
		{
			*pointt = END_OF_VEC;
			return;
		}
		point = point * base + n[k];
	}
	n += 6;
	*pointt++ = point;

	while( *n != END_OF_VEC )
	{
		point -= *drop++ * top;
		point *= base;
		point += *n++;
		*pointt++ = point;
	}
	*pointt = END_OF_VEC;
}

/*
 * Six-code window table in base 4 (leading digit weight 1024).
 * pointt needs one slot per window plus one for the terminator.
 */
void makepointtable_nuc( int *pointt, int *n )
{
	kmer6_pointtable( pointt, n, 4 );
}

/*
 * Six-code window table in base 6 (leading digit weight 7776).
 * pointt needs one slot per window plus one for the terminator.
 */
void makepointtable( int *pointt, int *n )
{
	kmer6_pointtable( pointt, n, 6 );
}

#if 1 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen ) { int l, len1, len2; int clus1, clus2; double pscore, tscore; static int *fftlog; static char *indication1, *indication2; static double *effarr1 = NULL; static double *effarr2 = NULL; static char **mseq1, **mseq2; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; #if 0 int i, 
j; #endif if( effarr1 == NULL ) { fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); mseq1 = AllocateCharMtx( nseq, 0 ); mseq2 = AllocateCharMtx( nseq, 0 ); for( l=0; l 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // fprintf( stdout, "mseq1 = %s\n", mseq1[0] ); // fprintf( stdout, "mseq2 = %s\n", mseq2[0] ); if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = 0; commonAlloc1 = 0; commonAlloc2 = 0; } if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); else ffttry = 0; if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "\bf" ); if( alg == 'M' ) { fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } } else { fprintf( stderr, "\bd" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, NULL, NULL, NULL, 0.0, 0.0 ); break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); 
pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = A__align( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); return; } #endif #if 0 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol { int i, l; int nlim; int clus1, clus2; nlim = nalign-1; for( l=0; l 0 ) // sprintf( outputfile, "%su%d", outputfile, uniform ); sprintf( outputfile + strlen(outputfile), "u%d", uniform ); fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile ); outfp = fopen( outputfile, "w" ); free( outputfile ); if( outfp == NULL ) { fprintf( stderr, "Cannot open %s\n", outputfile ); exit( 1 ); } for( j=0; jG%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] ); fclose( outfp ); #endif #if TREE if( treeout ) { treelen = 0; tmptree = calloc( 100, sizeof( char ) ); for( j=0; j 1 ) { *(*tree+1) = '('; *(*tree+2) = '\0'; } else { *(*tree+1) = '\0'; } for( j=0; j 1 ) strcat( *tree, ")\n" ); else strcat( *tree, "\n" ); // fprintf( stdout, "*tree = %s\n", *tree ); } #endif for( j=0; jselfscore; belongto = 0; while( i-- ) { // fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score ); if( ptr->selfscore > selfscore0 ) { selfscore0 = ptr->selfscore; belongto = ptr-scores; } ptr++; } #if 1 if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; 
free( ptr ); } #endif } else { qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare ); belongto = (int)( 0.5 * nin ); // fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen ); if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; free( ptr ); } } if( qinoya != scores->numinseq ) // if( 1 || qinoya != scores->numinseq ) { // fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt ); if( doalign ) { if( doalign == 'f' ) { blastresults = callfasta( seq, scores, nin, NULL, 0, 1 ); if( scores->selfscore != (int)blastresults[0] ) { fprintf( stderr, "\n\nWARNING1: selfscore\n" ); fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 ); fprintf( stderr, "scores->orilen = %d\n", scores->orilen ); fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] ); // if( abs( scores->selfscore - (int)blastresults[0] ) > 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? 
// exit( 1 ); } } else gappick0( mseq1[0], seq[scores->numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[0].pointt ); } selfscore0 = scores[0].selfscore; for( i=0; iorilen > scores[i].orilen ) { longer = (double)scores->orilen; shorter = (double)scores[i].orilen; } else { longer = (double)scores[i].orilen; // nai shorter = (double)scores->orilen; //nai } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1; if( scores[i].score < 0.0 ) scores[i].score = 0.0; } else { if( fromaln ) { // scores[i].score = ( 1.0 - (double)G__align11_noalign( n_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; } else { if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); scores[i].score = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; // fprintf( stderr, "scores[i] = %f\n", scores[i].score ); // fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] ); // fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya 
== scores->numinseq?'o':'x' ); } } } else { scores[i].score = ( 1.0 - (double)localcommonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac; if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST; } // if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen ); } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); //exit( 1 ); } // fprintf( stderr, "sorting .. " ); qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare ); // fprintf( stderr, "done.\n" ); maxdist = scores[nin-1].score; if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai. { if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) ) { maxdist = 0.0; } // fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 ); } // fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin ); if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score ); // kokoni if( nin < 2 || ... 
) picks = AllocateIntVec( nin+1 ); s_p_map = AllocateIntVec( nin+1 ); s_y_map = AllocateIntVec( nin+1 ); pickkouho = AllocateIntVec( nin+1 ); closeh = AllocateIntVec( nin+1 ); // nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu // fprintf( stderr, "selecting kouhos phase 2\n" ); // if( nkouho == 0 ) // { // fprintf( stderr, "selecting kouhos, phase 2\n" ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // } // fprintf( stderr, "\ndone\n\n" ); for( i=0; i 0 ) { // fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] ); // fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] ); picktmp = pickkouho[nkouho-1]; // fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho ); nkouho--; if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) ) { // fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq ); // fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] ); ; } else { *iptr++ = picktmp; npick++; // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq ); } } i = 1; while( npick0 ) { if( i ) { i = 0; rn = nkouho * 0.5; // fprintf( stderr, "rn = %d\n", rn ); } else { rn = rnd() * (nkouho); } picktmp = pickkouho[rn]; // fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho ); // fprintf( stderr, "#kouho before swap\n" ); // for( i=0; i 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? 
} } else gappick0( mseq1[0], seq[scores[picks[j]].numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[picks[j]].pointt ); } selfscore0 = scores[picks[j]].selfscore; pickmtx[j][0] = 0.0; for( i=j+1; i scores[picks[i]].orilen ) { longer = (double)scores[picks[j]].orilen; shorter = (double)scores[picks[i]].orilen; } else { longer = (double)scores[picks[i]].orilen; shorter = (double)scores[picks[j]].orilen; } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0; } else { if( fromaln ) { fprintf( stderr, "%d-%d/%d\r", j, i, npick ); pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; } else { // fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[picks[i]].numinseq] ); pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score ); } } } else { pickmtx[j][i-j] = ( 1.0 - (double)localcommonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac; if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST; } } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); } dfromcp = AllocateDoubleMtx( 
npick, nin ); dfromc = AllocateDoubleMtx( npick, 0 ); for( i=0; i 2 ) { double avdist; double avdist1; double avdist2; double maxavdist; int splinter; int count; int dochokoho; splinter = 0; int *docholist; int *docholistbk; maxavdist = 0.0; for( i=0; i 1 ) { nyuko = 2; yukos[0] = picks[0]; yukos[1] = picks[1]; closeh[picks[0]] = yukos[0]; closeh[picks[1]] = yukos[1]; } else { nyuko = 1; yukos[0] = picks[0]; closeh[picks[0]] = yukos[0]; } #elif HUKINTOTREE if( npick > 2 ) { #if 0 double avdist; double maxavdist; int count; int splinter; maxavdist = 0.0; splinter=0; for( i=0; i, npick=%d members = \n", npick ); // for( i=0; i %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun ); #if 0 if( scores[picks[i]].orilen > scores[picks[j]].orilen ) { fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 ); tsukau[j] = 0; } else { fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 ); tsukau[i] = 0; } if( 0 && j == npick-1 ) tsukau[i] = 0; else tsukau[j] = 0; fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 ); #else tsukau[j] = 0; closeh[picks[j]] = closeh[picks[i]]; // fprintf( stderr, "%d => tsukawanai\n", j ); #endif } } } } for( ii=0,i=0; i yukomtx[i][j-i] ) minscoreinpick[i] = yukomtx[i][j-i]; } for( j=0; j yukomtx[j][i-j] ) minscoreinpick[i] = yukomtx[j][i-j]; } fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] ); } #endif #if TREE if( treeout ) { children = calloc( nyuko+1, sizeof( char * ) ); for( i=0; iselfscore ); // fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] ); uniform = -1; for( j=0; j scores[j].orilen ) { longer = scores[yukos[i]].orilen; shorter = scores[j].orilen; } else { shorter = scores[yukos[i]].orilen; longer = scores[j].orilen; } #if LENFAC // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); lenfac = 1.0 / ( shorter / 
longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA ); // fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen ); #else lenfac = 1.0; #endif #if 0 // iihazu -> dame ii = s_y_map[j]; jj=s_y_map[yukos[i]]; if( ii != -1 && jj != -1 ) { if( dfromc[ii][yukos[jj]] != -0.5 ) { dfromc[i][j] = dfromc[ii][yukos[jj]]; } else { if( ii > jj ) { kk = jj; jj = ii; ii = kk; } dfromc[ii][yukos[jj]] = dfromc[i][j] = yukomtx[ii][jj-ii]; } } else #else if( dfromc[i][j] == -0.5 ) #endif { if( doalign ) { if( doalign == 'f' ) { dfromc[i][j] = ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1; if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0; } else { if( fromaln ) { dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } else { gappick0( mseq2[0], seq[scores[j].numinseq] ); dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } } } else { dfromc[i][j] = ( 1.0 - (double)localcommonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac; if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST; } } // fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] ); // if( i == 1 ) // fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] ); } // fprintf( stderr, "i=%d, freeing\n", i ); if( !doalign ) free( table1 ); if( doalign && doalign == 'f' ) free( blastresults ); } fprintf( stderr, " \r" ); for( i=0; iselfscore, scores->orilen, scores[nin-1].orilen, nin ); for( j=0; j%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 ); } else // if( belongto 
== -1 ) #else belongto = s_y_map[j]; if( belongto == -1 ) #endif { belongto = 0; // default ha horyu minscore = dfromc[0][j]; for( i=0; i ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 ) // if( rnd() < 0.5 ) // CHUUI !!!!! { // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] ); minscore = dfromc[i][j]; belongto = i; } } } #if 0 if( dfromc[belongto][j] > minscoreinpick[belongto] ) { fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); belongto = npick; } else fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); #endif // fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] ); // fprintf( stderr, "numin = %d\n", numin[belongto] ); outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) ); outs[belongto][numin[belongto]] = scores[j]; numin[belongto]++; } free( dfromcp ); FreeDoubleMtx( dfromc ); // fprintf( stderr, "##### npick = %d\n", npick ); // fprintf( stderr, "##### nyuko = %d\n", nyuko ); if( nyuko > 2 ) { fprintf( stderr, "upgma " ); // veryfastsupg_double_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); fixed_musclesupg_double_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL, 1, 1 ); fprintf( stderr, "\r \r" ); } else { topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) ); topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) ); topol[0][0][0] = 0; topol[0][0][1] = -1; topol[0][1][0] = 1; topol[0][1][1] = -1; } FreeFloatHalfMtx( yukomtx, npick ); #if 0 ii = nyuko-1; fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko ); for( j=0; j 1 ) { fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] ); for( j=0; jnuminseq ); } for( i=0; i= classsize || !aligned ) val = 0; else val = 1; if( nyuko > 1 ) { int *mem1p, 
*mem2p; int mem1size, mem2size; int v1 = 0, v2 = 0, v3 = 0; int nlim; int l; static int *mem1 = NULL; static int *mem2 = NULL; char **parttree = NULL; // by Mathog #if TREE if( treeout ) { parttree = (char **)calloc( nyuko, sizeof( char * ) ); for( i=0; inuminseq; } } *mptr = -1; mem2p = topol[l][1]; mptr = mem2; mem2size = 0; while( *mem2p != -1 ) { // fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] ); i = numin[*mem2p]; ptr = outs[*(mem2p++)]; mem2size += i; while( i-- ) { *mptr++ = (ptr++)->numinseq; } } *mptr = -1; qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare ); qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare ); // selhead( mem1, numin[0] ); // selhead( mem2, numin[1] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, "mem1 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem1[i]+1 ); if( mem1[i] == -1 ) break; } fprintf( stderr, "\n" ); fprintf( stderr, "mem2 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem2[i]+1 ); if( mem2[i] == -1 ) break; } fprintf( stderr, "\n" ); #endif #if 0 fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size ); fprintf( stderr, "before alignment\n" ); for( j=0; j v2 ) { v3 = v1; v1 = v2; v2 = v3; } // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 ); // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 ); // fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] ); sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] ); strcpy( parttree[v1], *tree ); // fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] ); // fprintf( stderr, "*tree = %s\n", *tree ); free( parttree[v2] ); parttree[v2] = NULL; } #endif #if 0 fprintf( stderr, "after alignment\n" ); for( j=0; j maxdepth ) maxdepth = *depthpt; (*depthpt)++; } } else { #if TREE if( treeout ) { sprintf( *tree, "%s", children[0] ); 
free( children[0] ); free( children ); } #endif } for( i=0; i njob ) tokyoripara = 0.0; alloclen = nlenmax * 2; name = AllocateCharMtx( njob, B+1 ); if( classsize == 1 ) seq = AllocateCharMtx( njob, 0 ); else seq = AllocateCharMtx( njob, alloclen+1 ); nlen = AllocateIntVec( njob ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); pointt = AllocateIntMtx( njob, 0 ); grpseq = AllocateIntVec( nlenmax + 1 ); order = (int *)calloc( njob + 1, sizeof( int ) ); whichgroup = (int *)calloc( njob, sizeof( int ) ); weight = (double *)calloc( njob, sizeof( double ) ); fprintf( stderr, "alloclen = %d in main\n", alloclen ); for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { if( seq_grp_nuc( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { if( seq_grp( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable( pointt[i], grpseq ); } // fprintf( stdout, ">%s\n", name[i] ); // fprintf( stdout, "%s\n", seq[i] ); } if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown ); // exit( 1 ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); scores = (Scores *)calloc( njob, sizeof( Scores ) ); // fprintf( stderr, "\nCalculating i-i scores ... 
\n" ); for( i=0; iselfscore = %d\n", i, scores[i].selfscore ); free( blastresults ); #else pscore = 0; if( scoremtx == -1 ) { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { if( *pt == 'u' ) *pt = 't'; aan = amino_n[(int)*pt]; if( aan<0 || aan >= 4 ) *pt = 'n'; if( *pt == 'n' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; } else { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { aan = amino_n[(int)*pt]; if( aan<0 || aan >= 20 ) *pt = 'X'; if( *pt == 'X' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; } #endif } else { pscore = 0; for( pt=seq[i]; *pt; pt++ ) { // pscore += amino_dis[(int)*pt][(int)*pt]; pscore += amino_dis[(int)*pt][(int)*pt]; } scores[i].selfscore = pscore; } // fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, pointt[i] ); scores[i].selfscore = localcommonsextet_p( table1, pointt[i] ); free( table1 ); } } if( tmpaminodis ) FreeIntMtx( tmpaminodis ); depth = 0; #if TREE if( treeout ) { tree = (char **)calloc( 1, sizeof( char *) ); *tree = NULL; // splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight ); completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) ); if( inputfile ) sprintf( treefile, "%s.tree", inputfile ); else sprintf( treefile, "splittbfast.tree" ); treefp = fopen( treefile, "w" ); fprintf( treefp, "%s\n", *tree ); fclose( treefp ); } else completed = splitseq_mq( 
scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #else completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #endif fprintf( stderr, "\nDone.\n\n" ); #if 1 groupnum = 0; groupid = -1; paramem = NULL; npara = 0; for( i=0; i 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 ); } else { paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) ); paramem[npara++] = pos; sprintf( tmpname, "Group-para %s", name[pos]+1 ); } tmpname[B-1] = 0; if( classsize > 1 && classsize <= njob ) strcpy( name[pos]+1, tmpname ); } if( paramem ) { paramem[npara] = -1; if( npara > 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } #else for( i=0; i 1 ) { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize ); fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum ); fprintf( stderr, "Note that the alignment is not completed.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", 
progName( argv[0] ), "inputfile" ); // } #endif if( !doalign ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } else { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "nseq = %d\n", njob ); fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize ); // fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" ); // fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" ); // fprintf( stderr, "please try \n" ); // fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" ); // fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" ); // fprintf( stderr, "and performs only intra-group alignments.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); // } #endif if( !doalign || fromaln ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } #if TREE if( treeout ) free( treefile ); #endif #if 0 fprintf( stdout, "weight =\n" ); for( i=0; i value ) value = 
cand; return( value ); } void calcNaiseki( Fukusosuu *value, Fukusosuu *x, Fukusosuu *y ) { value->R = x->R * y->R + x->I * y->I; value->I = -x->R * y->I + x->I * y->R; } Fukusosuu *AllocateFukusosuuVec( int l1 ) { Fukusosuu *value; value = (Fukusosuu *)calloc( l1, sizeof( Fukusosuu ) ); if( !value ) { fprintf( stderr, "Cannot allocate %d FukusosuuVec\n", l1 ); return( NULL ); } return( value ); } Fukusosuu **AllocateFukusosuuMtx( int l1, int l2 ) { Fukusosuu **value; int j; // fprintf( stderr, "allocating %d x %d FukusosuuMtx\n", l1, l2 ); value = (Fukusosuu **)calloc( l1+1, sizeof( Fukusosuu * ) ); if( !value ) { fprintf( stderr, "Cannot allocate %d x %d FukusosuuVecMtx\n", l1, l2 ); exit( 1 ); } for( j=0; j max ) { ikouho = i; max = tmp; } } #if 0 if( max < 0.15 ) { break; } #endif #if 0 fprintf( stderr, "Kouho No.%d, pos=%d, score=%f, lag=%d\n", j, ikouho, soukan[ikouho], ikouho-nlen4 ); #endif soukan[ikouho] = -9999.9; kouho[j] = ( ikouho - nlen4 ); } return( j ); } void zurasu2( int lag, int clus1, int clus2, char **seq1, char **seq2, char **aseq1, char **aseq2 ) { int i; #if 0 fprintf( stderr, "### lag = %d\n", lag ); #endif if( lag > 0 ) { for( i=0; i 0 ) { for( i=0; i=0; j-- ) { if( prf1[j] ) { hat1[pre1] = j; pre1 = j; } if( prf2[j] ) { hat2[pre2] = j; pre2 = j; } } hat1[pre1] = -1; hat2[pre2] = -1; /* make site score */ stra[i] = 0.0; for( k=hat1[nalphabets]; k!=-1; k=hat1[k] ) for( j=hat2[nalphabets]; j!=-1; j=hat2[j] ) // stra[i] += n_dis[k][j] * prf1[k] * prf2[j]; stra[i] += n_disFFT[k][j] * prf1[k] * prf2[j]; stra[i] /= totaleff; } (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; for( j=0; j threshold ) { #if 0 seg->start = i; seg->end = i; seg->center = ( seg->start + seg->end + fftWinSize ) / 2 ; seg->score = score; status = 0; value++; #else if( !status ) { status = 1; starttmp = i; length = 0; cumscore = 0.0; } length++; cumscore += score; #endif } if( score <= threshold || length > SEGMENTSIZE ) { 
if( status ) { if( length > fftWinSize ) { seg->start = starttmp; seg->end = i; seg->center = ( seg->start + seg->end + fftWinSize ) / 2 ; seg->score = cumscore; #if 0 fprintf( stderr, "%d-%d length = %d, score = %f, value = %d\n", seg->start, seg->end, length, cumscore, value ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } value++; seg++; } length = 0; cumscore = 0.0; status = 0; starttmp = i; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status && length > fftWinSize ) { seg->end = i; seg->start = starttmp; seg->center = ( starttmp + i + fftWinSize ) / 2 ; seg->score = cumscore; #if 0 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } #if TMPTMPTMP exit( 0 ); #endif // fprintf( stderr, "returning %d\n", value ); return( value ); } static int permit( Segment *seg1, Segment *seg2 ) { return( 0 ); if( seg1->end >= seg2->start ) return( 0 ); if( seg1->pair->end >= seg2->pair->start ) return( 0 ); else return( 1 ); } void blockAlign2( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ) { int i, j, k, shift, cur1, cur2, count, klim; static TLS int crossscoresize = 0; static TLS int *result1 = NULL; static TLS int *result2 = NULL; static TLS int *ocut1 = NULL; static TLS int *ocut2 = NULL; double maximum; static TLS double **crossscore = NULL; static TLS int **track = NULL; static TLS double maxj, maxi; static TLS int pointj, pointi; if( cut1 == NULL) { if( result1 ) { if( result1 ) free( result1 ); result1 = NULL; if( result2 ) free( result2 ); result2 = NULL; if( ocut1 ) free( ocut1 ); ocut1 = NULL; if( ocut2 ) free( ocut2 ); ocut2 = NULL; if( track ) FreeIntMtx( track ); track = NULL; if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = NULL; } crossscoresize = 0; return; } if( result1 == NULL ) { result1 = AllocateIntVec( MAXSEG ); result2 = 
AllocateIntVec( MAXSEG ); ocut1 = AllocateIntVec( MAXSEG ); ocut2 = AllocateIntVec( MAXSEG ); } if( crossscoresize < *ncut+2 ) { crossscoresize = *ncut+2; if( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\n", crossscoresize ); if( track ) FreeIntMtx( track ); if( crossscore ) FreeDoubleMtx( crossscore ); track = AllocateIntMtx( crossscoresize, crossscoresize ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); } #if 0 for( i=0; i<*ncut-2; i++ ) fprintf( stderr, "%d.start = %d, score = %f\n", i, seg1[i]->start, seg1[i]->score ); for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%#4.0f ", ocrossscore[i][j] ); fprintf( stderr, "\n" ); } #endif for( i=0; i<*ncut; i++ ) for( j=0; j<*ncut; j++ ) /* mudadanaa */ crossscore[i][j] = ocrossscore[i][j]; for( i=0; i<*ncut; i++ ) { ocut1[i] = cut1[i]; ocut2[i] = cut2[i]; } for( i=1; i<*ncut; i++ ) { #if 0 fprintf( stderr, "### i=%d/%d\n", i,*ncut ); #endif for( j=1; j<*ncut; j++ ) { pointi = 0; maxi = 0.0; klim = j-2; for( k=0; k maxj ) { pointi = k; maxi = crossscore[i-1][k]; } } pointj = 0; maxj = 0.0; klim = i-2; for( k=0; k maxj ) { pointj = k; maxj = crossscore[k][j-1]; } } maxi += penalty; maxj += penalty; maximum = crossscore[i-1][j-1]; track[i][j] = 0; if( maximum < maxi ) { maximum = maxi ; track[i][j] = j - pointi; } if( maximum < maxj ) { maximum = maxj ; track[i][j] = pointj - i; } crossscore[i][j] += maximum; } } #if 0 for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%3d ", track[i][j] ); fprintf( stderr, "\n" ); } #endif result1[MAXSEG-1] = *ncut-1; result2[MAXSEG-1] = *ncut-1; for( i=MAXSEG-1; i>=1; i-- ) { cur1 = result1[i]; cur2 = result2[i]; if( cur1 == 0 || cur2 == 0 ) break; shift = track[cur1][cur2]; if( shift == 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - 1; continue; } else if( shift > 0 ) { result1[i-1] = cur1 - 1; 
result2[i-1] = cur2 - shift; } else if( shift < 0 ) { result1[i-1] = cur1 + shift; result2[i-1] = cur2 - 1; } } count = 0; for( j=i; j ocrossscore[result1[j-1]][result2[j-1]] ) count--; cut1[count] = ocut1[result1[j]]; cut2[count] = ocut2[result2[j]]; count++; } *ncut = count; #if 0 for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); #endif } void blockAlign3( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut ) // memory complexity = O(n^3), time complexity = O(n^2) { int i, j, shift, cur1, cur2, count; static TLS int crossscoresize = 0; static TLS int jumpposi, *jumppos; static TLS double jumpscorei, *jumpscore; static TLS int *result1 = NULL; static TLS int *result2 = NULL; static TLS int *ocut1 = NULL; static TLS int *ocut2 = NULL; double maximum; static TLS double **crossscore = NULL; static TLS int **track = NULL; if( result1 == NULL ) { result1 = AllocateIntVec( MAXSEG ); result2 = AllocateIntVec( MAXSEG ); ocut1 = AllocateIntVec( MAXSEG ); ocut2 = AllocateIntVec( MAXSEG ); } if( crossscoresize < *ncut+2 ) { crossscoresize = *ncut+2; if( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\n", crossscoresize ); if( track ) FreeIntMtx( track ); if( crossscore ) FreeDoubleMtx( crossscore ); if( jumppos ) FreeIntVec( jumppos ); if( jumpscore ) FreeDoubleVec( jumpscore ); track = AllocateIntMtx( crossscoresize, crossscoresize ); crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize ); jumppos = AllocateIntVec( crossscoresize ); jumpscore = AllocateDoubleVec( crossscoresize ); } #if 0 for( i=0; i<*ncut-2; i++ ) fprintf( stderr, "%d.start = %d, score = %f\n", i, seg1[i]->start, seg1[i]->score ); for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%#4.0f ", ocrossscore[i][j] ); fprintf( stderr, "\n" ); } #endif for( i=0; i<*ncut; i++ ) for( j=0; 
j<*ncut; j++ ) /* mudadanaa */ crossscore[i][j] = ocrossscore[i][j]; for( i=0; i<*ncut; i++ ) { ocut1[i] = cut1[i]; ocut2[i] = cut2[i]; } for( j=0; j<*ncut; j++ ) { jumpscore[j] = -999.999; jumppos[j] = -1; } for( i=1; i<*ncut; i++ ) { jumpscorei = -999.999; jumpposi = -1; for( j=1; j<*ncut; j++ ) { #if 1 fprintf( stderr, "in blockalign3, ### i=%d, j=%d\n", i, j ); #endif #if 0 for( k=0; k maxj ) { pointi = k; maxi = crossscore[i-1][k]; } } pointj = 0; maxj = 0.0; for( k=0; k maxj ) { pointj = k; maxj = crossscore[k][j-1]; } } maxi += penalty; maxj += penalty; #endif maximum = crossscore[i-1][j-1]; track[i][j] = 0; if( maximum < jumpscorei && permit( seg1[jumpposi], seg1[i] ) ) { maximum = jumpscorei; track[i][j] = j - jumpposi; } if( maximum < jumpscore[j] && permit( seg2[jumppos[j]], seg2[j] ) ) { maximum = jumpscore[j]; track[i][j] = jumpscore[j] - i; } crossscore[i][j] += maximum; if( jumpscorei < crossscore[i-1][j] ) { jumpscorei = crossscore[i-1][j]; jumpposi = j; } if( jumpscore[j] < crossscore[i][j-1] ) { jumpscore[j] = crossscore[i][j-1]; jumppos[j] = i; } } } #if 0 for( i=0; i<*ncut; i++ ) { for( j=0; j<*ncut; j++ ) fprintf( stderr, "%3d ", track[i][j] ); fprintf( stderr, "\n" ); } #endif result1[MAXSEG-1] = *ncut-1; result2[MAXSEG-1] = *ncut-1; for( i=MAXSEG-1; i>=1; i-- ) { cur1 = result1[i]; cur2 = result2[i]; if( cur1 == 0 || cur2 == 0 ) break; shift = track[cur1][cur2]; if( shift == 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - 1; continue; } else if( shift > 0 ) { result1[i-1] = cur1 - 1; result2[i-1] = cur2 - shift; } else if( shift < 0 ) { result1[i-1] = cur1 + shift; result2[i-1] = cur2 - 1; } } count = 0; for( j=i; j ocrossscore[result1[j-1]][result2[j-1]] ) count--; cut1[count] = ocut1[result1[j]]; cut2[count] = ocut2[result2[j]]; count++; } *ncut = count; #if 0 for( i=0; i<*ncut; i++ ) fprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\n", i, cut1[i], cut2[i] ); #endif } 
mafft-7.505-without-extensions/core/mafft.h0000644000175000017500000000030614224501721020246 0ustar nileshnileshextern int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, char **argv, int (*callback)(int, int, char*)); #define GUI_ERROR 1 #define GUI_LENGTHOVER 2 #define GUI_CANCEL 3 mafft-7.505-without-extensions/core/mafft-homologs.10000644000175000017500000001070614224501721022011 0ustar nileshnilesh.\" Title: MAFFT-HOMOLOGS .\" Author: Kazutaka Katoh .\" Generator: DocBook XSL Stylesheets v1.72.0 .\" Date: 2007-08-14 .\" Manual: Mafft Manual .\" Source: mafft-homologs 2.1 .\" .TH "MAFFT\-HOMOLOGS" "1" "2007\-06\-09" "mafft\-homologs 2.1" "Mafft Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" .RS 0 mafft\-homologs \- aligns sequences together with homologues automatically collected from SwissProt via NCBI BLAST .RE .SH "SYNOPSIS" .RS 0 \fBmafft\-homologs\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] .RE .SH "DESCRIPTION" .RS 0 The accuracy of an alignment of a few distantly related sequences is considerably improved when being aligned together with their close homologs. The reason for the improvement is probably the same as that for PSI\-BLAST. That is, the positions of highly conserved residues, those with many gaps and other additional information is brought by close homologs. According to Katoh et al. (2005), the improvement by adding close homologs is 10% or so, which is comparable to the improvement by incorporating structural information of a pair of sequences. Mafft\-homologs in a mafft server works like this: .sp .RS 4 \h'-04' 1.\h'+02'Collect a number (50 by default) of close homologs (E=1e\-10 by default) of the input sequences. .RE .sp .RS 4 \h'-04' 2.\h'+02'Align the input sequences and homologs all together using the L\-INS\-i strategy. .RE .sp .RS 4 \h'-04' 3.\h'+02'Remove the homologs. 
.RE .RE .SH "OPTIONS" .RS 0 .PP \fB\-a\fR \fI\fIn\fR\fR .RS 4 The number of collected sequences (default: 50). .RE .PP \fB\-e\fR \fI\fIn\fR\fR .RS 4 Threshold value (default: 1e\-10). .RE .PP \fB\-o\fR \fI\fIxxx\fR\fR .RS 4 Options for mafft (default: " \-\-op 1.53 \-\-ep 0.123 \-\-maxiterate 1000 \-\-localpair \-\-reorder"). .RE .PP \fB\-l\fR .RS 4 Locally carries out BLAST searches instead of NCBI BLAST (requires locally installed BLAST and a database). .RE .PP \fB\-f\fR .RS 4 Outputs collected homologues also (default: off). .RE .PP \fB\-w\fR .RS 4 Entire sequences are subjected to BLAST search (default: well\-aligned region only). .RE .RE .SH "REQUIREMENTS" .RS 0 .PP MAFFT version > 5.58. .PP Either of .RS 4 .PP lynx (when remote BLAST server is used) .PP BLAST and a protein sequence database (when local BLAST is used) .RE .RE .SH "REFERENCES" .RS 0 .PP Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment. .RE .SH "SEE ALSO" .RS 0 .PP \fBmafft\fR(1) .RE .SH "AUTHORS" .RS 0 .PP \fBKazutaka Katoh\fR <\&katoh_at_bioreg.kyushu\-u.ac.jp.\&> .sp -1n .IP "" 4 Wrote Mafft. .PP \fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy.org\&> .sp -1n .IP "" 4 Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. .RE .SH "COPYRIGHT" .RS 0 Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) .br Copyright \(co 2007 Charles Plessy (this manpage) .br .PP Mafft and its manpage are offered under the following conditions: .PP Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: .sp .RS 4 \h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
.RE .sp .RS 4 \h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. .RE .sp .RS 4 \h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. .RE .PP THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.br .RE mafft-7.505-without-extensions/core/mtxutl.h0000644000175000017500000000266314224501721020516 0ustar nileshnileshvoid MtxuntDouble( double **, int ); void MtxmltDouble( double **, double **, int ); char *AllocateCharVec( int ); void FreeCharVec( char * ); char **AllocateCharMtx( int, int); void ReallocateCharMtx( char **, int, int); void FreeCharMtx( char ** ); double *AllocateFloatVec( int ); void FreeFloatVec( double * ); double **AllocateFloatHalfMtx( int ); double **AllocateFloatMtx( int, int ); void FreeFloatHalfMtx( double **, int ); void FreeFloatMtx( double ** ); double **AlocateFloatTri( int ); void FreeFloatTri( double ** ); int *AllocateIntVec( int ); int *AllocateIntVecLarge( unsigned long long ); void FreeIntVec( int * ); int **AllocateIntMtx( int, int ); int **AllocateIntMtxLarge( unsigned long long, unsigned long long ); void FreeIntMtx( int ** ); char ***AllocateCharCub( int, int, int ); void FreeCharCub( char *** ); int ***AllocateIntCub( int, int, int ); void FreeIntCub( int *** ); double *AllocateDoubleVec( int ); void FreeDoubleVec( double * ); double **AllocateDoubleHalfMtx( int ); double **AllocateDoubleMtx( int, int ); void FreeDoubleHalfMtx( double **, int ); void FreeDoubleMtx( double ** ); double ***AllocateDoubleCub( int, int, int ); void FreeDoubleCub( double *** ); double ***AllocateFloatCub( int, int, int ); void FreeFloatCub( double *** ); short *AllocateShortVec( int ); void FreeShortVec( short * ); short **AllocateShortMtx( int, int ); void FreeShortMtx( short ** ); void freeintmtx( int **, int ); mafft-7.505-without-extensions/core/pairlocalalignmain.c0000644000175000017500000000033514224501721022774 0ustar nileshnilesh#include "mltaln.h" int main( int argc, char **argv ) { int res = pairlocalalign( 0, 0, NULL, NULL, NULL, NULL, argc, argv, NULL ); if( res == GUI_CANCEL ) res = 0; // treeout de goto chudan wo riyousuru return res; } 
mafft-7.505-without-extensions/core/dndpre.c0000644000175000017500000002612614224501721020430 0ustar nileshnilesh#include "mltaln.h" #define TEST 0 static int treeout = 0; static int maxdist = 1; #ifdef enablemultithread typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _thread_arg { int njob; int thread_no; double *selfscore; double **mtx; char **seq; int **skiptable; Jobtable *jobpospt; pthread_mutex_t *mutex; } thread_arg_t; #if 0 void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; double *selfscore = targ->selfscore; double **mtx = targ->mtx; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; int i, j; double ssi, ssj, bunbo; double mtxv; if( njob == 1 ) return( NULL ); while( 1 ) { pthread_mutex_lock( targ->mutex ); j = jobpospt->j; i = jobpospt->i; j++; // fprintf( stderr, "\n i=%d, j=%d before check\n", i, j ); if( j == njob ) { // fprintf( stderr, "\n j = %d, i = %d, njob = %d\n", j, i, njob ); fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", i+1, njob, thread_no ); i++; j = i + 1; if( i == njob-1 ) { // fprintf( stderr, "\n i=%d, njob-1=%d\n", i, njob-1 ); pthread_mutex_unlock( targ->mutex ); return( NULL ); } } // fprintf( stderr, "\n i=%d, j=%d after check\n", i, j ); jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex ); ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); if( bunbo == 0.0 ) mtxv = maxdist; else { // mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo ); if( usenaivescoreinsteadofalignmentscore ) mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], 0.0 ) / bunbo ); else mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty ) / bunbo ); } #if 1 if( mtxv < 0.0 ) { reporterr( "WARNING: negative distance, mtxv = %f\n", mtxv ); mtxv = 0.0; } if( mtxv 
> 9.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j] = mtxv; } } #else void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; double *selfscore = targ->selfscore; double **mtx = targ->mtx; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; int i, j; double ssi, ssj, bunbo; double mtxv; if( njob == 1 ) return( NULL ); while( 1 ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i += 1; pthread_mutex_unlock( targ->mutex ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); ssi = selfscore[i]; for( j=i+1; j 9.9 ) { fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv ); mtxv = 9.9; // exit( 1 ); // 2016/Aug/3 } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j] = mtxv; } } } #endif #endif void arguments( int argc, char *argv[] ) { int c; nadd = 0; nthread = 1; alg = 'X'; fmodel = 0; treeout = 0; scoremtx = 1; nblosum = 62; dorp = NOTSPECIFIED; inputfile = NULL; ppenalty = NOTSPECIFIED; //? ppenalty_ex = NOTSPECIFIED; //? poffset = NOTSPECIFIED; //? kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; usenaivescoreinsteadofalignmentscore = 0; nwildcard = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'Z': usenaivescoreinsteadofalignmentscore = 1; break; case 't': treeout = '1'; break; case 'D': dorp = 'd'; break; case 'a': fmodel = 1; break; case 'P': dorp = 'p'; break; case ':': nwildcard = 1; break; case 'K': // Hontou ha iranai. 
disttbfast.c, tbfast.c to awaserutame. break; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'M': maxdist = myatoi( *++argv ); // fprintf( stderr, "maxdist = %d\n", maxdist ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); // fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char **argv ) { int i, j, ilim; char **seq; static char **name; int *nlen; double *selfscore; double **mtx; double mtxv; FILE *fp; FILE *infp; double ssi, ssj, bunbo; int **skiptable = NULL; char c; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; #if 0 PreRead( stdin, &njob, &nlenmax ); #else getnumlen( infp ); #endif if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } rewind( infp ); 
njob -= nadd; // atarashii hairetsu ha mushi seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); mtx = AllocateDoubleMtx( njob, njob ); selfscore = AllocateFloatVec( njob ); nlen = AllocateIntVec( njob ); #if 0 FRead( stdin, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); #endif fclose( infp ); for( i=1; i 0 ) { thread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 9.0 ) { fprintf( stderr, "WARNING: Distance %d-%d is strange, %f.\n", i, j, mtxv ); mtxv = 9.9; // exit( 1 ); // 2016/Aug/3 } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j] = mtxv; } } } #endif #if TEST for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; --argc; goto nextoption; case 'a': alignmentfile = *++argv; --argc; goto nextoption; case 'z': // add2ndharfarg wo tsukau tame. mapout = 2; break; case 'Z': // add2ndharfarg wo tsukau tame. mapout = 1; break; case 'p': // add2ndharfarg wo tsukau tame. 
break; case 'Y': keeplength = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *alfp; FILE *dlfp; char **name; char **aname; char **oname; char **seq; char **aseq; int *nlen; int *oripos; char *npt, *npt0, *npt2, *pt, *pt2; int i, o, l, prelen; int nlenmin; int njobs, njoba; // int **dlist; // int *ndel; char *gett; char *insname; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( alignmentfile ) { alfp = fopen( alignmentfile, "r" ); if( !alfp ) { fprintf( stderr, "Cannot open %s\n", alignmentfile ); exit( 1 ); } } else { fprintf( stderr, "No alignment is given.\n" ); exit( 1 ); } if( keeplength ) { dlfp = fopen( "_deletelist", "r" ); if( !dlfp ) { fprintf( stderr, "Cannot open _deletefile\n" ); exit( 1 ); } } dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); njobs = njob; // fprintf( stderr, "in infp, %d x %d - %d %c\n", njob, nlenmin, nlenmax, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); oripos = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); fclose( infp ); dorp = NOTSPECIFIED; getnumlen( alfp ); njoba = njob; // fprintf( stderr, "in alfp, %d x %d %c\n", njob, nlenmax, dorp ); aseq = AllocateCharMtx( njob, nlenmax+1 ); aname = AllocateCharMtx( njob, B+1 ); oname = AllocateCharMtx( njob, B+1 ); insname = calloc( njob, sizeof( char ) ); readData_pointer( alfp, aname, nlen, aseq ); fclose( alfp ); for( i=0; i 5.58 is required # # Usage: # mafft-homologs.rb [options] input > output # Options: # -a # the number of collected sequences (default: 50) # -e # threshold value (default: 1e-10) # -o "xxx" options for mafft # (default: " --op 1.53 
--ep 0.123 --maxiterate 1000") # -l locally carries out blast searches instead of NCBI blast # (requires locally installed blast and a database) # -f outputs collected homologues also (default: off) # -w entire sequences are subjected to BLAST search # (default: well-aligned region only) #require 'getopts' require 'optparse' require 'tempfile' if ENV["MAFFT_BLAST"] && ENV["MAFFT_BLAST"] != "" then blastpath = ENV["MAFFT_BLAST"] end if ENV["MAFFT_HOMOLOGS_MAFFT"] && ENV["MAFFT_HOMOLOGS_MAFFT"] != "" then mafftpath = ENV["MAFFT_HOMOLOGS_MAFFT"] end # mktemp GC.disable temp_vf = Tempfile.new("_vf").path temp_if = Tempfile.new("_if").path temp_pf = Tempfile.new("_pf").path temp_af = Tempfile.new("_af").path temp_qf = Tempfile.new("_qf").path temp_bf = Tempfile.new("_bf").path temp_rid = Tempfile.new("_rid").path temp_res = Tempfile.new("_res").path system( mafftpath + " --help > #{temp_vf} 2>&1" ) pfp = File.open( "#{temp_vf}", 'r' ) while pfp.gets break if $_ =~ /MAFFT v/ end pfp.close if( $_ ) then mafftversion = $_.sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s else mafftversion = "0" end if( mafftversion < "5.58" ) then STDERR.puts "" STDERR.puts "======================================================" STDERR.puts "Install new mafft (v. 
>= 5.58)" STDERR.puts "======================================================" STDERR.puts "" exit end srand ( 0 ) def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").strip ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end nadd = 600 num_alignments = 600 num_threads_blast = 4 eval = 1e-1 local = 0 fullout = 0 entiresearch = 1 corewin = 50 corethr = 0.3 #mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder " mafftopt = " --op 1.53 --ep 0.0 --globalpair --maxiterate 1000 --reorder " #if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then # puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file" # exit #end params = ARGV.getopts( "sfwlhe:a:o:c:d:n:N:" ) #if $OPT_c then if params["c"] != nil then corewin = params["c"].to_i end #if $OPT_d then #if params["d"] != nil then # corethr = params["d"].to_f #end # if params["d"] != nil then localdb = params["d"].to_s end if params["n"] != nil then num_alignments = params["n"].to_s end if params["N"] != nil then num_threads_blast = params["N"].to_s end #if $OPT_w if params["w"] == true then entiresearch = 1 end #if $OPT_f if params["f"] == true then fullout = 1 end #if $OPT_s if params["s"] == true then fullout = 0 end #if $OPT_l if params["l"] == true then local = 1 end #if $OPT_e then if params["e"] != nil then # eval = $OPT_e.to_f eval = params["e"].to_f end #if $OPT_a then if params["a"] != nil then nadd = params["a"].to_i end #if $OPT_o then if params["o"] != nil then mafftopt += " " + params["o"] + " " end infn = ARGV[0].to_s.strip system "cat " + infn + " > #{temp_if}" ar = mafftopt.split(" ") nar = ar.length for i in 0..(nar-1) if ar[i] == "--seed" then system "cat #{ar[i+1]} >> #{temp_if}" end end if fullout == 0 then mafftopt += " --excludehomologs " end nseq = 0 ifp = File.open( "#{temp_if}", 'r' ) 
while ifp.gets nseq += 1 if $_ =~ /^>/ end ifp.close if nseq >= 10000 then STDERR.puts "The number of input sequences must be <10000." exit elsif nseq == 1 then system( "cp #{temp_if}" + " #{temp_pf}" ) else STDERR.puts "Performing preliminary alignment .. " if entiresearch == 1 then # system( mafftpath + " --maxiterate 1000 --localpair #{temp_if} > #{temp_pf}" ) system( mafftpath + " --maxiterate 0 --retree 2 #{temp_if} > #{temp_pf}" ) else system( mafftpath + " --maxiterate 1000 --localpair --core --coreext --corethr #{corethr.to_s} --corewin #{corewin.to_s} #{temp_if} > #{temp_pf}" ) end end pfp = File.open( "#{temp_pf}", 'r' ) inname = [] inseq = [] slen = [] act = [] nin = 0 nin = readfasta( pfp, inname, inseq ) for i in 0..(nin-1) slen.push( inseq[i].gsub(/-/,"").length ) act.push( 1 ) end pfp.close pfp = File.open( "#{temp_if}", 'r' ) orname = [] orseq = [] nin = 0 nin = readfasta( pfp, orname, orseq ) pfp.close allen = inseq[0].length for i in 0..(nin-2) for j in (i+1)..(nin-1) next if act[i] == 0 next if act[j] == 0 pid = 0.0 total = 0 for a in 0..(allen-1) next if inseq[i][a,1] == "-" || inseq[j][a,1] == "-" total += 1 pid += 1.0 if inseq[i][a,1] == inseq[j][a,1] end pid /= total # puts "#{i.to_s}, #{j.to_s}, #{pid.to_s}" if pid > 0.5 then if slen[i] < slen[j] act[i] = 0 else act[j] = 0 end end end end #p act afp = File.open( "#{temp_af}", 'w' ) STDERR.puts "Searching .. 
\n" ids = [] add = [] sco = [] nblast = 0 # ato de tsukau kamo for i in 0..(nin-1) singleids = [] singleadd = [] inseq[i].gsub!(/-/,"") afp.puts ">" + orname[i] afp.puts orseq[i] # afp.puts ">" + inname[i] # afp.puts inseq[i] STDERR.puts "Query (#{i+1}/#{nin})\n" + inname[i] if act[i] == 0 then STDERR.puts "Skip.\n\n" next end if local == 0 then command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}" system command ridp = File.open( "#{temp_rid}", 'r' ) while ridp.gets break if $_ =~ / RID = (.*)/ end ridp.close rid = $1.strip STDERR.puts "Submitted to NCBI. rid = " + rid STDERR.printf "Waiting " while 1 STDERR.printf "." sleep 10 command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}" system command resp = File.open( "#{temp_res}", 'r' ) # resp.gets # if $_ =~ /WAITING/ then # resp.close # next # end while( resp.gets ) break if $_ =~ /QBlastInfoBegin/ end resp.gets if $_ =~ /WAITING/ then resp.close next else resp.close break end end else # puts "Not supported" # exit qfp = File.open( "#{temp_qf}", 'w' ) qfp.puts "> " qfp.puts inseq[i] qfp.close command = blastpath + " -num_iterations 2 -num_threads #{num_threads_blast} -evalue #{eval} -num_alignments #{num_alignments} -outfmt 5 -query #{temp_qf} -db #{localdb} > #{temp_res}" system command # system "cp #{temp_res} _res" end STDERR.puts " Done.\n\n" resp = File.open( "#{temp_res}", 'r' ) hitnum = 0 lasteval = "nohit" while resp.gets break if $_ =~ /2<\/Iteration_iter-num>/ end if $_ == nil then STDERR.puts "no hit" else while 1 while resp.gets break if $_ =~ /(.*)<\/Hit_id>/ || $_ =~ /()/ end id = $1 break if $_ =~ // # p id starthit = 9999999 endhit 
= -1 startquery = 9999999 endquery = -1 target = "" score = 0.0 while line = resp.gets if line =~ /(.*)<\/Hsp_hit-from>/ starthitcand=$1.to_i elsif line =~ /(.*)<\/Hsp_hit-to>/ endhitcand=$1.to_i elsif line =~ /(.*)<\/Hsp_query-from>/ startquerycand=$1.to_i elsif line =~ /(.*)<\/Hsp_query-to>/ endquerycand=$1.to_i elsif $_ =~ /(.*)<\/Hsp_hseq>/ targetcand = $1.sub( /-/, "" ).sub( /U/, "X" ) elsif line =~ /(.*)<\/Hsp_bit-score>/ scorecand=$1.to_f elsif line =~ /(.*)<\/Hsp_evalue>/ evalcand=$1.to_s elsif line =~ /<\/Hsp>/ if endhit == -1 then starthit = starthitcand endhit= endhitcand startquery = startquerycand endquery= endquerycand target = targetcand score = scorecand lasteval = evalcand else # if endhit <= endhitcand && endquery <= endquerycand then if endhit <= starthitcand && endquery <= startquerycand then endhit = endhitcand endquery = endquerycand target = target + "XX" + targetcand score = score + scorecand end # if starthitcand <= starthit && startquerycand <= startquery then if endhitcand <= starthit && endquerycand <= startquery then starthit = starthitcand startquery = startquerycand target = targetcand + "XX" + target score = score + scorecand end end elsif line =~ /<\/Hit>/ hitnum = hitnum + 1 break; end end singleids.push( id ) singleadd.push( target ) known = ids.index( id ) if known != nil then if sco[known] >= score then next else ids.delete_at( known ) add.delete_at( known ) sco.delete_at( known ) end end ids.push( id ) sco.push( score ) add.push( target ) end resp.close end n = singleids.length outnum = 0 totalprob = 0 prob = [] for m in 0..(n-1) # prob[m] = 1.0 / population[eclass[m]] prob[m] = 1.0 totalprob += prob[m] end # puts "" for m in 0..(n-1) prob[m] /= (totalprob) prob[m] *= (nadd.to_f / nin.to_f) prob[m] = 1 if prob[m] > 1 end for m in 0..(n-1) if rand( 1000000 ).to_f/1000000 < prob[m] then # STDERR.puts "hit in " + m.to_s afp.puts ">_addedbymaffte_" + singleids[m] afp.puts singleadd[m] end end end afp.close STDERR.puts "Aligning .. 
" system( mafftpath + mafftopt + "#{temp_af} > #{temp_bf}" ) STDERR.puts "done." bfp = File.open( "#{temp_bf}", 'r' ) outseq = [] outnam = [] readfasta( bfp, outnam, outseq ) bfp.close outseq2 = [] outnam2 = [] len = outseq.length for i in 0..(len-1) # p outnam[i] if fullout == 0 && outnam[i] =~ /_addedbymaffte_/ then next end outseq2.push( outseq[i] ) outnam2.push( outnam[i].sub( /_addedbymaffte_/, "_ho_" ) ) end nout = outseq2.length len = outseq[0].length p = len while p>0 p -= 1 allgap = 1 for j in 0..(nout-1) if outseq2[j][p,1] != "-" then allgap = 0 break end end if allgap == 1 then for j in 0..(nout-1) outseq2[j][p,1] = "" end end end for i in 0..(nout-1) puts ">" + outnam2[i] puts outseq2[i].gsub( /.{1,60}/, "\\0\n" ) end system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" ) #system( "cp #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid} ." ) if File.exist?( "#{temp_af}.tree" ) then system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" ) system( "rm #{temp_af}.tree" ) end mafft-7.505-without-extensions/core/regionalrealignment.rb0000644000175000017500000002142014224501721023353 0ustar nileshnilesh#! /usr/bin/env ruby $MAFFTCOMMAND = '"/usr/local/bin/mafft"' # Edit the above line to specify the location of mafft. # $MAFFTCOMMAND = '"C:\folder name\mafft.bat"' # windows # $MAFFTCOMMAND = '"/usr/local/bin/mafft"' # mac or cygwin # $MAFFTCOMMAND = '"/usr/bin/mafft"' # linux (rpm) # $MAFFTCOMMAND = '"/somewhere/mafft.bat"' # all-in-one version for linux or mac ##################################################################### # # regionalrealignment.rb version 0.2 (2013/Sep/21) # ruby regionalrealignment.rb setting input > output # See http://mafft.cbrc.jp/alignment/software/regionalrealignment.html # # v0.2, 2013/Sep/21, Fixed a windows-specific bug. 
# ##################################################################### def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").strip ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end def resolve( tree ) while 1 # p tree tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" ) hit1 = $1 hit2 = $2 hit3 = $3 hit4 = $4 # p hit1 # p hit2 # p hit3 # p hit4 # puts "introduce XXX" # p tree break unless tree.index(/XXX/) poshit = tree.index(/XXX/) # puts "poshit=" + poshit.to_s i = poshit height = 0 while i >= 0 break if height == 0 && tree[i..i] == '(' if tree[i..i] == ')' then height += 1 elsif tree[i..i] == '(' then height -= 1 end i -= 1 end poskakko = i # puts "poskakko = " + poskakko.to_s zenhan = tree[0..poskakko] zenhan = "" if poskakko == -1 # puts "zenhan = " + zenhan treelen = tree.length tree = zenhan + "(" + tree[poskakko+1..treelen] # puts "add (" # p tree tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" ) # p tree end return tree end if ARGV.length != 2 then STDERR.puts "" STDERR.puts "Usage: ruby #{$0} setingfile inputfile > output" STDERR.puts "" exit 1 end infilename = ARGV[1] tname = [] tseq = [] infp = File.open( infilename, "r" ) tin = readfasta( infp, tname, tseq ) infp.close if tin == 0 then STDERR.puts "" STDERR.puts "Error in the '#{infilename}' file. Is this FASTA format?\n" STDERR.puts "" exit 1 end alnlen = tseq[0].length if alnlen == 0 then STDERR.puts "" STDERR.puts "Error in the '#{infilename}' file. 
Is this FASTA format?\n" STDERR.puts "" exit 1 end for i in 0..(tin-1) if alnlen != tseq[i].length then STDERR.puts "" STDERR.puts "Please insert gaps such that all the input sequences have the same length.\n" STDERR.puts "" exit 1 end end checkmap = [] for i in 0..(alnlen-1) checkmap.push(0) end outputseq = [] for i in 0..(tin-1) outputseq.push("") end settingfile = ARGV[0].to_s reg = [] startpos = [] endpos = [] realign = [] options = [] treeoption = "" revwarn = 0 sfp = File.open( settingfile, "r" ) while line = sfp.gets line.sub!(/#.*/,"") next if line.length < 2 if line.strip =~ /^treeoption / then treeoption = line.strip.sub(/.*treeoption/,"") break end end sfp.close sfp = File.open( settingfile, "r" ) while line = sfp.gets line.sub!(/#.*/,"") next if line.length < 2 next if line.strip =~ /^treeoption/ startposv = line.split(' ')[0].to_i - 1 endposv = line.split(' ')[1].to_i - 1 if startposv < 0 || endposv < 0 then STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n" STDERR.puts line STDERR.puts "Sites must be numbered as 1, 2, ...\n" STDERR.puts "\n" exit 1 end if startposv >= alnlen || endposv >= alnlen then STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n" STDERR.puts line STDERR.puts "Sites must be numbered as 1, 2, ... #{alnlen}\n" STDERR.puts "\n" exit 1 end if startposv > endposv then STDERR.puts "\nWarning. 
Please check this line:\n" STDERR.puts line STDERR.puts "Start position > End position ?\n" STDERR.puts "\n" revwarn = 1 # exit 1 end startpos.push( startposv ) endpos.push( endposv ) if startposv > endposv for k in (endposv)..(startposv) checkmap[k] += 1 end else for k in (startposv)..(endposv) checkmap[k] += 1 end end if line.split(' ')[2] == "realign" then realign.push( 1 ) elsif line.split(' ')[2] == "preserve" then realign.push( 0 ) else STDERR.puts "\n" STDERR.puts "The third column must be 'realign' or 'preserve'\n" STDERR.puts "Please check this line:\n" STDERR.puts line STDERR.puts "\n" exit 1 end if line =~ / \-\-/ && line =~ /realign/ then options.push( line.sub(/.*realign/,"").strip ) else options.push( treeoption ) end end sfp.close #p startpos #p endpos #p options #res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 --thread -1 #{infilename} > _dum" res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 #{infilename} > _dum" if res == false then STDERR.puts "\n" STDERR.puts "ERROR in building a guide tree" STDERR.puts "\n" exit 1 end treefp = File.open( "#{infilename}.tree", "r" ) tree = "" while line = treefp.gets tree += line.strip break if tree =~ /;$/ end treefp.close tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") scale = 1.0 mtreefp = File.open("_tree", "w") #STDERR.puts "Tree = " + tree memi = [-1,-1] leni = [-1,-1] while tree.index( /\(/ ) tree = resolve( tree ) tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" ) memi[0] = $1.to_i leni[0] = $2.to_f * scale memi[1] = $3.to_i leni[1] = $4.to_f * scale if leni[0] > 10 || leni[1] > 10 then STDERR.puts "" STDERR.puts "Please check the scale of branch length!" 
STDERR.puts "The unit of branch lengths must be 'substitution/site'" STDERR.puts "If the unit is 'substition' in your tree, please" STDERR.puts "use the scale argument," STDERR.puts "% newick2mafft scale in > out" STDERR.puts "where scale = 1/(alignment length)" STDERR.puts "" exit 1 end # STDERR.puts "subtree = " + $& if memi[1] < memi[0] then memi.reverse! leni.reverse! end tree.sub!( /XXX/, memi[0].to_s ) # STDERR.puts "Tree = " + tree mtreefp.printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] ) end mtreefp.close numreg = startpos.length for i in 0..(numreg-1) partfp = File.open( "_part", "w" ) for j in 0..(tin-1) partfp.puts ">" + tname[j] if startpos[i] > endpos[i] then partfp.puts tseq[j][endpos[i]..startpos[i]].reverse else partfp.puts tseq[j][startpos[i]..endpos[i]] end end partfp.close if( realign[i] == 1 ) then STDERR.puts "Aligning region #{startpos[i]+1} - #{endpos[i]+1}" res = system "#{$MAFFTCOMMAND} #{options[i]} --inputorder --treein _tree _part > _partout" if res == false then STDERR.puts "\n" STDERR.puts "ERROR in aligning region #{startpos[i]+1} - #{endpos[i]+1}" STDERR.puts "Please check the option:" STDERR.puts "#{options[i]}" STDERR.puts "\n" exit 1 end else STDERR.puts "Copying region #{startpos[i]+1} - #{endpos[i]+1}" # system "cp _part _partout" File.rename( "_part", "_partout" ) end pname = [] pseq = [] partfp = File.open( "_partout", "r" ) pin = readfasta( partfp, pname, pseq ) partfp.close for j in 0..(tin-1) outputseq[j] += pseq[j] end end for j in 0..(tin-1) puts ">" + tname[j] puts outputseq[j] end STDERR.puts "Done." numdupsites = checkmap.select{|x| x>1}.length if numdupsites > 0 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: #{numdupsites} sites were duplicatedly selected." 
STDERR.puts "#########################################################" STDERR.puts "" end numunselectedsites = checkmap.select{|x| x==0}.length if numunselectedsites > 0 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: #{numunselectedsites} sites were not selected." STDERR.puts "#########################################################" STDERR.puts "" end if revwarn == 1 then STDERR.puts "" STDERR.puts "#########################################################" STDERR.puts "# Warning: The order of sites were reversed." STDERR.puts "#########################################################" STDERR.puts "" end STDERR.puts "" STDERR.puts " Tree: computed with #{treeoption} --treeout " for i in 0..(numreg-1) range = sprintf( "%6d - %6d", startpos[i]+1, endpos[i]+1 ) if realign[i] == 1 then STDERR.puts "#{range}: realigned with #{options[i]} --treein (tree)" else STDERR.puts "#{range}: preserved" end end STDERR.puts "" File.delete( "_dum" ) File.delete( "_tree" ) File.delete( "_part" ) File.delete( "_partout" ) mafft-7.505-without-extensions/core/Salignmm.c0000644000175000017500000023202414224501721020717 0ustar nileshnilesh#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 0 #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 1 #define FASTMATCHCALC 1 #define SLOW 0 #define TERMGAPFAC 0.0 #define TERMGAPFAC_EX 0.0 static TLS double **impmtx = NULL; static TLS int impalloclen = 0; double imp_match_out_sc( int i1, int j1 ) { // fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold ); // fprintf( stderr, "val = %f\n", impmtx[i1][j1] ); return( impmtx[i1][j1] ); } #if 0 static void imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int *gapmap2 ) { #if FASTMATCHCALC double *pt = impmtx[i1]; int *gapmappt = gapmap2; while( lgth2-- ) *imp++ += pt[*gapmappt++]; #else int j; double *pt = impmtx[i1]; for( j=0; j target -> localhomtable. 
seedinlh12 -> localhom ni haitteiru. else fillimp_file( impmtx, imp, clus1, clus2, lgth1, lgth2, seq1, seq2, eff1, eff2, eff1_kozo, eff2_kozo, localhom, swaplist, forscore, orinum1, orinum2, uselh, seedinlh1, seedinlh2, nodeid, nfiles ); } static void match_calc_del( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, int mid, int nmask, int *mask1, int *mask2 ) { // osoi! int i, j, k, m; int c1, c2; // fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 ); // fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] ); // fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] ); // for( i=0; i ", match[k], mid ); match[k] -= matrices[mid][c1][c2] * eff1[i] * eff2[j]; // fprintf( stderr, "match[k] = %f (mid=%d)\n", match[k], mid ); } } // fprintf( stderr, "done\n" ); return; } #if SLOW static void match_calc_slow( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, double **doublework, int **intwork, int initialize, int flip ) { // osoi! int i, j, k; int c1, c2; int mid; // fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 ); // fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] ); // fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] ); // for( i=0; i-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize ) { #if FASTMATCHCALC // fprintf( stderr, "\nmatch_calc... 
%d", i1 ); int j, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *matchpt, *cpmxpdpt, **cpmxpdptpt; int *cpmxpdnpt, **cpmxpdnptpt; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); if( initialize ) { int count = 0; for( j=0; j-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } free( scarr ); // fprintf( stderr, "done\n" ); #else int j, k, l; // double scarr[26]; double **cpmxpd = doublework; int **cpmxpdn = intwork; double *scarr; scarr = calloc( nalphabets, sizeof( double ) ); // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } free( scarr ); #endif } static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, int **ijp, int icyc, int jcyc, int *warpis, int *warpjs, int warpbase, int *ngap1, int *ngap2, int reuseprofiles, char **gt1, char **gt2 ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk; double wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); if( gt1 == NULL ) { gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); } else { gt1bk = *gt1; gt2bk = *gt2; } #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; //*ngap2 = 1; // shita de keisan } l= jin; while( --l >= 0 ) { *--gaptable1 = 
'-'; *--gaptable2 = 'o'; k++; //*ngap1 = 1; } break; } else { l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; //*ngap2 = 1; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; //*ngap1 = 1; } } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += (double)imp_match_out_sc( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } #if 1 // Atracking() to onaji. Wazukani hayai hazu. Test mada. if( strchr( gaptable1, '-' ) ) *ngap1 = 1; else *ngap1 = 0; if( strchr( gaptable2, '-' ) ) *ngap2 = 1; else *ngap2 = 0; if( *ngap1 == 0 && reuseprofiles ) ; else if( *ngap1 == 0 ) { limk = gt1bk + lgth1+lgth2 - gaptable1; for( i=0; i yobidashimoto // gapf1 = NULL; // free( gapf2 ); // gapf1 = NULL; #if 0 reporterr( "gaptable1=%s\n", gaptable1 ); reporterr( "gaptable2=%s\n", gaptable2 ); reporterr( "result (gapfreq) = " ); for( j=0; j=0; j-- ) { if( (g=lasthorizontalw[j]+ ( fpenalty * TERMGAPFAC + fpenalty_ex * (lgth2-1-j) * TERMGAPFAC_EX ) ) > wm ) { wm = g; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } for( i=lgth1-2; i>=0; i-- ) { if( ( g=lastverticalw[i]+ ( fpenalty * TERMGAPFAC + fpenalty_ex * (lgth1-1-i) * TERMGAPFAC_EX ) ) > wm ) { wm = g; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } if( lasthorizontalw[lgth2-1] > wm ) // score ga onaji baai erabarenai { wm = lasthorizontalw[lgth2-1]; iin = lgth1-1; jin = lgth2-1; ijp[lgth1][lgth2] = 0; } #else wm = lastverticalw[0]; for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } #endif } for( i=0; i= warpbase ) { ifi = warpis[ijp[iin][jin]-warpbase]; jfi = warpjs[ijp[iin][jin]-warpbase]; } else if( 
ijp[iin][jin] < 0 ) { ifi = iin-1; jfi = jin+ijp[iin][jin]; } else if( ijp[iin][jin] > 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } if( ifi == -warpbase && jfi == -warpbase ) { l = iin; while( --l >= 0 ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin; while( --l >= 0 ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } break; } else { l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } // reporterr( "gaptable1=%s\n", gaptable1 ); // reporterr( "gaptable2=%s\n", gaptable2 ); if( strchr( gaptable1, '-' ) ) *ngap1 = 1; else *ngap1 = 0; if( strchr( gaptable2, '-' ) ) *ngap2 = 1; else *ngap2 = 0; #if 1 if( *ngap1 == 0 && reuseprofiles ) ; else if( *ngap1 == 0 ) { limk = gt1bk + lgth1+lgth2 - gaptable1; for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? 
*/ double wmo = 0.0; double g; double *currentw, *previousw; // double fpenalty = (double)penalty; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex_l; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ char **mseq1; char **mseq2; char **mseq; static TLS double *ogcp1, *ogcp1o; static TLS double *ogcp2, *ogcp2o; static TLS double *fgcp1, *fgcp1o; static TLS double *fgcp2, *fgcp2o; double *ogcp1opt, *ogcp2opt, *fgcp1opt, *fgcp2opt; static TLS double **cpmx1; double ***cpmx1pt = NULL; static TLS double **cpmx2; double ***cpmx2pt = NULL; static TLS int **intwork; static TLS double **doublework; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double *gapfreq1; double *gapfreq1pt; static TLS double *gapfreq2; double *gapfreq2pt; double fpenalty = (double)penalty_l; double fpenalty_shift = (double)penalty_shift; double *fgcp2pt; double *ogcp2pt; double fgcp1va; double ogcp1va; double *gf2pt; double *gf2ptpre; double gf1va; double gf1vapre; double headgapfreq1; double headgapfreq2; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; char *gt1, *gt2, *gt1bk, *gt2bk; // for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp1o ); FreeFloatVec( ogcp2 ); FreeFloatVec( ogcp2o ); FreeFloatVec( fgcp1 ); 
FreeFloatVec( fgcp1o ); FreeFloatVec( fgcp2 ); FreeFloatVec( fgcp2o ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp1o = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); ogcp2o = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp1o = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); fgcp2o = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); previousfirstlen = -1; previousicyc = -1; gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); #if FASTMATCHCALC doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 ); #else doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { double t = 0.0; for( i=0; i= 0 && firstmem == previousfirstmem && lgth1 == previousfirstlen && nthread == 0 && icyc == previousicyc+1 ) // mouhitotsu jouken ga iru. firstmem >= 0 && firstmem == previousfirstmem && calledbytreebase && prevcalledbytreebase if( calledbyfulltreebase == 1 && previouscall && firstmem >= 0 && firstmem == previousfirstmem && lgth1 == previousfirstlen && nthread == 0 && icyc == previousicyc+1 ) // mouhitotsu jouken ga iru. firstmem >= 0 && firstmem == previousfirstmem && calledbytreebase && prevcalledbytreebase reuseprofiles = 1; else reuseprofiles = 0; // Atracking de gapireru() wo skip surutameni reuseprofile flag wo tsukau. #if 0 reporterr( "nthread=%d\n", nthread ); reporterr( "reuseprofiles=%d\n", reuseprofiles ); reporterr( "calledbyfulltreebase=%d\n", calledbyfulltreebase ); reporterr( "previouscall=%d\n", previouscall ); reporterr( "firstmem=%d, prefiousfirstmem=%d\n", firstmem, previousfirstmem ); reporterr( "lgth1=%d, previousfirstlen=%d\n", lgth1, previousfirstlen ); if( reuseprofiles ) exit(1 ); #endif if( n_dis[0][amino_n['-']] != 0 ) { reporterr( "Bug probably in versions >7.36. Please report this issue to katoh@ifrec.osaka-u.ac.jp\n" ); exit( 1 ); } if( cpmxresult && specificityconsideration == 0.0 ) // n_dynamicmtx ga henka suru toki profile ha sairiyou dekinai. { if( sgap1 ) { reporterr( "The combination of sgap1 and cpmxhit is not supported. 
See Salignmm.c\n" ); exit( 1 ); } if( cpmxchild0 && *cpmxchild0 ) { // reporterr( "\nUse cpmxhist for child 0!\n" ); cpmx1pt = (cpmxchild0); gapfreq1pt = (*cpmxchild0)[nalphabets]; ogcp1opt = (*cpmxchild0)[nalphabets+1]; fgcp1opt = (*cpmxchild0)[nalphabets+2]; } else { // reporterr( "\nDo not use cpmxhist for child 0!\n" ); cpmx1pt = &cpmx1; cpmx_calc_new( seq1, *cpmx1pt, eff1, lgth1, icyc ); gapfreq1pt = gapfreq1; gapcountf( gapfreq1pt, seq1, icyc, eff1, lgth1 ); for( i=0; i tbfast.c // impossible if( localhom ) imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( headgp == 1 ) { for( i=1; i", wm ); fprintf( stderr, "%5.0f? (penal=%5.2f)", g=mi+*fgcp2pt*(1.0-gapfreq1pt[i]), *fgcp2pt*(1.0-gapfreq1pt[i]) ); #endif if( (g=mi+*fgcp2pt*gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); // fprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] ); } if( (g=*prept+*ogcp2pt*gf1vapre) >= mi ) // if( (g=*prept+*ogcp2pt*gf1vapre) > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); fprintf( stderr, "%5.0f? 
(penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2pt[j]), fgcp1va*(1.0-gapfreq2pt[j]) ); #endif if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); // fprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] ); } if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt ) // if( (g=*prept+ ogcp1va* *gf2ptpre) > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { // fprintf( stderr, "WARP!!\n" ); if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; *warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i 20 ) // if( 0 ) // if( 1 ) { int limk = gt1bk + lgth1+lgth2 - gt1; limk = 
(int)(( (limk+1000) / 1000 ) * 1000); // kiriage //reporterr( "limk=%d\n", limk ); #if 1 // marume gosa wo teigenn suru tame double totaleff1 = 0.0; double totaleff2 = 0.0; for( i=0; i0.001 || fabs(totaleff2-1.0)>0.001 ) { reporterr( "totaleff1 = %50.40f\n", totaleff1 ); reporterr( "totaleff2 = %50.40f\n", totaleff2 ); exit( 1 ); } totaleff1 = totaleff1 * orieff1 / (orieff1 + orieff2); totaleff2 = totaleff2 * orieff2 / (orieff1 + orieff2); #else // reporterr( "totaleff1 (rescaled ) = %20.10f\n", totaleff1 ); // reporterr( "totaleff2 (rescaled ) = %20.10f\n", totaleff2 ); // double bk1 = totaleff1; // double bk2 = totaleff2; double totaleff1 = orieff1 / (orieff1 + orieff2); double totaleff2 = orieff2 / (orieff1 + orieff2); // reporterr( "totaleff1 (not rescaled) = %20.10f\n", totaleff1 ); // reporterr( "totaleff2 (not rescaled) = %20.10f\n", totaleff2 ); // reporterr( "ratio1 = %30.25f\n", totaleff1/bk1 ); // reporterr( "ratio2 = %30.25f\n", totaleff2/bk2 ); #endif *cpmxresult = AllocateDoubleMtx( nalphabets+3, 0 ); // gapcount, opg, fng no bun createcpmxresult( *cpmxresult, limk, totaleff1, totaleff2, cpmx1pt, cpmx2pt, gt1, gt2, (cpmx1!=*cpmx1pt), (cpmx2!=*cpmx2pt) ); // naka de free creategapfreqresult( *cpmxresult+nalphabets, limk, totaleff1, totaleff2, gapfreq1pt, gapfreq2pt, gt1, gt2 ); // naka deha free shinai // gapfreq1, gapfreq2 ha mada tsukau createogresult( *cpmxresult+nalphabets+1, limk, totaleff1, totaleff2, ogcp1opt, ogcp2opt, gapfreq1pt, gapfreq2pt, gt1, gt2 ); // naka deha free shinai if( cpmx1!=*cpmx1pt ) free( ogcp1opt ); if( cpmx2!=*cpmx2pt ) free( ogcp2opt ); createfgresult( *cpmxresult+nalphabets+2, limk, totaleff1, totaleff2, fgcp1opt, fgcp2opt, gapfreq1pt, gapfreq2pt, gt1, gt2 ); // naka deha free shinai if( cpmx1!=*cpmx1pt ) free( fgcp1opt ); if( cpmx2!=*cpmx2pt ) free( fgcp2opt ); if( cpmx1!=*cpmx1pt ) free( gapfreq1pt ); if( cpmx2!=*cpmx2pt ) free( gapfreq2pt ); #if 0 reporterr( "\n" ); for( j=0; j N ) { fprintf( stderr, "alloclen=%d, 
resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } if( ngap1 || !reuseprofiles ) for( i=0; i %d\n", strlen( seq1[0] ) ); previousfirstlen = lgth1; previousfirstmem = firstmem; previousicyc = icyc; previouscall = calledbyfulltreebase; free( mseq1 ); free( mseq2 ); FreeCharMtx( mseq ); return( wm ); } double A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, int *gapmap1, int *gapmap2 ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { fprintf( stderr, "Unexpected error. Please contact katoh@ifrec.osaka-u.ac.jp\n" ); exit( 1 ); } double A__align_variousdist( int **which, double ***matrices, double **n_dynamicmtx, int penalty_l, int penalty_ex_l, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { // int k; register int i, j, c; int ngap1, ngap2; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; double wm = 0.0; /* int ?????? */ double wmo = 0.0; /* int ?????? 
*/ double g; double *currentw, *previousw; // double fpenalty = (double)penalty; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex_l; #endif #if 1 double *wtmp; int *ijppt; double *mjpt, *prept, *curpt; int *mpjpt; #endif static TLS double mi, *m; static TLS int **ijp; static TLS int mpi, *mp; static TLS double *w1, *w2; static TLS double *match; static TLS double *initverticalw; /* kufuu sureba iranai */ static TLS double *lastverticalw; /* kufuu sureba iranai */ static TLS char **mseq1; static TLS char **mseq2; static TLS char **mseq; static TLS double *ogcp1; static TLS double *ogcp2; static TLS double *fgcp1; static TLS double *fgcp2; static TLS double ***cpmx1s; static TLS double ***cpmx2s; static TLS int ***intwork; static TLS double ***doublework; static TLS int orlgth1 = 0, orlgth2 = 0; static TLS double *gapfreq1; static TLS double *gapfreq2; double fpenalty = (double)penalty_l; double fpenalty_shift = (double)penalty_shift; double *fgcp2pt; double *ogcp2pt; double fgcp1va; double ogcp1va; double *gf2pt; double *gf2ptpre; double gf1va; double gf1vapre; double headgapfreq1; double headgapfreq2; int *warpis = NULL; int *warpjs = NULL; int *warpi = NULL; int *warpj = NULL; int *prevwarpi = NULL; int *prevwarpj = NULL; double *wmrecords = NULL; double *prevwmrecords = NULL; int warpn = 0; int warpbase; double curm = 0.0; double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt; int *warpipt, *warpjpt; int *nmask, **masklist1, **masklist2; if( seq1 == NULL ) { if( orlgth1 ) { // fprintf( stderr, "## Freeing local arrays in A__align\n" ); orlgth1 = 0; orlgth2 = 0; imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 ); free( mseq1 ); free( mseq2 ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); 
FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatCub( cpmx1s ); FreeFloatCub( cpmx2s ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatCub( doublework ); FreeIntCub( intwork ); } else { // fprintf( stderr, "## Not allocated\n" ); } return( 0.0 ); } #if SLOW nmask = calloc( maxdistclass, sizeof( int ) ); #else masklist1 = AllocateIntMtx( maxdistclass, 0 ); masklist2 = AllocateIntMtx( maxdistclass, 0 ); nmask = calloc( maxdistclass, sizeof( int ) ); for( c=0; c orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatCub( cpmx1s ); FreeFloatCub( cpmx2s ); FreeFloatVec( gapfreq1 ); FreeFloatVec( gapfreq2 ); FreeFloatCub( doublework ); FreeIntCub( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 ); cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 ); gapfreq1 = AllocateFloatVec( ll1+2 ); gapfreq2 = AllocateFloatVec( ll2+2 ); doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets ); intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { double t = 0.0; for( i=0; i tbfast.c // impossible if( localhom ) imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( headgp == 1 ) { for( i=1; i", wm ); fprintf( stderr, "%5.0f? 
(penal=%5.2f)", g=mi+*fgcp2pt*(1.0-gapfreq1[i]), *fgcp2pt*(1.0-gapfreq1[i]) ); #endif if( (g=mi+*fgcp2pt*gf1va) > wm ) { wm = g; *ijppt = -( j - mpi ); // fprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] ); } if( (g=*prept+*ogcp2pt*gf1vapre) >= mi ) // if( (g=*prept+*ogcp2pt*gf1vapre) > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); fprintf( stderr, "%5.0f? (penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2[j]), fgcp1va*(1.0-gapfreq2[j]) ); #endif if( (g=*mjpt+ fgcp1va* *gf2pt) > wm ) { wm = g; *ijppt = +( i - *mpjpt ); // fprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] ); } if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt ) // if( (g=*prept+ ogcp1va* *gf2ptpre) > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif if( trywarp ) { #if USE_PENALTY_EX if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai #else if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai #endif { if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] ) { *ijppt = warpbase + warpn - 1; } else { *ijppt = warpbase + warpn; warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) ); warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) ); warpis[warpn] = prevwarpi[j-1]; warpjs[warpn] = prevwarpj[j-1]; warpn++; } wm = g; } curm = *curpt + wm; if( *wmrecords1pt > *wmrecordspt ) { *wmrecordspt = *wmrecords1pt; *warpipt = *(warpipt-1); *warpjpt = *(warpjpt-1); } if( curm > *wmrecordspt ) { *wmrecordspt = curm; *warpipt = i; *warpjpt = j; } wmrecordspt++; wmrecords1pt++; warpipt++; warpjpt++; } #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; gf2ptpre++; gf2pt++; } lastverticalw[i] = currentw[lgth2-1]; if( trywarp ) { fltncpy( prevwmrecords, wmrecords, lastj ); intncpy( prevwarpi, warpi, 
lastj ); intncpy( prevwarpj, warpj, lastj ); } } if( trywarp ) { // fprintf( stderr, "wm = %f\n", wm ); // fprintf( stderr, "warpn = %d\n", warpn ); free( wmrecords ); free( prevwmrecords ); free( warpi ); free( warpj ); free( prevwarpi ); free( prevwarpj ); } #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i0 ) { *--mseq1[0] = seq1[0][i+l--]; *--mseq2[0] = *gap; } l= prevhitj - j - 1; fprintf( stderr, "l=%d\n", l ); while( l>0 ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][j+l--]; } if( i < 0 || j < 0 ) return; *--mseq1[0] = seq1[0][i]; *--mseq2[0] = seq2[0][j]; fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] ); fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] ); } static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 ) { char tmpc = s1[0][i1]; char *seq2 = s2[0]; while( lgth2-- ) *match++ = amino_dis[(int)tmpc][(int)*seq2++]; } static double Atracking( double *lasthorizontalw, double *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, double **cpmx1, double **cpmx2, int **ijp ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk; // char gap[] = "-"; char *gap; gap = newgapstr; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); #if 0 for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--mseq1[0] = seq1[0][ifi+l]; *--mseq2[0] = *gap; k++; } l= jin - jfi; while( --l ) { *--mseq1[0] = *gap; *--mseq2[0] = seq2[0][jfi+l]; k++; } if( iin <= 0 || jin <= 0 ) break; *--mseq1[0] = seq1[0][ifi]; *--mseq2[0] = seq2[0][jfi]; k++; iin = ifi; jin = jfi; } return( 0.0 ); } void backdp( double **WMMTX, double wmmax, double *maxinw, double *maxinh, int lgth1, int lgth2, int alloclen, double *w1, double *w2, double *initverticalw, double *m, int *mp, int iin, int jin, char 
**seq1, char **seq2, char **mseq1, char **mseq2 ) { register int i, j; int prevhiti, prevhitj; // int lasti, lastj; double g; double fpenalty = (double)penalty; #if USE_PENALTY_EX double fpenalty_ex = (double)penalty_ex; #endif double *currentw, *previousw, *wtmp; double mi; int mpi; int *mpjpt; double *mjpt, *prept, *curpt; double wm = 0.0; double forwwm; currentw = w1; previousw = w2; match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 ); match_calc( currentw, seq1, seq2, lgth1-1, lgth2 ); prevhiti = iin; prevhitj = jin; fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 ); fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 ); extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 ); for( i=0; i0; --j ) { m[j-1] = currentw[j]; mp[j] = 0; // iranai } for( j=0; j-1; i-- ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[lgth2-1] = initverticalw[i+1]; match_calc( currentw, seq1, seq2, i, lgth2 ); #if 0 fprintf( stderr, "i=%d, currentw = \n", i ); for( j=0; j-1; j-- ) { // fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] ); wm = *prept; g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; } g = *prept; if( g >= mi ) { mi = g; mpi = j+1; //iranai } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; } g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; //iranai } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "*curpt = %5.0f \n", *curpt ); #endif // forwwm = wm + MIN( maxinw[i], maxinh[j] ); forwwm = wm + MIN( maxinw[i], maxinh[j] ); WMMTX[i][j] = forwwm; if( forwwm == wmmax && i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( maxinw ); FreeFloatVec( maxinh ); FreeFloatVec( m ); 
FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( doublework ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); maxinw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); maxinh = AllocateFloatVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } mseq1[0] = mseq[0]; mseq2[0] = mseq[1]; if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeFloatMtx( WMMTX ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 ); #endif commonIP = AllocateIntMtx( ll1+10, ll2+10 ); WMMTX = AllocateFloatMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 for( i=0; i", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = -( j - mpi ); } g = *prept; if( g >= mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijppt = +( i - *mpjpt ); } g = *prept; if( g >= *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt += wm; WMMTX[i][j] = *curpt; if( j wmmax ) { wmmax = g; iin = i; jin = lgth2-1; } } for( j=0; j wmmax ) { wmmax = g; iin = lgth1-1; jin = j; } } for( i=0; iMSres\n%s\n", mseq1[0] ); fprintf( stderr, ">MSres\n%s\n", mseq2[0] ); #endif #if 0 for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } strcpy( seq1[0], mseq1[0] ); strcpy( seq2[0], mseq2[0] ); #if 1 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); #endif return( wm ); } mafft-7.505-without-extensions/core/fft.c0000644000175000017500000000740714224501721017734 0ustar nileshnilesh#include "mltaln.h" #include "mtxutl.h" /* from "C gengo niyoru saishin algorithm jiten" ISBN4-87408-414-1 Haruhiko Okumura */ static void make_sintbl(int n, double sintbl[]) { int i, n2, n4, n8; double c, s, dc, ds, t; n2 = n / 2; n4 = n / 4; n8 = n / 8; t = sin(PI / n); dc = 2 * t * t; ds = sqrt(dc * (2 - dc)); t = 2 * dc; c = sintbl[n4] = 1; s = sintbl[0] = 0; for (i = 1; i < n8; i++) { c -= dc; dc += t * c; s += ds; ds -= t * s; sintbl[i] = s; sintbl[n4 - i] = c; } if (n8 != 0) sintbl[n8] = sqrt(0.5); for (i = 0; i < n4; i++) sintbl[n2 - i] = sintbl[i]; for (i = 0; i < n2 + 
n4; i++) sintbl[i + n2] = - sintbl[i]; } /* {\tt fft()}. */ static void make_bitrev(int n, int bitrev[]) { int i, j, k, n2; n2 = n / 2; i = j = 0; for ( ; ; ) { bitrev[i] = j; if (++i >= n) break; k = n2; while (k <= j) { j -= k; k /= 2; } j += k; } } /* */ int fft(int n, Fukusosuu *x, int freeflag) { static TLS int last_n = 0; /* {\tt n} */ static TLS int *bitrev = NULL; /* */ static TLS double *sintbl = NULL; /* */ int i, j, k, ik, h, d, k2, n4, inverse; double t, s, c, dR, dI; if (freeflag) { if (bitrev) free(bitrev); bitrev = NULL; if (sintbl) free(sintbl); sintbl = NULL; last_n = 0; return( 0 ); } /* */ if (n < 0) { n = -n; inverse = 1; /* */ } else inverse = 0; n4 = n / 4; if (n != last_n || n == 0) { last_n = n; #if 0 if (sintbl != NULL) { free(sintbl); sintbl = NULL; } if (bitrev != NULL) { free(bitrev); bitrev = NULL; } if (n == 0) return 0; /* */ sintbl = (double *)malloc((n + n4) * sizeof(double)); bitrev = (int *)malloc(n * sizeof(int)); #else /* by T. Nishiyama */ sintbl = realloc(sintbl, (n + n4) * sizeof(double)); bitrev = realloc(bitrev, n * sizeof(int)); #endif if (sintbl == NULL || bitrev == NULL) { fprintf(stderr, "\n"); return 1; } make_sintbl(n, sintbl); make_bitrev(n, bitrev); } for (i = 0; i < n; i++) { /* */ j = bitrev[i]; if (i < j) { t = x[i].R; x[i].R = x[j].R; x[j].R = t; t = x[i].I; x[i].I = x[j].I; x[j].I = t; } } for (k = 1; k < n; k = k2) { /* */ #if 0 fprintf( stderr, "%d / %d\n", k, n ); #endif h = 0; k2 = k + k; d = n / k2; for (j = 0; j < k; j++) { #if 0 if( j % 1 == 0 ) fprintf( stderr, "%d / %d\r", j, k ); #endif c = sintbl[h + n4]; if (inverse) s = - sintbl[h]; else s = sintbl[h]; for (i = j; i < n; i += k2) { #if 0 if( k>=4194000 ) fprintf( stderr, "in loop %d - %d < %d, k2=%d\r", j, i, n, k2 ); #endif ik = i + k; dR = s * x[ik].I + c * x[ik].R; dI = c * x[ik].I - s * x[ik].R; x[ik].R = x[i].R - dR; x[i].R += dR; x[ik].I = x[i].I - dI; x[i].I += dI; } h += d; } } if (! 
inverse) /* n */ for (i = 0; i < n; i++) { x[i].R /= n; x[i].I /= n; } return 0; /* */ } mafft-7.505-without-extensions/core/setcore.c0000644000175000017500000002441214224501721020614 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 1 double corethr; int coreext; void arguments( int argc, char *argv[] ) { int c; fftkeika = 1; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 0; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; corethr = .5; coreext = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; fprintf( stderr, "weighti = %f\n", fastathreshold ); --argc; goto nextoption; case 'i': corethr = atof( *++argv ); 
fprintf( stderr, "corethr = %f\n", corethr ); --argc; goto nextoption; case 'm': fmodel = 1; break; case 'c': coreext = 1; break; case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 's': treemethod = 's'; break; case 'x': treemethod = 'x'; break; case 'p': treemethod = 'p'; break; case 'a': alg = 'a'; break; case 'A': alg = 'A'; break; case 'S': alg = 'S'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) 
{ fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'x' ) fprintf( fp, "Tree = UPGMA (3).\n" ); else if( treemethod == 's' ) fprintf( fp, "Tree = UPGMA (2).\n" ); else if( treemethod == 'p' ) fprintf( fp, "Tree = UPGMA (1).\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char **name, **seq; static char **oseq; static double **pscore; static double *eff; static double **node0, **node1; static double *gapc; static double *avgap; double tmpavgap; int i, j, m, goffset; static int ***topol; static double **len; FILE *prep; char c; int corestart, coreend; int alloclen; int winsize; char *pt, *ot; double gapmin; arguments( argc, argv ); getnumlen( stdin ); rewind( stdin ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*9+1 ); name = AllocateCharMtx( njob, B+1 ); oseq = AllocateCharMtx( njob, nlenmax*9+1 ); alloclen = 
nlenmax*9; topol = AllocateIntCub( njob, 2, njob ); len = AllocateDoubleMtx( njob, 2 ); pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleVec( njob ); node0 = AllocateDoubleMtx( njob, njob ); node1 = AllocateDoubleMtx( njob, njob ); gapc = AllocateDoubleVec( alloclen ); avgap = AllocateDoubleVec( alloclen ); #if 0 Read( name, nlen, seq ); #else readData_pointer( stdin, name, nlen, seq ); #endif constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } writePre( njob, name, nlen, seq, 0 ); if( tbutree == 0 ) { for( i=1; i seq[i] ) if( *pt != '-' ) { *ot-- = *pt; m--; } ot = oseq[i]+winsize+coreend-corestart+1; pt = seq[i]+coreend; if( coreext ) m = winsize; else m = 0; while( m && *(++pt) ) { if( *pt != '-' ) { *ot++ = *pt; m--; } } fprintf( stdout, ">%s\n", name[i] ); fprintf( stdout, "%s\n", oseq[i] ); } exit( 1 ); SHOWVERSION; return( 0 ); } mafft-7.505-without-extensions/core/filter.c0000644000175000017500000000717614224501721020445 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 double maxunusual; static double count_unusual( char *seq, char *usual ) { int i; char *pt; int count, len; count = 0; pt = seq; while( *pt ) { if( !strchr( usual, *pt ) ) count++; pt++; } // reporterr( "%d/%d=%f\n", count, pt-seq, ((double)count/(pt-seq)) ); return( (double)count / (pt-seq) ); } static void shortenN( char *seq, char unknown ) { int i; int status; char *out = seq; int unknownU = toupper(unknown); status = 0; while( *seq ) { if( unknownU != toupper(*seq) ) // hikouritsu? 
{ *out++ = *seq++; status = 0; } else if( status == 0 ) { *out++ = unknown; seq++; status = 1; } else seq++; } *out = 0; } void arguments( int argc, char *argv[] ) { int c; maxunusual = 0.05; inputfile = NULL; dorp = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'm': maxunusual = myatof( *++argv ); fprintf( stderr, "maxunusual = %f\n", maxunusual ); --argc; goto nextoption; case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { FILE *infp; int nlenmin; char **name; char **seq; int *nlen; int i; char *usual; char unknown; int nout; char *tmpseq; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; // dorp = NOTSPECIFIED; getnumlen_casepreserve( infp, &nlenmin ); // fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); tmpseq = AllocateCharVec( nlenmax+1 ); // readData_pointer( infp, name, nlen, seq ); readData_pointer_casepreserve( infp, name, nlen, seq ); fclose( infp ); // for( i=0; i%s\n", name[i]+1 ); if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; fprintf( origfp, "%s\n", seq[i] ); } fclose( origfp ); #endif if( dorp == 'p' ) { usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-"; unknown = 'X'; } else { usual = "ATGCUatgcu-"; unknown = 'n'; } nout = 0; for( i=0; i%s\n", name[i]+1 ); // fprintf( stdout, "%s\n", seq[i] ); fprintf( stdout, "%s\n", tmpseq ); // 2022/Apr nout++; } } if( nout < njob ) { if( dorp == 'p' ) fprintf( stderr, 
"\n\nRemoved %d sequence(s) where the frequency of ambiguous amino acids > %5.3f\n\n\n", njob-nout, maxunusual ); else fprintf( stderr, "\n\nRemoved %d sequence(s) where the frequency of ambiguous bases > %5.3f\n\n\n", njob-nout, maxunusual ); } free( nlen ); free( tmpseq ); FreeCharMtx( seq ); FreeCharMtx( name ); return( 0 ); } mafft-7.505-without-extensions/core/pairlocalalign.c0000644000175000017500000027012014224501721022130 0ustar nileshnilesh#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SHISHAGONYU 0 // for debug #define NODIST -9999 static char *whereispairalign; static char *laraparams; static char foldalignopt[1000]; static int stdout_align; static int stdout_dist; static int store_localhom; static int store_dist; static int laste; static int lastm; static int lastsubopt; static int lastonce; typedef struct _lastres { int score; int start1; int start2; char *aln1; char *aln2; } Lastres; typedef struct _reg { int start; int end; } Reg; typedef struct _aln { int nreg; Reg *reg1; Reg *reg2; } Aln; typedef struct _lastresx { int score; int naln; Aln *aln; } Lastresx; #ifdef enablemultithread typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _thread_arg { int thread_no; int njob; Jobtable *jobpospt; char **name; char **seq; char **dseq; int *thereisxineachseq; LocalHom **localhomtable; double **distancemtx; double *selfscore; char ***bpp; Lastresx **lastresx; int alloclen; int *targetmap; double **expdist; pthread_mutex_t *mutex_counter; pthread_mutex_t *mutex_stdout; } thread_arg_t; #endif typedef struct _lastcallthread_arg { int nq, nd; char **dseq; char **qseq; Lastresx **lastresx; #ifdef enablemultithread int thread_no; int *kshare; pthread_mutex_t *mutex; #endif } lastcallthread_arg_t; static void t2u( char *seq ) { while( *seq ) { if ( *seq == 'A' ) *seq = 'a'; else if( *seq == 'a' ) *seq = 'a'; else if( *seq == 'T' ) *seq = 'u'; else if( *seq == 't' ) *seq = 'u'; else if( *seq == 'U' ) *seq = 'u'; 
else if( *seq == 'u' ) *seq = 'u'; else if( *seq == 'G' ) *seq = 'g'; else if( *seq == 'g' ) *seq = 'g'; else if( *seq == 'C' ) *seq = 'c'; else if( *seq == 'c' ) *seq = 'c'; else *seq = 'n'; seq++; } } static int removex( char *d, char *m ) { int val = 0; while( *m != 0 ) { if( *m == 'X' || *m == 'x' ) { m++; val++; } else { *d++ = *m++; } } *d = 0; return( val ); } static void putlocalhom_last( char *s1, char *s2, LocalHom *localhompt, Lastresx *lastresx, char korh ) { char *pt1, *pt2; int naln, nreg; int iscore; int isumscore; int sumoverlap; LocalHom *tmppt = localhompt; LocalHom *tmppt2; LocalHom *localhompt0; Reg *rpt1, *rpt2; Aln *apt; int nlocalhom = 0; int len; // fprintf( stderr, "s1=%s\n", s1 ); // fprintf( stderr, "s2=%s\n", s2 ); naln = lastresx->naln; apt = lastresx->aln; if( naln == 0 ) return; while( naln-- ) { rpt1 = apt->reg1; rpt2 = apt->reg2; nreg = apt->nreg; isumscore = 0; sumoverlap = 0; while( nreg-- ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = rpt1->start; tmppt->start2 = rpt2->start; tmppt->end1 = rpt1->end; tmppt->end2 = rpt2->end; tmppt->korh = 'h'; if( rpt1 == apt->reg1 ) localhompt0 = tmppt; // ? 
// fprintf( stderr, "in putlocalhom, reg1: %d-%d (nreg=%d)\n", rpt1->start, rpt1->end, lastresx->nreg ); // fprintf( stderr, "in putlocalhom, reg2: %d-%d (nreg=%d)\n", rpt2->start, rpt2->end, lastresx->nreg ); len = tmppt->end1 - tmppt->start1 + 1; // fprintf( stderr, "tmppt->start1=%d\n", tmppt->start1 ); // fprintf( stderr, "tmppt->start2=%d\n", tmppt->start2 ); // fprintf( stderr, "s1+tmppt->start1=%*.*s\n", len, len, s1+tmppt->start1 ); // fprintf( stderr, "s2+tmppt->start2=%*.*s\n", len, len, s2+tmppt->start2 ); pt1 = s1 + tmppt->start1; pt2 = s2 + tmppt->start2; iscore = 0; while( len-- ) { iscore += n_dis[(int)amino_n[(unsigned char)*pt1++]][(int)amino_n[(unsigned char)*pt2++]]; // - offset $B$O$$$i$J$$$+$b(B // fprintf( stderr, "len=%d, %c-%c, iscore(0) = %d\n", len, *(pt1-1), *(pt2-1), iscore ); } if( divpairscore ) { tmppt->overlapaa = tmppt->end2-tmppt->start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += tmppt->end2-tmppt->start2+1; } rpt1++; rpt2++; } #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); fprintf( stderr, "al1: %d - %d\n", start1, end1 ); fprintf( stderr, "al2: %d - %d\n", start2, end2 ); #endif if( !divpairscore ) { for( tmppt2=localhompt0; tmppt2; tmppt2=tmppt2->next ) { tmppt2->overlapaa = sumoverlap; tmppt2->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap ); // fprintf( stderr, "tmpptr->opt = %f\n", tmppt->opt ); } } apt++; } } static int countcomma( char *s ) { int v = 0; while( *s ) if( *s++ == ',' ) v++; return( v ); } static double recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen ) { static FILE *fp = NULL; double value; char *aln1; char *aln2; int of1tmp, of2tmp; if( fp == NULL ) { fp = fopen( "_foldalignout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _foldalignout\n" ); exit( 1 ); } } aln1 = calloc( alloclen, sizeof( char ) ); aln2 = calloc( alloclen, sizeof( char ) ); readpairfoldalign( fp, *mseq1, 
*mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen ); if( strstr( foldalignopt, "-global") ) { fprintf( stderr, "Calling G__align11\n" ); value = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap ); *of1pt = 0; *of2pt = 0; } else { fprintf( stderr, "Calling L__align11\n" ); value = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, of1pt, of2pt ); } // value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame if( aln1[0] == 0 ) { fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 ); } else { strcpy( *mseq1, aln1 ); strcpy( *mseq2, aln2 ); *of1pt = of1tmp; *of2pt = of2tmp; } // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara. // fclose( fp ); // saigo dake yatta houga yoi. // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); free( aln1 ); free( aln2 ); return( value ); } static void block2reg( char *block, Reg *reg1, Reg *reg2, int start1, int start2 ) { Reg *rpt1, *rpt2; char *tpt, *npt; int pos1, pos2; int len, glen1, glen2; pos1 = start1; pos2 = start2; rpt1 = reg1; rpt2 = reg2; while( block ) { block++; // fprintf( stderr, "block = %s\n", block ); tpt = strchr( block, ':' ); npt = strchr( block, ',' ); if( !tpt || tpt > npt ) { len = atoi( block ); reg1->start = pos1; reg2->start = pos2; pos1 += len - 1; pos2 += len - 1; reg1->end = pos1; reg2->end = pos2; // fprintf( stderr, "in loop reg1: %d-%d\n", reg1->start, reg1->end ); // fprintf( stderr, "in loop reg2: %d-%d\n", reg2->start, reg2->end ); reg1++; reg2++; } else { sscanf( block, "%d:%d", &glen1, &glen2 ); pos1 += glen1 + 1; pos2 += glen2 + 1; } block = npt; } reg1->start = reg1->end = reg2->start = reg2->end = -1; while( rpt1->start != -1 ) { // fprintf( stderr, "reg1: %d-%d\n", rpt1->start, rpt1->end ); // fprintf( stderr, "reg2: %d-%d\n", rpt2->start, rpt2->end ); rpt1++; rpt2++; } // 
*apt1 = *apt2 = 0; // fprintf( stderr, "aln1 = %s\n", aln1 ); // fprintf( stderr, "aln2 = %s\n", aln2 ); } static void readlastresx_singleq( FILE *fp, int n1, int nameq, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; if( name2 != nameq ) { fprintf( stderr, "BUG!!!\n" ); exit( 1 ); } // if( lastresx[name1][name2].score ) continue; // dame!!!! prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 1 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 1 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 1 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 1 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! 
add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void readlastresx_group( FILE *fp, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #endif #endif static void readlastresx( FILE *fp, int n1, int n2, Lastresx **lastresx, char **seq1, char **seq2 ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void *lastcallthread_group( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; int qstart, qend, shou, amari; char kd[1000]; if( nthread ) { shou = nq / nthread; amari = nq - shou * nthread; fprintf( stderr, "shou: %d, amari: %d\n", shou, amari ); qstart = thread_no * shou; if( thread_no - 1 < amari ) qstart += thread_no; else qstart += amari; qend = qstart + shou - 1; if( thread_no < amari ) qend += 1; fprintf( stderr, "%d: %d-%d\n", thread_no, qstart, qend ); } k = -1; while( 1 ) { if( nthread ) { if( qstart > qend ) break; if( k == thread_no ) break; fprintf( stderr, "\n%d-%d / %d (thread %d) \n", qstart, qend, nq, thread_no ); k = thread_no; } else { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { fprintf( stderr, "Not supported\n" ); exit( 1 ); } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } for( i=qstart; i<=qend; i++ ) fprintf( lfp, ">%d\n%s\n", i, qseq[i] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, 
k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_group( lfp, lastresx ); fclose( lfp ); } return( NULL ); } #endif #endif static void *lastcallthread( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; char kd[1000]; k = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex ); k = *kshare; if( k == nq ) { pthread_mutex_unlock( targ->mutex ); break; } fprintf( stderr, "\r%d / %d (thread %d) \r", k, nq, thread_no ); ++(*kshare); pthread_mutex_unlock( targ->mutex ); } else #endif { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { klim = MIN( k, njob-nadd ); // klim = k; // dochira demo yoi if( klim == k ) { sprintf( command, "_db%dd", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _db." 
); exit( 1 ); } for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); // sprintf( command, "md5sum _db%dd > /dev/tty", k ); // system( command ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _db%dd _db%dd", whereispairalign, k, k ); else sprintf( command, "%s/lastdb -p _db%dd _db%dd", whereispairalign, k, k ); system( command ); sprintf( kd, "%d", k ); } else // calllast_fast de tsukutta nowo riyou { kd[0] = 0; // fprintf( stderr, "klim=%d, njob=%d, nadd=%d, skip!\n", klim, njob, nadd ); } } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } fprintf( lfp, ">%d\n%s\n", k, qseq[k] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_singleq( lfp, nd, k, lastresx ); fclose( lfp ); } return( NULL ); } static void calllast_fast( int nd, char **dseq, int nq, char **qseq, Lastresx **lastresx ) { int i, j; FILE *lfp; char command[1000]; lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } if( dorp == 'd' ) { fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; 
j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } else { fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } fclose( lfp ); // if( alg == 'r' ) // if 'R' -> lastcallthread, kokonoha nadd>0 no toki nomi shiyou { sprintf( command, "_dbd" ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _dbd" ); exit( 1 ); } if( alg == 'R' ) j = njob-nadd; else j = nd; for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _dbd _dbd", whereispairalign ); else sprintf( command, "%s/lastdb -p _dbd _dbd", whereispairalign ); system( command ); } #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; lastcallthread_arg_t *targ; int *ksharept; targ = (lastcallthread_arg_t *)calloc( nthread, sizeof( lastcallthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); ksharept = calloc( 1, sizeof(int) ); *ksharept = 0; pthread_mutex_init( &mutex, NULL ); for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) { sprintf( command, "%s/lastdb _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); #if 0 sprintf( command, "lastex -s 2 -a %d -b %d -p _scoringmatrixforlast -E 10000 _db.prj _db.prj > _lastex", -penalty, -penalty_ex ); system( command ); lfp = fopen( "_lastex", "r" ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); laste = atoi( 
command ); fclose( lfp ); fprintf( stderr, "laste = %d\n", laste ); sleep( 10 ); #else // laste = 5000; #endif } else { sprintf( command, "%s/lastdb -p _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); // fprintf( stderr, "Not written yet\n" ); } lfp = fopen( "_q", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _q" ); exit( 1 ); } for( i=0; i%d\n%s\n", i, qseq[i] ); } fclose( lfp ); msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from calllast_once, msize=%d\n", msize ); sprintf( command, "%s/lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", whereispairalign, msize, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", 1, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -e 40 -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", -penalty, -penalty_ex ); res = system( command ); if( res ) { fprintf( stderr, "LAST aborted\n" ); exit( 1 ); } lfp = fopen( "_lastres", "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres" ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); fprintf( stderr, "Reading lastres\n" ); readlastresx( lfp, nd, nq, lastresx, dseq, qseq ); fclose( lfp ); } static void callfoldalign( int nseq, char **mseq ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, 
"Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; // for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static double recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; // int res; static char com[10000]; double value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); t2u( ori1 ); t2u( ori2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. 
return( value ); } static double calldafs_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; double value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_dafsinorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsinorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_dafsinorg > %s/_dafsin", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "_dafssh%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/dafs --mafft-in _bpp _dafsin > _dafsout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); sprintf( com, "tr -d '\\r' < _dafssh%s > _dafssh%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _dafssh%s.unix 2>_dum%s", dirname, dirname ); res = system( com ); if( res ) { fprintf( stderr, "Error in dafs\n" ); exit( 1 ); } sprintf( com, "%s/_dafsout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsout\n", dirname ); exit( 1 ); } myfgets( com, 999, fp ); // nagai kanousei ga arunode 
fgets( com, 999, fp ); myfgets( com, 999, fp ); // nagai kanousei ga arunode fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static double callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; double value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_mxscarnainorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnainorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_mxscarnainorg > %s/_mxscarnain", dirname, dirname ); system( com ); // for cygwin, wakaran #if 0 sprintf( com, "cd %s; %s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", dirname, whereispairalign 
); #else sprintf( com, "_mxscarnash%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); //sleep( 10000 ); sprintf( com, "tr -d '\\r' < _mxscarnash%s > _mxscarnash%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _mxscarnash%s.unix 2>_dum%s", dirname, dirname ); #endif res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } sprintf( com, "%s/_mxscarnaout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnaout\n", dirname ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static void readhat4( FILE *fp, char ***bpp ) { char oneline[1000]; int bppsize; int onechar; // double prob; // int posi, posj; bppsize = 0; // fprintf( stderr, "reading hat4\n" ); onechar = getc(fp); // fprintf( stderr, "onechar = %c\n", onechar ); if( onechar != '>' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } ungetc( onechar, fp ); fgets( oneline, 999, fp ); while( 1 ) { onechar = getc(fp); ungetc( onechar, fp ); if( onechar == '>' || onechar == EOF ) { // fprintf( stderr, "Next\n" ); *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = NULL; break; } fgets( oneline, 999, fp ); // fprintf( stderr, "oneline=%s\n", oneline ); // sscanf( oneline, "%d %d %lf", &posi, &posj, &prob ); // fprintf( stderr, "%d %d -> %f\n", posi, posj, prob ); 
*bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = calloc( 100, sizeof( char ) ); strcpy( (*bpp)[bppsize], oneline ); bppsize++; } } static void preparebpp( int nseq, char ***bpp ) { FILE *fp; int i; fp = fopen( "hat4", "r" ); if( !fp ) { fprintf( stderr, "Cannot open hat4\n" ); exit( 1 ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { // reporterr( "(*argv)[0] in while loop = %s\n", (*argv) ); while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; #if 0 case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #else case 'l': if( atof( *++argv ) < 0.00001 ) store_localhom = 0; --argc; goto nextoption; #endif case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", 
whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); // fprintf( stderr, "nthread = %d\n", nthread ); --argc; #ifndef enablemultithread nthread = 0; #endif goto nextoption; case 'I': nadd = myatoi( *++argv ); // fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'w': lastm = myatoi( *++argv ); fprintf( stderr, "lastm = %d\n", lastm ); --argc; goto nextoption; case 'e': laste = myatoi( *++argv ); fprintf( stderr, "laste = %d\n", laste ); --argc; goto nextoption; case 'u': specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame. break; case 'c': stdout_dist = 1; break; case 'n': stdout_align = 1; break; case 'x': store_localhom = 0; store_dist = 0; break; #if 1 case 'a': fmodel = 1; break; #endif #if 0 case 'r': fmodel = -1; break; #endif case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; case 'a': alg = 'a'; break; case 'S': alg = 'S'; break; #endif case 'U': lastonce = 1; break; case 'S': lastsubopt = 1; break; case 't': alg = 't'; store_localhom = 0; break; case 'L': alg = 'L'; break; case 'Y': alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L; break; case 'Z': usenaivescoreinsteadofalignmentscore = 1; break; case 's': alg = 's'; break; case 'G': alg = 'G'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'r': alg = 'r'; // nadd>0 no toki nomi. 
/*
 * countamino: count the characters other than '-' among the first `end`
 * characters of `s`.
 *
 * Scanning also stops at the string terminator, and a non-positive `end`
 * yields 0, so the function never reads outside the string.
 */
int countamino( char *s, int end )
{
	int val = 0;

	while( end-- > 0 && *s )
		if( *s++ != '-' ) val++;
	return( val );
}

/*
 * score2dist: turn a pairwise score into a distance in the range [0, 2].
 *
 *   pscore     : score of the pair.
 *   selfscore1 : self score of the first sequence.
 *   selfscore2 : self score of the second sequence.
 *
 * The smaller self score is the denominator ("bunbo").
 *   - denominator == 0       -> 2.0
 *   - denominator <  pscore  -> 0.0   (flagged as problematic by the
 *                                      original author)
 *   - otherwise              -> ( 1 - pscore / denominator ) * 2
 */
static double score2dist( double pscore, double selfscore1, double selfscore2)
{
	double val;
	double bunbo;

	bunbo = ( selfscore1 < selfscore2 ) ? selfscore1 : selfscore2;

	if( bunbo == 0.0 )
		val = 2.0;
	else if( bunbo < pscore ) // problematic case, clamp to zero
		val = 0.0;
	else
		val = ( 1.0 - pscore / bunbo ) * 2.0;
	return( val );
}
{ thread_arg_t *targ = (thread_arg_t *)arg; int i, ilim, j, jst; int off1, off2, dum1, dum2, thereisx; int intdum; double pscore = 0.0; // by D.Mathog double *effarr1; double *effarr2; char **mseq1, **mseq2, **distseq1, **distseq2, **dumseq1, **dumseq2; char **aseq; double **dynamicmtx = NULL; double dist; double scoreoffset; // thread_arg int thread_no = targ->thread_no; int njob = targ->njob; Jobtable *jobpospt = targ->jobpospt; char **name = targ->name; char **seq = targ->seq; char **dseq = targ->dseq; int *thereisxineachseq = targ->thereisxineachseq; LocalHom **localhomtable = targ->localhomtable; double **distancemtx = targ->distancemtx; double *selfscore = targ->selfscore; char ***bpp = targ->bpp; Lastresx **lastresx = targ->lastresx; int alloclen = targ->alloclen; int *targetmap = targ->targetmap; double **expdist = targ->expdist; // fprintf( stderr, "thread %d start!\n", thread_no ); effarr1 = AllocateDoubleVec( 1 ); effarr2 = AllocateDoubleVec( 1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); if( alg == 'N' ) { dumseq1 = AllocateCharMtx( 1, alloclen+10 ); dumseq2 = AllocateCharMtx( 1, alloclen+10 ); } distseq1 = AllocateCharMtx( 1, 0 ); distseq2 = AllocateCharMtx( 1, 0 ); aseq = AllocateCharMtx( 2, alloclen+10 ); if( specificityconsideration > 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd; else ilim = njob - 1; while( 1 ) { pthread_mutex_lock( targ->mutex_counter ); j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; if( alg == 'Y' || alg == 'r' ) jst = njob - nadd; else jst = i + 1; j = jst; if( i == ilim ) { // fprintf( stderr, "thread %d end!\n", thread_no ); pthread_mutex_unlock( targ->mutex_counter ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 
20130603 G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL ); L__align11_noalign( NULL, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); free( effarr1 ); free( effarr2 ); free( mseq1 ); free( mseq2 ); if( alg == 'N' ) { FreeCharMtx( dumseq1 ); FreeCharMtx( dumseq2 ); } free( distseq1 ); free( distseq2 ); FreeCharMtx( aseq ); if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); return( NULL ); } } jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex_counter ); // if( j == i+1 || j % 100 == 0 ) if( j == i+1 && i % 10 == 0 ) { fprintf( stderr, "% 5d / %d (by thread %3d) \r", i, njob-nadd, thread_no ); // fprintf( stderr, "% 5d - %5d / %d (thread %d)\n", i, j, njob, thread_no ); } if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 ) { if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = 3.0; else distancemtx[i][j-i] = 3.0; } if( stdout_dist) { pthread_mutex_lock( targ->mutex_stdout ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 3.0 ); pthread_mutex_unlock( targ->mutex_stdout ); } continue; } strcpy( aseq[0], seq[i] ); strcpy( aseq[1], seq[j] ); // clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 ); // clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 ); // fprintf( stderr, "Skipping conjuction..\n" ); effarr1[0] = 1.0; effarr2[0] = 1.0; mseq1[0] = aseq[0]; mseq2[0] = aseq[1]; thereisx = thereisxineachseq[i] + thereisxineachseq[j]; // strcpy( distseq1[0], dseq[i] ); // nen no tame // strcpy( distseq2[0], dseq[j] ); // nen no tame distseq1[0] = dseq[i]; distseq2[0] = dseq[j]; // fprintf( stderr, "mseq1 = %s\n", mseq1[0] ); // fprintf( stderr, "mseq2 = %s\n", mseq2[0] ); #if 0 fprintf( stderr, "group1 = %.66s", indication1 ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); fprintf( stderr, "\n" ); #endif // for( l=0; l 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( 
pscore, selfscore[i], selfscore[j] ); if( ( scoreoffset = dist2offset( dist ) ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!! break; case( 'Y' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { if( store_localhom ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } else pscore = 0.0; break; case( 'A' ): if( usenaivescoreinsteadofalignmentscore ) { G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // if( store_localhom ) if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) ) { pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap ); if( thereisx ) pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); // dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20 
if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap ); } // pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 ); } // #endif } else pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki } off1 = off2 = 0; break; case( 'N' ): if( usenaivescoreinsteadofalignmentscore ) { genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) { strcpy( dumseq1[0], distseq1[0] ); strcpy( dumseq2[0], distseq2[0] ); pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki } #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. 
strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } break; case( 't' ): off1 = off2 = 0; // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // tsuneni distseq shiyou break; case( 's' ): pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'G' ): pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; #if 0 case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen ); off1 = off2 = 0; break; case( 'K' ): pscore = genG__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 'H' ): pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen ); break; case( 'B' ): case( 'T' ): pscore = recalllara( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 'M' ): pscore = MSalign11( mseq1, mseq2, alloclen ); break; #endif default: ErrorExit( "\n\nERROR IN SOURCE FILE\n\n" ); } } if( alg == 't' || ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) // 't' no jouken ha iranai to omou. if( ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) { #if SCOREOUT fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j ); #endif // if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ????? 
else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( !specifictarget && alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else // putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); { if( targetmap[i] != -1 && targetmap[j] != -1 ) { putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); // sukoshi muda. } else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); else if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); #if 0 if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); #endif else { reporterr( "okashii\n" ); exit( 1 ); } } } pscore = score2dist( pscore, selfscore[i], selfscore[j] ); // pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // pscore = score2dist( pscore, selfscore[i], selfscore[j] ); // reporterr( "->pscore = %f\n", pscore ); } else { pscore = 2.0; } #if 1 // mutex if( stdout_align ) { pthread_mutex_lock( targ->mutex_stdout ); if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, 
mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } pthread_mutex_unlock( targ->mutex_stdout ); } if( stdout_dist ) { pthread_mutex_lock( targ->mutex_stdout ); if( j == i+1 ) fprintf( stdout, "%d %d d=%.3f\n", i+1, i+1, 0.0 ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); pthread_mutex_unlock( targ->mutex_stdout ); } #endif // mutex if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j-i] = pscore; } } } #endif static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **dseq, int *thereisxineachseq, char **mseq1, char **mseq2, int alloclen, Lastresx **lastresx, double **distancemtx, LocalHom **localhomtable, double **expdist, int ngui ) { int i, j, ilim, jst, jj; int off1, off2, dum1, dum2, thereisx; double pscore = 0.0; // by D.Mathog FILE *hat2p, *hat3p; // double **distancemtx; double *selfscore; double *effarr1; double *effarr2; char *pt; char *hat2file = "hat2"; // LocalHom **localhomtable = NULL, LocalHom *tmpptr; int intdum; char ***bpp = NULL; // mxscarna no toki dake char **distseq1, **distseq2; char **dumseq1, **dumseq2; double dist; double scoreoffset; int ntarget; int *targetmap, *targetmapr; if( specifictarget ) { targetmap = calloc( njob, sizeof( int ) ); ntarget = 0; for( i=0; i_focus_' to the title lines of the sequences to be focused on.\n\n" ); exit( 1 ); } else { reporterr( "nfocus = %d \n", ntarget ); } } else { ntarget = njob; targetmap = calloc( njob, sizeof( int ) ); targetmapr = calloc( njob, sizeof( int ) ); for( i=0; i 0 ) // alg=='r' || alg=='R' -> nthread:=0 (sukoshi ue) { Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex_counter; pthread_mutex_t mutex_stdout; thread_arg_t *targ; if( alg == 'Y' || alg == 'r' ) jobpos.j = njob - nadd - 1; else jobpos.j = 0; jobpos.i = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( 
&mutex_counter, NULL ); pthread_mutex_init( &mutex_stdout, NULL ); for( i=0; i 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd; else ilim = njob - 1; for( i=0; i 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); // dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20 // reporterr( "dist(%d,%d)=%f\n", i, j, dist ); if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap ); } // pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 ); } #endif } else pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki } off1 = off2 = 0; break; case( 'N' ): // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); if( usenaivescoreinsteadofalignmentscore ) { genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) { strcpy( dumseq1[0], distseq1[0] ); strcpy( dumseq2[0], distseq2[0] ); pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki } #if 1 if( specificityconsideration > 0.0 ) { // fprintf( stderr, "dist = %f\n", score2dist( pscore, selfscore[i], selfscore[j] ) ); if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. 
strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } break; case( 'R' ): if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi pscore = 0.0; else pscore = (double)lastresx[i][j].score; // all pair break; case( 'r' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) pscore = (double)lastresx[i][j-(njob-nadd)].score; else pscore = 0.0; break; case( 'L' ): if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi pscore = 0.0; else { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // if( store_localhom ) if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); // all pair if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( ( scoreoffset = dist2offset( dist ) ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair } } // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!! 
break; case( 'Y' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { if( store_localhom ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } else pscore = 0.0; break; case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, 1, 1, alloclen ); off1 = off2 = 0; break; #if 0 case( 'K' ): pscore = genG__align11( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; #endif case( 'H' ): pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen ); break; case( 'B' ): case( 'T' ): pscore = recalllara( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 's' ): pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'G' ): pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'M' ): pscore = MSalign11( mseq1, mseq2, alloclen ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } if( alg == 't' || ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) // 't' no jouken ha iranai to omou. 
if( ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) { #if SCOREOUT fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j ); #endif // if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) // x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ????? else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( !specifictarget && alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else { if( targetmap[i] != -1 && targetmap[j] != -1 ) { putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); // sukoshi muda. 
} else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); else if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else { reporterr( "okashii\n" ); exit( 1 ); } } } pscore = score2dist( pscore, selfscore[i], selfscore[j] ); } else { pscore = 2.0; } if( stdout_align ) { if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } } if( stdout_dist ) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); if( store_dist) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j-i] = pscore; } } } if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); } if( store_dist && ngui == 0 ) { hat2p = fopen( hat2file, "w" ); if( !hat2p ) ErrorExit( "Cannot open hat2." ); if( alg == 'Y' || alg == 'r' ) WriteHat2_part_pointer( hat2p, njob, nadd, name, distancemtx ); else // WriteHat2_pointer( hat2p, njob, name, distancemtx ); WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, distancemtx ); // jissiha double fclose( hat2p ); } hat3p = fopen( "hat3", "w" ); if( !hat3p ) ErrorExit( "Cannot open hat3." 
); if( store_localhom && ngui == 0 ) { fprintf( stderr, "\n\n##### writing hat3\n" ); if( alg == 'Y' || alg == 'r' ) ilim = njob-nadd; else if( specifictarget ) ilim = ntarget; else ilim = njob-1; for( i=0; inext ) { // fprintf( stderr, "j=%d, jj=%d\n", j, jj ); if( tmpptr->opt == -1.0 ) continue; // tmptmptmptmptmp // if( alg == 'B' || alg == 'T' ) // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); // else if( targetmap[j] == -1 || targetmap[i] < targetmap[j] ) fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", targetmapr[i], j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2+1, tmpptr->end2+1 ); // zettai dame!!!! } } } // if( ngui == 0 ) // { #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif if( alg == 'Y' || alg == 'r' ) FreeLocalHomTable_part( localhomtable, (njob-nadd), nadd ); else if( specifictarget ) FreeLocalHomTable_part( localhomtable, ntarget, njob ); else FreeLocalHomTable_half( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. 
FreeLocalHomTable\n" ); #endif // } } fclose( hat3p ); if( alg == 's' ) { char **ptpt; for( i=0; i M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try --6merpair --addfragments for such large data.\n" ); exit( 1 ); } } if( ( alg == 'r' || alg == 'R' ) && dorp == 'p' ) { fprintf( stderr, "Not yet supported\n" ); exit( 1 ); } alloclen = nlenmax*2; if( ngui ) { seq = seqgui; name = namegui; } else { seq = AllocateCharMtx( njob, alloclen+10 ); name = AllocateCharMtx( njob, B ); } aseq = AllocateCharMtx( 2, alloclen+10 ); bseq = AllocateCharMtx( njob, alloclen+10 ); dseq = AllocateCharMtx( njob, alloclen+10 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); nlen = AllocateIntVec( njob ); thereisxineachseq = AllocateIntVec( njob ); if( alg == 'R' ) { lastresx = calloc( njob+1, sizeof( Lastresx * ) ); for( i=0; i 0 ) // protein, not text. hitsuyou? { for( i=0; i 1 ) { fprintf( stderr, "\nThe order of distances is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself, using sort -n -k 2 | sort -n -k 1 -s\n" ); } if( stdout_align && nthread > 1 ) { fprintf( stderr, "\nThe order of pairwise alignments is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself.\n" ); } #if 1 if( lastresx ) { for( i=0; lastresx[i]; i++ ) { for( j=0; lastresx[i][j].naln!=-1; j++ ) { for( k=0; k0; i-- ) { if( alignmentlength != strlen( seq[i] ) ) { fprintf( stderr, "#################################################################################\n" ); fprintf( stderr, "# ERROR! 
\n" ); fprintf( stderr, "# For the --add option, the original%4d sequences must be aligned \n", njob-nadd ); fprintf( stderr, "#################################################################################\n" ); exit( 1 ); } } } if( specifictarget ) { reporterr( "specifictarget\n" ); ntarget = 0; for( i=0; i_focus_' to the title lines of the sequences to be focused on.\n\n" ); exit( 1 ); } else { reporterr( "nfocus = %d \n", ntarget ); } } else { ntarget = njob; // targetmap = calloc( njob, sizeof( int ) ); // targetmapr = calloc( njob, sizeof( int ) ); // for( i=0; i #include #define DEBUG 0 #define TEST 0 int howmanyx( char *s ) { int val = 0; if( scoremtx == -1 ) { do { if( !strchr( "atgcuATGCU", *s ) ) val++; } while( *++s ); } else { do { if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; } while( *++s ); } return( val ); } void arguments( int argc, char *argv[] ) { int c; disopt = 0; while( --argc > 0 && (*++argv)[0] == '-' ) while ( c = *++argv[0] ) switch( c ) { case 'i': disopt = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } if( argc != 0 ) { fprintf( stderr, "options: -i\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { int ktuple; int i, j; FILE *hat2p; char **seq; char **seq1; static char name[M][B]; static char name1[M][B]; static int nlen1[M]; double **mtx; double **mtx2; static int nlen[M]; char b[B]; double max; char com[B]; int opt[M]; int res; char *home; char queryfile[B]; char datafile[B]; char fastafile[B]; char hat2file[B]; int pid = (int)getpid(); #if 0 home = getenv( "HOME" ); #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ home = NULL; #endif #if DEBUG if( home ) fprintf( stderr, "home = %s\n", home ); #endif if( !home ) home = ""; sprintf( queryfile, "%s/tmp/query-%d\0", home, pid ); sprintf( datafile, "%s/tmp/data-%d\0", home, pid ); sprintf( fastafile, "%s/tmp/fasta-%d\0", home, pid ); sprintf( hat2file, "hat2-%d\0", pid ); arguments( argc, argv ); #if 0 PreRead( stdin, 
&njob, &nlenmax ); #else getnumlen( stdin ); #endif rewind( stdin ); seq = AllocateCharMtx( njob, nlenmax+1 ); seq1 = AllocateCharMtx( 2, nlenmax+1 ); mtx = AllocateDoubleMtx( njob, njob ); mtx2 = AllocateDoubleMtx( njob, njob ); #if 0 FRead( stdin, name, nlen, seq ); #else readData( stdin, name, nlen, seq ); #endif if( scoremtx == -1 ) ktuple = 6; else ktuple = 1; for( i=0; i %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); else sprintf( com, "fasta3 -Q -h -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); res = system( com ); if( res ) ErrorExit( "error in fasta" ); hat2p = fopen( fastafile, "r" ); if( hat2p == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); ReadFasta3( hat2p, mtx[i], njob-i, name1 ); if( i == 0 ) for( j=0; j %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); } } } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'e': eregfile = *++argv; fprintf( stderr, "eregfile = %s\n", eregfile ); --argc; goto nextoption; case 'r': regfile = *++argv; fprintf( stderr, "regfile = %s\n", regfile ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'n' : outnumber = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } void readereg( FILE *regfp, int **regtable, char **revtable, int *outtable, int *noutpt, int *loutpt ) { char gett[1000]; int j; int mem; char cmem; char reg[5][100]; char out[100]; int startpos, endpos; *noutpt = 0; *loutpt = 0; fgets( gett, 999, regfp ); reg[0][0] = reg[1][0] = reg[2][0] = reg[3][0] = reg[4][0] = 'n'; reg[0][1] = reg[1][1] = reg[2][1] = reg[3][1] = reg[4][1] = 0; sscanf( gett, "%c %s %s %s %s %s", &cmem, reg[0], reg[1], reg[2], reg[3], reg[4] ); if( cmem != 'e' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } for( j=0; j<5; 
j++ ) { // reporterr( "reg[j]=%s\n", reg[j] ); sscanf( reg[j], "%d-%d-%c", regtable[0]+(j*2), regtable[0]+(j*2)+1, revtable[0]+j ); fprintf( stderr, "%d %d-%d\n", 0, regtable[0][j*2], regtable[0][j*2+1] ); startpos = regtable[0][j*2]; endpos = regtable[0][j*2+1]; // reporterr( "startpod=%d, endpos=%d, *loutpt=%d\n", startpos, endpos, *loutpt ); if( startpos > endpos ) { endpos = regtable[0][j*2]; startpos = regtable[0][j*2+1]; } if( startpos != -1 && endpos != -1 ) *loutpt += endpos - startpos + 1; } while( 1 ) { fgets( gett, 999, regfp ); if( feof( regfp ) ) break; sscanf( gett, "%d o=%s", &mem, out ); if( mem >= njob ) { fprintf( stderr, "Out of range\n" ); exit( 1 ); } outtable[mem] = atoi( out ); if( outtable[mem] ) *noutpt += 1; } } void readreg( FILE *regfp, int **regtable, char **revtable, int *outtable ) { char gett[1000]; int j; int mem; char reg[5][100]; char out[100]; while( 1 ) { fgets( gett, 999, regfp ); if( feof( regfp ) ) break; sscanf( gett, "%d %s %s %s %s %s o=%s", &mem, reg[0], reg[1], reg[2], reg[3], reg[4], out ); if( mem >= njob ) { fprintf( stderr, "Out of range\n" ); exit( 1 ); } for( j=0; j<5; j++ ) { sscanf( reg[j], "%d-%d-%c", regtable[mem]+(j*2), regtable[mem]+(j*2)+1, revtable[mem]+j ); fprintf( stderr, "%d %d-%d\n", mem, regtable[mem][j*2], regtable[mem][j*2+1] ); } outtable[mem] = atoi( out ); } } int main( int argc, char *argv[] ) { FILE *infp; FILE *regfp; int nlenmin; int **regtable; char **revtable; int *outtable; int i, nout, lout; char **outseq; char **name; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; dorp = NOTSPECIFIED; getnumlen_nogap( infp, &nlenmin ); if( regfile ) { regfp = fopen( regfile, "r" ); if( !regfp ) { fprintf( stderr, "Cannot open %s\n", regfile ); exit( 1 ); } regtable = AllocateIntMtx( njob, 5*2 ); revtable = AllocateCharMtx( njob, 5 ); outtable = AllocateIntVec( njob ); readreg( 
regfp, regtable, revtable, outtable ); cutData( infp, regtable, revtable, outtable ); } else if( eregfile ) { regfp = fopen( eregfile, "r" ); if( !regfp ) { fprintf( stderr, "Cannot open %s\n", eregfile ); exit( 1 ); } regtable = AllocateIntMtx( 1, 5*2 ); revtable = AllocateCharMtx( 1, 5 ); outtable = AllocateIntVec( njob ); readereg( regfp, regtable, revtable, outtable, &nout, &lout ); fprintf( stderr, "nout = %d, lout = %d\n", nout, lout ); outseq = AllocateCharMtx( nout, lout+1 ); name = AllocateCharMtx( nout, B ); cutAlignment( infp, regtable, revtable, outtable, name, outseq ); fprintf( stderr, "gappick! nout = %d\n", nout ); commongappick( nout, outseq ); for( i=0; i #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SHISHAGONYU 0 // for debug // from tbfast static int treein; static int treeout; // from pairlocalalign static int stdout_dist; static void arguments( int argc, char *argv[] ) { int c; nthread = 1; nadd = 0; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; stdout_dist = 0; // dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; penalty_shift_factor = 1000.0; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; specificityconsideration = 0.0; usenaivescoreinsteadofalignmentscore = 0; specifictarget = 0; nwildcard = 0; compacttree = 2; // tsuneni! 
treein = 0; treeout = 0; fastathreshold = 2.7; constraint = 2; // localhomfile = 0; // tbfast.c no wo tsukaunode comment out // reporterr( "argc=%d\n", argc ); // reporterr( "*argv=%s\n", *argv ); // reporterr( "(*argv)[0]=%c\n", (*argv)[0] ); while( --argc > 0 && (*++argv)[0] == '-' ) { // reporterr( "(*argv)[0] in while loop = %s\n", (*argv) ); while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; #if 0 case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #else #endif case 'C': nthread = myatoi( *++argv ); if( nthread == 0 ) nthread = 1; // fprintf( stderr, "nthread = %d\n", nthread ); --argc; #ifndef enablemultithread 
nthread = 0; #endif goto nextoption; case 'I': nadd = myatoi( *++argv ); // fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'u': specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; case 'c': stdout_dist = 1; break; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; #if 0 case 'r': fmodel = -1; break; #endif case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; case 'a': alg = 'a'; break; case 'S': alg = 'S'; break; #endif case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'L': alg = 'L'; break; case 'Z': usenaivescoreinsteadofalignmentscore = 1; break; case 'B': // hitsuyou! memopt -M -B no tame break; #if 0 case 'Y': alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L; break; case 's': alg = 's'; break; case 'G': alg = 'G'; break; case 'B': // hitsuyou! memopt -M -B no tame break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'r': alg = 'r'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> R, last break; case 'V': alg = 'V'; break; #endif case 'T': // tbfast.c no noalign ni taiou break; case 'F': use_fft = 1; break; case 'U': treein = 1; break; case 't': treeout = 1; break; case 'y': divpairscore = 1; break; case '=': specifictarget = 1; break; case ':': nwildcard = 1; break; case 'q': lhlimit = myatoi( *++argv ); --argc; goto nextoption; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. 
*/ default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "pairlocalalign options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static int *nlen = NULL; static int *selfscore = NULL; static char **name = NULL, **seq = NULL; static double *eff = NULL; int i; static int ***topol = NULL; static Treedep *dep = NULL; static double **len = NULL; FILE *infp = NULL; char c; #if 1 // int required = MPI_THREAD_MULTIPLE; int required = MPI_THREAD_FUNNELED; int provided; MPI_Init_thread(&argc, &argv, required, &provided); #else MPI_Init(&argc,&argv); #endif int my_rank; int num_of_processes; MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); MPI_Comm_size(MPI_COMM_WORLD,&num_of_processes); if (provided < required) { #if 0 if (my_rank == 0) { reporterr( "MPI_THREAD_MULTIPLE (mpi thread support level) is required\n"); reporterr( "required level is %d and provided level is %d\n", required, provided ); } MPI_Finalize(); exit( 1 ); #else if (my_rank == 0) reporterr( "WARNING: mpi thread support level %d is required, but provided level is %d.\n", required, provided ); #endif } else { if (my_rank == 0) reporterr( "mpi thread support level : required level is %d and provided level is %d\n", required, provided ); } arguments( argc, argv ); if( alg != 'A' && alg != 'L' && alg != 'N' ) { reporterr( "alg %c is not yet supported\n", alg ); exit( 1 ); } if( alg != 'N' && usenaivescoreinsteadofalignmentscore == 1 ) { reporterr( "The combination of usenaivescoreinsteadofalignmentscore and alg %c is not yet supported\n", alg ); exit( 1 ); } if( fastathreshold < 0.0001 ) { constraint = 0; lhlimit = 0; } if(my_rank==0){ if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else 
infp = stdin; getnumlen( infp ); rewind( infp ); } // getnumlen MPI_Bcast(&njob,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nlenmax,1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&dorp,1, MPI_INT, 0, MPI_COMM_WORLD); fprintf( stderr, "my_rank: %d, njob: %d, nlenmax: %d\n", my_rank, njob, nlenmax); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } #ifndef mingw setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou. #endif seq = AllocateCharMtx( njob, nlenmax+1 ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); selfscore = AllocateIntVec( njob ); topol = AllocateIntCub( njob, 2, 0 ); len = AllocateFloatMtx( njob, 2 ); eff = AllocateDoubleVec( njob ); dep = (Treedep *)calloc( njob, sizeof( Treedep ) ); if(my_rank==0){ #if 0 readData( infp, name, nlen, seq ); #else readData_pointer( infp, name, nlen, seq ); fclose( infp ); #endif } for(i=0;i #ifdef enableatomic #include #endif #include #define SEMAPHORE 1 #define NSTREAM 1 #define DEBUG 0 #define CANONICALTREEFORMAT 1 #define MEMSAVE 1 #define HAT3SORTED 0 #define DISPPAIRID 0 // tbfast ha ugokanakunaru #define LHBLOCKFACTOR 2 #define LHDIVIDE 1.0 #define MINBLOCKLEN2 1000000000 // 100000 pairs * 100 sites * 100 sites #define N0LOOPFIRST 0 #define YOUNGER0TREE 1 // --add ni hitsuyou #define REPORTCOSTS 0 #define EXACTLYSAMEASPAIRLOCALALIGN 0 // test you. 
itchi saseruniha guide tree mo ataeru #define RECURSIVETOP 1 #define ENABLEMPIDEBUG 0 //the rough expectation number of chunks which will be assigned to each process #define GRANULARITY 10 static char lockfile[1000]; static int lockthisjobonly; #if enableatomic static ATOMICINT *lockaddr; #else #if SEMAPHORE static sem_t *lockaddr; #else static pthread_mutex_t *lockaddr; #endif #endif static int lockfd; int createshm; #if 0 int seqlen( char *seq ) { int val = 0; while( *seq ) if( *seq++ != '-' ) val++; return( val ); } #else int seqlen( char *seq ) { int val = 0; if( *newgapstr == '-' ) { while( *seq ) if( *seq++ != '-' ) val++; } else { while( *seq ) { if( *seq != '-' && *seq != *newgapstr ) val++; seq++; } } return( val ); } #endif int intlen( int *num ) { int value = 0; while( *num++ != -1 ) value++; return( value ); } char seqcheck( char **seq ) { int i, len; char **seqbk = seq; while( *seq ) { len = strlen( *seq ); for( i=0; i output\n" ); reporterr( "=== \n" ); reporterr( "========================================================================= \n" ); reporterr( "========================================================================= \n" ); return( (int)(*seq)[i] ); } } seq++; } return( 0 ); } void intcat( int *s1, int *s2 ) { while( *s1 != -1 ) s1++; while( *s2 != -1 ) { // reporterr( "copying %d\n", *s2 ); *s1++ = *s2++; } *s1 = -1; } void intcpy( int *s1, int *s2 ) { while( *s2 != -1 ) { // reporterr( "copying %d\n", *s2 ); *s1++ = *s2++; } *s1 = -1; } void intncpy( int *s1, int *s2, int n ) { while( n-- ) *s1++ = *s2++; } void fltncpy( double *s1, double *s2, int n ) { while( n-- ) *s1++ = *s2++; } static int countmem( int *s ) { int v = 0; while( *s++ != -1 ) v++; return( v ); } static int lastmem( int *s ) { while( *s++ != -1 ) ; return( *(s-2) ); } void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx ) { int i, j, lgth; lgth = strlen( aseq[0] ); for( j=0; j DISPSEQF ) imax = DISPSEQF; else imax = nseq; reporterr( " 
....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; reporterr( "%3d %s\n", i+1, b ); } } void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; unsigned char ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == '-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(unsigned char)mseq2[++k]) == '-' ) ; // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, 
double *value ) { int i, j, k, c; int len2 = len - 2; int mn1, mn2; double tmpscore; char *mseq1, *mseq2; double efficient; int gapnum = amino_n['-']; double gaptmpscore; double gapscore = 0.0; // reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( mn2 == gapnum ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; tmpscore += (double)scoringmatrices[c][mn1][mn2]; // tmpscore += (double)scoringmtx[mn1][mn2]; while( (mn2=amino_n[(unsigned char)mseq2[++k]]) == gapnum ) tmpscore += (double)scoringmatrices[c][mn1][mn2]; // tmpscore += (double)scoringmtx[mn1][mn2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } // reporterr( "done." ); #if 0 reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_dynmtx( double **offsetmtx, int scoringmtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; double gaptmpscore; double gapscore = 0.0; // reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600; // tmpscore += (double)scoringmtx[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600; // tmpscore += (double)scoringmtx[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } reporterr( "done." 
); #if 0 reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; unsigned char ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; double gaptmpscore; double gapscore = 0.0; // reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == '-' ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(unsigned char)mseq2[++k]) == '-' ) // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } #if 0 reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { int i, j, k; double c; int len = strlen( seq[0] ); double score; double tmpscore; char *mseq1, *mseq2; double efficient; #if DEBUG FILE *fp; #endif score = 0.0; c = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; /* fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); */ } /* fprintf( stdout, "total score = %f\n", score ); */ for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore 
+= amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; } } /* reporterr( "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* fprintf( trap_g, "score by fast = %f\n", (double)score ); tmpscore = score = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[24][0]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); */ score += (double)tmpscore * efficient; } } score /= c; return( (double)score ); } void upg2( int nseq, double **eff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; static char **pair = NULL; if( !pair ) { pair = AllocateCharMtx( njob, njob ); } for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } #define BLOCKSIZE 100 #define LARGEBLOCKSIZE 100 typedef struct _generaltdistarrthread_arg { int para; int njob; // int thread_no; int m; int *nlen; char **seq; int **skiptable; int **pointt; int *ttable; int *tselfscore; int *posshared; int *joblist; double *result; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } generaldistarrthread_arg_t; static void *generalkmerdistarrthread( void *arg ) // enablemultithread == 0 demo tsukau { generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg; int njob = targ->njob; int para = targ->para; int m = targ->m; int *nlen = targ->nlen; int **pointt = targ->pointt; int *ttable = targ->ttable; int 
*tselfscore = targ->tselfscore; int *joblist = targ->joblist; int *posshared = targ->posshared; double *result = targ->result; // double **partmtx = targ->partmtx; int i, posinjoblist, n; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += LARGEBLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int m = targ->m; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *result = targ->result; // double **partmtx = targ->partmtx; int i, posinjoblist, n; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif return( NULL ); } posinjoblist = *posshared; *posshared += LARGEBLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; n", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; // result = calloc( nseq, sizeof( double ) ); // joblist = calloc( nseq, sizeof( int ) ); for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } for( 
acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } if( j ) { // reporterr( "resetting in parallel!! j=%d\n", j ); // exit( 1 ); int posshared; generaldistarrthread_arg_t *targ; #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); posshared = 0; pthread_mutex_init( &mutex, NULL ); for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } } *mindisfrompt = mindisfrom; *nearestpt = nearest; // free( joblist ); // free( result ); } #else static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *resultnotused, int *joblistnotused ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) 
tmpdouble=distfrompt[j][pos]; // else if( seq ) // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); else tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; // else if( seq ) // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); else tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; } #endif #if 1 static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *result, int *joblist ) { int i, j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; // double *result; // int *joblist; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; // result = calloc( nseq, sizeof( double ) ); // joblist = calloc( nseq, sizeof( int ) ); // for( acptj=acpt,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = 
distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { i = acptj->pos; // if( i == pos ) continue; if( distfrompt[pos] ) { tmpdouble = result[i] = distfrompt[pos][i]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else if( distfrompt[i] ) { tmpdouble = result[i] = distfrompt[i][pos]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = i; } } else joblist[j++] = i; } if( j ) { // reporterr( "resetting in parallel!! j=%d\r", j ); // exit( 1 ); int posshared; generaldistarrthread_arg_t *targ; posshared = 0; #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; tmpdouble = result[j]; if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; // free( joblist ); // free( result ); } #else static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *resultnotused, int *joblistnotused ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // reporterr( "resetnearest..\r" ); // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); // mindisfrom = 999.9; // nearest = -1; for( acptj=(acpt+pos)->next; acptj!=NULL; 
acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; else tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); // else // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru { j = acptj->pos; if( distfrompt[pos] ) tmpdouble=distfrompt[pos][j]; else if( distfrompt[j] ) tmpdouble=distfrompt[j][pos]; else tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); // else // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); if( tmpdouble < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // printf( "mindisfrom = %f\n", mindisfrom ); *mindisfrompt = mindisfrom; *nearestpt = nearest; } #endif static int getdensest( int *m, double *d ) { int i; double dmax = -100.0; int pmax = -1; for( i=0; m[i]>-1; i++ ) { if( d[m[i]] > dmax ) { dmax = d[m[i]]; pmax = m[i]; } } return( pmax ); } static void setdensity( int nseq, Bchain *acpt, double **eff, double *density, int pos ) { int j; double tmpdouble; // double **effptpt; Bchain *acptj; // printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt ); // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; tmpdouble = 0.0; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( eff[pos][j-pos] < 1.0 ) tmpdouble += (2.0-eff[pos][j-pos]); } // effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; if( eff[j][pos-j] < 1.0 ) tmpdouble += (2.0-eff[j][pos-j]); } *density = tmpdouble; // printf( "p=%d, d=%f \n", pos, *density ); } static void setnearest( int nseq, Bchain *acpt, double 
**eff, double *mindisfrompt, int *nearestpt, int pos ) { int j; double tmpdouble; double mindisfrom; int nearest; // double **effptpt; Bchain *acptj; mindisfrom = 999.9; nearest = -1; // printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt ); // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=*effpt++) < *mindisfrompt ) if( (tmpdouble=eff[pos][j-pos]) < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } // effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpdouble=eff[j][pos-j]) < mindisfrom ) { mindisfrom = tmpdouble; nearest = j; } } *mindisfrompt = mindisfrom; *nearestpt = nearest; // printf( "%f, %d \n", pos, *mindisfrompt, *nearestpt ); } static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) { int j; double tmpdouble; double **effptpt; Bchain *acptj; *mindisfrompt = 999.9; *nearestpt = -1; // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=*effpt++) < *mindisfrompt ) if( (tmpdouble=eff[pos][j]) < *mindisfrompt ) { *mindisfrompt = tmpdouble; *nearestpt = j; } } effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpdouble=eff[j][pos]) < *mindisfrompt ) { *mindisfrompt = tmpdouble; *nearestpt = j; } } } static void loadtreeoneline( int *ar, double *len, FILE *fp ) { static char gett[1000]; int res; char *p; p = fgets( gett, 999, fp ); if( p == NULL ) { reporterr( "\n\nFormat error (1) in the tree? 
It has to be a bifurcated and rooted tree.\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } res = sscanf( gett, "%d %d %lf %lf", ar, ar+1, len, len+1 ); if( res != 4 ) { reporterr( "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } ar[0]--; ar[1]--; if( ar[0] >= ar[1] ) { reporterr( "\n\nIncorrect guide tree\n" ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // reporterr( "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); // reporterr( "len[0] = %f, len[1] = %f\n", len[0], len[1] ); } void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep ) { int i, j, k, minijm, maxijm; int *intpt, *intpt2; int *hist = NULL; Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; int *pt1, *pt2, *pt11, *pt22; int *nmemar; int nmemim, nmemjm; char **tree; char *treetmp; char *nametmp, *nameptr, *tmpptr; char namec; FILE *fp; int node[2]; double *height; double clusterdist; int mpair, mi, mj; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( nseq ); ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); nmemar = AllocateIntVec( nseq ); // treetmp = AllocateCharVec( nseq*50 ); treetmp = NULL; nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( nseq, nseq*50 ); tree = AllocateCharMtx( nseq, 0 ); height = AllocateFloatVec( nseq ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( 
mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } #endif prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; if( len[k][0] == -1 || len[k][1] == -1 ) { reporterr( "Re-computing the length of branch %d..\n", k ); clusterdist = 0.0; mpair = 0; for( i=0; 
(mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ ) { minijm = MIN(mi,mj); maxijm = MAX(mi,mj); clusterdist += mtx[minijm][maxijm-minijm]; mpair += 1; } clusterdist /= (double)mpair; reporterr( "clusterdist = %f\n", clusterdist ); if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im]; if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im]; fprintf( stderr, "len0 = %f\n", len[k][0] ); fprintf( stderr, "len1 = %f\n\n", len[k][1] ); } #if 0 fprintf( stderr, "vSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n" ); #endif height[im] += len[k][0]; // for ig tree, 2015/Dec/25 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 // reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] ); treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // reporterr( "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fprintf( fp, "#by loadtop\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); free( hist ); free( (char *)ac ); free( (void *)nmemar ); free( height ); } static void shufflelennum( Lennum *ary, int size ) { int i; for(i=0;ilen - ((Lennum *)p)->len ); } #if 0 static int compfuncpair( const void *p, const void *q ) { return( ((Pairnum *)q)->npairs - ((Pairnum *)p)->npairs ); } #endif static int compfuncpairpt( const void *p, const void *q ) { return( (*(Pairnum **)q)->npairs - (*(Pairnum **)p)->npairs ); } void limitlh( int *uselh, Lennum *in, int size, int limit ) { int i; // for(i=0;i size ) limit = size; // reporterr( "numpairs=%llu, ULLONG_MAX=%llu, nn=%lld, INT_MAX=%d, n=%d\n", numpairs, ULLONG_MAX, nn, INT_MAX, n ); for(i=0;i INT_MAX ) nn = INT_MAX; n = (int)nn; if( n > size ) n = size; // reporterr( "numpairs=%llu, ULLONG_MAX=%llu, nn=%lld, INT_MAX=%d, n=%d\n", numpairs, ULLONG_MAX, nn, INT_MAX, n ); for(i=0;idep)[pos].child0) == -1 ) { *order++ = (tdp->topol)[pos][0][0]; *order = -1; } else { order = topolorder_lessargs( order, tdp, child ); } if( (child=(tdp->dep)[pos].child1) == -1 ) { *order++ = (tdp->topol)[pos][1][0]; *order = -1; } else { order = topolorder_lessargs( order, tdp, child ); } return( order ); } int *topolorderz( int *order, int ***topol, Treedep *dep, int pos, int nchild ) { TopDep td; td.topol = topol; td.dep = dep; int child; if( nchild == 0 || nchild == 2 ) { if( (child=(td.dep)[pos].child0) == -1 ) { *order++ = (td.topol)[pos][0][0]; *order = -1; } else { order = topolorder_lessargs( order, &td, child ); } } if( nchild == 1 || nchild == 2 ) { if( (child=(td.dep)[pos].child1) == -1 ) { *order++ = (td.topol)[pos][1][0]; *order = -1; } 
else { order = topolorder_lessargs( order, &td, child ); } } return ( order ); } #if RECURSIVETOP #else static void topolorder_mudaari( int nseq, int *n1, int *n2, int *order1, int *order2, int ***topol, Treedep *dep, int pos ) // memhist[][] wo free sezu, recalcpairs4thread() ni wataseba, kono kansuu ha iranai. -> V7.383 // memhist[][] no memory shiyou ryou ha, saiaku no baai O(N^2) { int **memhist, **localmem; int i, s1, s2, c1, c2; memhist = AllocateIntMtx( pos, 0 ); localmem = AllocateIntMtx( 2, 0 ); for( i=0; i<=pos; i++ ) memhist[i] = NULL; for( i=0; i<=pos; i++ ) { c1 = dep[i].child0; c2 = dep[i].child1; if( c1 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = topol[i][0][0]; localmem[0][1] = -1; s1 = 1; } else { localmem[0] = memhist[c1]; s1 = intlen( localmem[0] ); } if( c2 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = topol[i][1][0]; localmem[1][1] = -1; s2 = 1; } else { localmem[1] = memhist[c2]; s2 = intlen( localmem[1] ); } if( i == pos ) { intcpy( order1, localmem[0] ); intcpy( order2, localmem[1] ); *n1 = s1; *n2 = s2; } else { memhist[i] = calloc( sizeof( int ), s1+s2+1 ); intcpy( memhist[i], localmem[0] ); intcpy( memhist[i]+s1, localmem[1] ); memhist[i][s1+s2] = -1; } free( localmem[0] ); free( localmem[1] ); if( c1 != -1 ) memhist[c1] = NULL; if( c2 != -1 ) memhist[c2] = NULL; // reporterr( "freeing memhist[%d]\n", dep[i].child0 ); // reporterr( "freeing memhist[%d]\n", dep[i].child1 ); } for( i=0; i<=pos; i++ ) { if( memhist[i] ) free( memhist[i] ); memhist[i] = NULL; } free( memhist ); free( localmem ); } #endif #if CANONICALTREEFORMAT void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed ) { FILE *fp; int i, j; double l, ll; int treelen; char **tree; char *instanttree; int posinit; // char *treetmp, *tt; char *nametmp, *nameptr, *tmpptr; char namec; int *order; int im, jm, mm; if( treeout ) { // treetmp = NULL; nametmp = 
AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( nseq, 0 ); treelen = nseq; for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); treelen += strlen( tree[i] ) + 20; } instanttree = calloc( treelen, sizeof( char ) ); posinit = 0; for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); treelen += strlen( tree[i] ) + 20; } instanttree = calloc( treelen, sizeof( char ) ); posinit = 0; for( i=0; i k ) { fprintf( fp, "%d %d %f %f\n", k+1, jm+1, len[i][0], len[i][1] ); } else { fprintf( fp, "%d %d %f %f\n", jm+1, k+1, len[i][1], len[i][0] ); k = jm; } } #endif fclose( fp ); free( order ); } #endif void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int *hist = NULL; Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; int nmemim, nmemjm; char **tree; char *treetmp; char *nametmp, *nameptr, *tmpptr; char namec; FILE *fp; int node[2]; double *height; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } reporterr( "Loading a tree\n" ); if( !hist ) { hist = AllocateIntVec( nseq ); ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); nmemar = AllocateIntVec( nseq ); // treetmp = AllocateCharVec( nseq*50 ); if( dep ) height = AllocateFloatVec( nseq ); } if( treeout ) { treetmp = NULL; nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( nseq, nseq*50 ); tree = AllocateCharMtx( nseq, 0 ); for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp 
)+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; // if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) if( im > nseq-1 || jm > nseq-1 ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } if( len[k][0] == -1.0 || len[k][1] == -1.0 ) { reporterr( "\n\nERROR: Branch length is not given.\n" ); exit( 1 ); } if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; #endif prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { 
pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; // mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } } } if( treeout ) { treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // reporterr( "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stderr, "vSTEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); fprintf( stderr, "\n" ); #endif if( dep ) { height[im] += len[k][0]; // for ig tree, 2015/Dec/25 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 // reporterr( "##### dep[%d].distfromtip = %f\n\n", k, height[im] ); } // reporterr( "dep[%d].child0 = %d\n", k, dep[k].child0 ); // reporterr( "dep[%d].child1 = %d\n", k, dep[k].child1 ); // reporterr( "dep[%d].distfromtip = %f\n", k, dep[k].distfromtip ); } fclose( fp ); if( treeout ) { fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fprintf( fp, "#by loadtree\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); } free( hist ); free( (char *)ac ); free( (void *)nmemar ); if( dep ) free( height ); } int check_guidetreefile( int *seed, int *npick, double *limitram ) { char string[100]; char *sizestring; FILE *fp; double tanni; double tmpd; *seed = 0; *npick = 200; *limitram = 10.0 * 1000 * 1000 * 1000; // 10GB fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } fgets( string, 999, fp ); fclose( fp ); if( !strncmp( string, "shuffle", 7 ) ) { sscanf( string+7, "%d", seed ); reporterr( "shuffle, seed=%d\n", *seed ); return( 's' ); } else if( !strncmp( string, "pileup", 6 ) ) { reporterr( "pileup.\n" ); return( 'p' ); } else if( !strncmp( string, "auto", 4 ) ) { sscanf( string+4, "%d %d", seed, npick ); reporterr( "auto, seed=%d, npick=%d\n", *seed, *npick ); if( *npick < 2 ) { reporterr( "Check npick\n" ); 
exit( 1 ); } return( 'a' ); } else if( !strncmp( string, "test", 4 ) ) { sscanf( string+4, "%d %d", seed, npick ); reporterr( "calc, seed=%d, npick=%d\n", *seed, *npick ); if( *npick < 2 ) { reporterr( "Check npick\n" ); exit( 1 ); } return( 't' ); } else if( !strncmp( string, "compact", 7 ) ) { sizestring = string + 7; reporterr( "sizestring = %s\n", sizestring ); if( strchr( sizestring, 'k' ) || strchr( sizestring, 'k' ) ) tanni = 1.0 * 1000; // kB else if( strchr( sizestring, 'M' ) || strchr( sizestring, 'm' ) ) tanni = 1.0 * 1000 * 1000; // GB else if( strchr( sizestring, 'G' ) || strchr( sizestring, 'g' ) ) tanni = 1.0 * 1000 * 1000 * 1000; // GB else if( strchr( sizestring, 'T' ) || strchr( sizestring, 't' ) ) tanni = 1.0 * 1000 * 1000 * 1000 * 1000; // TB else { reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" ); exit( 1 ); } sscanf( sizestring, "%lf", &tmpd ); *limitram = tmpd * tanni; reporterr( "Initial RAM usage = %10.3fGB\n", *limitram/1000/1000/1000 ); return( 'c' ); } else if( !strncmp( string, "very compact", 12 ) ) { reporterr( "very compact.\n" ); return( 'C' ); } else if( !strncmp( string, "nodepair", 8 ) ) { reporterr( "Use nodepair.\n" ); return( 'n' ); } else { reporterr( "loadtree.\n" ); return( 'l' ); } } static double sueff1, sueff05; //static double sueff1_double, sueff05_double; static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } #if 0 static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } #endif static void 
increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq ) { int nwarned = 0; int i, k, m, s1, s2, sl, ss; int *others, *tft; double maxdist, *dptr, dtmp; tft = calloc( nseq, sizeof( int * ) ); others = calloc( nseq, sizeof( int * ) ); // for( m=0; m-1; m++ ) tft[s1] = 1; for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dtmp = eff[ss][sl-ss]; if( dtmp > maxdist ) maxdist = dtmp; } // reporterr( "maxdist = %f\n", maxdist ); for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) { if( s2 > s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dptr = eff[ss] + sl-ss; if( *dptr < maxdist ) { if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); *dptr = maxdist; } } // for( m=0; m 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); free( tft ); free( others ); } static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq ) { int nwarned = 0; int i, k, m, s1, s2, sl, ss; int *others, *tft; double maxdist, *dptr, dtmp; tft = calloc( nseq, sizeof( int * ) ); others = calloc( nseq, sizeof( int * ) ); reporterr( "\n" ); // Hitsuyou desu. for( i=0; i-1; m++ ) tft[s1] = 1; for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dtmp = eff[ss][sl]; if( dtmp > maxdist ) maxdist = dtmp; } // reporterr( "maxdist = %f\n", maxdist ); for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) { if( s2 > s1 ) { sl = s2; ss = s1; } else { sl = s1; ss = s2; } dptr = eff[ss] + sl; if( *dptr < maxdist ) { if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); *dptr = maxdist; } } } if( nwarned > 100 ) reporterr( "# Sequenc.... 
(more pairs)\n" ); // for( m=0; m _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!! ninconsistentpairs = 0; firsttime = 1; while( 1 ) { if( firsttime ) { firsttime = 0; minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore ) { minscore = tmpdouble; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpdouble=eff[j][i-j]) < minscore ) { minscore = tmpdouble; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_f; } } } exitloop_f: if( allinconsistent ) { reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( 
intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // reporterr( "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); // reporterr( "\n" ); #endif for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); // reporterr( "\n" ); if( overlapmember( groups[i], testtopol ) ) { if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; reporterr( "\n###################################################################\n" ); reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); reporterr( "###################################################################\n" ); } inconsistent[im][jm] = 1; inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 ); reporterr( "reallocating inconsistentpairlist, size=%d\n", ninconsistentpairs+1 ); inconsistentpairlist[ninconsistentpairs][0] = im; inconsistentpairlist[ninconsistentpairs][1] = jm; ninconsistentpairs++; break; } } } if( i == ngroup ) { // reporterr( "OK\n" ); break; } } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = 
topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm-im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) 
acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 reporterr( "\noSTEP-%03d:\n", k+1 ); reporterr( "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 ); reporterr( "\n" ); reporterr( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 ); reporterr( "\n\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); FreeIntMtx( inconsistentpairlist ); free( warned ); } void makecompositiontable_global( int *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } typedef struct _resetnearestthread_arg { int para; // int thread_no; int im; int nseq; double **partmtx; double *mindist; int *nearest; char **seq; int **skiptable; int *tselfscore; int **pointt; int *nlen; double *result; int *joblist; Bchain **acpt; Bchain *ac; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } resetnearestthread_arg_t; static void *msaresetnearestthread( void *arg ) { resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg; // int thread_no = targ->thread_no; int para = targ->para; int im = targ->im; int nseq = targ->nseq; double **partmtx = targ->partmtx; double *mindist = targ->mindist; int *nearest = targ->nearest; char **seq = targ->seq; int **skiptable = 
targ->skiptable; int *tselfscore = targ->tselfscore; double *result = targ->result; int *joblist = targ->joblist; Bchain **acpt = targ->acpt; Bchain *ac = targ->ac; Bchain *acptbk; Bchain *acptinit; int i; acptinit = *acpt; while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *acpt == NULL ) { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } acptbk = *acpt; *acpt = (*acpt)->next; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif i = acptbk->pos; if( nearest[i] == im ) { if( partmtx[im][i] > mindist[i] ) { msaresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, result, joblist ); } } } } static void *kmerresetnearestthread( void *arg ) { resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg; // int thread_no = targ->thread_no; int para = targ->para; int im = targ->im; int nseq = targ->nseq; double **partmtx = targ->partmtx; double *mindist = targ->mindist; int *nearest = targ->nearest; int *tselfscore = targ->tselfscore; int **pointt = targ->pointt; int *nlen = targ->nlen; double *result = targ->result; int *joblist = targ->joblist; Bchain **acpt = targ->acpt; Bchain *ac = targ->ac; int *singlettable1; Bchain *acptbk; Bchain *acptinit; int i; acptinit = *acpt; while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *acpt == NULL ) { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } acptbk = *acpt; *acpt = (*acpt)->next; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif i = acptbk->pos; if( nearest[i] == im ) { if( partmtx[im][i] > mindist[i] ) { if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[i] ); } kmerresetnearest( nseq, ac, partmtx, mindist+i, 
nearest+i, i, tselfscore, pointt, nlen, singlettable1, result, joblist ); if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer if( pointt ) commonsextet_p( NULL, NULL ); } } } } typedef struct _compactdistarrthread_arg { int para; int njob; // int thread_no; int im; int jm; int *nlen; char **seq; int **skiptable; int **pointt; int *table1; int *table2; int *tselfscore; Bchain **acpt; int *posshared; double *mindist; double *newarr; double **partmtx; int *nearest; int *joblist; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } compactdistarrthread_arg_t; static void *verycompactkmerdistarrthreadjoblist( void *arg ) // enablemultithread == 0 demo tsukau { compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg; int njob = targ->njob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *nlen = targ->nlen; int **pointt = targ->pointt; int *table1 = targ->table1; int *table2 = targ->table2; int *tselfscore = targ->tselfscore; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; // double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *nlen = targ->nlen; int **pointt = targ->pointt; int *table1 = targ->table1; int *table2 = targ->table2; int *tselfscore = targ->tselfscore; int 
*joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; // double **partmtx = targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nnjob; int para = targ->para; int im = targ->im; int jm = targ->jm; // int thread_no = targ->thread_no; int *tselfscore = targ->tselfscore; char **seq = targ->seq; int **skiptable = targ->skiptable; int *joblist = targ->joblist; int *posshared = targ->posshared; double *mindist = targ->mindist; int *nearest = targ->nearest; double **partmtx = 
targ->partmtx; double *newarr = targ->newarr; int i, posinjoblist, n; double tmpdist1; double tmpdist2; double tmpdouble; // for( acpti=ac; acpti!=NULL; acpti=acpti->next ) while( 1 ) { #ifdef enablemultithread if( para ) pthread_mutex_lock( targ->mutex ); #endif if( *posshared >= njob ) // block no toki >= { #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif commonsextet_p( NULL, NULL ); return( NULL ); } posinjoblist = *posshared; *posshared += BLOCKSIZE; #ifdef enablemultithread if( para ) pthread_mutex_unlock( targ->mutex ); #endif for( n=0; nrep1 == -1 ) return; if( pt->child0 ) reformat_younger0_rec( ori, pt->child0, n, lastappear, topol, len, dep, pos ); if( pt->child1 ) reformat_younger0_rec( ori, pt->child1, n, lastappear, topol, len, dep, pos ); topol[*pos][0] = (int *)realloc( topol[*pos][0], ( 2 ) * sizeof( int ) ); topol[*pos][1] = (int *)realloc( topol[*pos][1], ( 2 ) * sizeof( int ) ); topol[*pos][0][1] = -1; topol[*pos][1][1] = -1; if( pt->rep0 < pt->rep1 ) { topol[*pos][0][0] = pt->rep0; topol[*pos][1][0] = pt->rep1; len[*pos][0] = pt->len0; len[*pos][1] = pt->len1; dep[*pos].child0 = lastappear[pt->rep0]; dep[*pos].child1 = lastappear[pt->rep1]; } else { topol[*pos][1][0] = pt->rep0; topol[*pos][0][0] = pt->rep1; len[*pos][1] = pt->len0; len[*pos][0] = pt->len1; dep[*pos].child1 = lastappear[pt->rep0]; dep[*pos].child0 = lastappear[pt->rep1]; } lastappear[pt->rep0] = *pos; lastappear[pt->rep1] = *pos; dep[*pos].distfromtip = pt->height; // reporterr( "STEP %d\n", *pos ); // reporterr( "%d %f\n", topol[*pos][0][0], len[*pos][0] ); // reporterr( "%d %f\n", topol[*pos][1][0], len[*pos][1] ); (*pos)++; } #if 0 static void reformat_rec( Treept *ori, Treept *pt, int n, int *lastappear, int ***topol, double **len, Treedep *dep, int *pos ) { if( pt->rep1 == -1 ) return; if( pt->child0 ) reformat_rec( ori, pt->child0, n, lastappear, topol, len, dep, pos ); if( pt->child1 ) reformat_rec( ori, pt->child1, n, lastappear, topol, 
len, dep, pos ); topol[*pos][0] = (int *)realloc( topol[*pos][0], ( 2 ) * sizeof( int ) ); topol[*pos][1] = (int *)realloc( topol[*pos][1], ( 2 ) * sizeof( int ) ); topol[*pos][0][0] = pt->rep0; topol[*pos][0][1] = -1; topol[*pos][1][0] = pt->rep1; topol[*pos][1][1] = -1; len[*pos][0] = pt->len0; len[*pos][1] = pt->len1; dep[*pos].child0 = lastappear[pt->rep0]; dep[*pos].child1 = lastappear[pt->rep1]; lastappear[pt->rep0] = *pos; lastappear[pt->rep1] = *pos; dep[*pos].distfromtip = pt->height; // reporterr( "STEP %d\n", *pos ); // reporterr( "%d %f\n", topol[*pos][0][0], len[*pos][0] ); // reporterr( "%d %f\n", topol[*pos][1][0], len[*pos][1] ); (*pos)++; } #endif static char *reformat_rec_newick( char **subtree, Treept *pt ) { char *newick, *newick0, *newick1; if( pt->rep1 == -1 ) return( subtree[pt->rep0] ); newick0 = reformat_rec_newick( subtree, pt->child0 ); newick1 = reformat_rec_newick( subtree, pt->child1 ); newick = calloc( strlen( newick0 ) + strlen( newick1 ) + 100, sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( pt->rep0 < pt->rep1 ) sprintf( newick, "(%s:%7.5f,%s:%7.5f)", newick0, pt->len0, newick1, pt->len1 ); else sprintf( newick, "(%s:%7.5f,%s:%7.5f)", newick1, pt->len1, newick0, pt->len0 ); free( newick0 ); free( newick1 ); return( newick ); } static void reformattree( Treept *root, Treept *ori, int n, int ***topol, double **len, Treedep *dep, char **name, int treeout, int my_rank ) // my_rank tsuika, 2017/Mar/6 { int i, pos; char *newick; int *lastappear; // int rootpos; // for( rootpos=n*2-2; rootpos>n; rootpos-- ) if( ori[rootpos].parent == NULL ) break; // reporterr( "Reformat, i=%d\n", i ); // reporterr( "njob=%d, treept[%d].parent,child0,child1 - self = %p,%p,%p - %p\n", n, i, treept[i].parent,treept[i].child0,treept[i].child1,treept+i ); if( treeout && my_rank == 0 ) // my_rank tsuika, 2017/Mar/6 // if( treeout ) { // reporterr( "treeout!\n" ); FILE *fp; int j; char namec, *nametmp, *tmpptr, **tree, *nameptr; nametmp = 
AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( njob, 0 ); for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } free( nametmp ); newick = reformat_rec_newick( tree, root ); // tree[] ha free sareru fp = fopen( "infile.tree", "w" ); // my_rank==0 nomi fprintf( fp, "%s;\n", newick ); fclose( fp ); free( tree ); // free[] ha free sareteiru free( newick ); // FreeCharMtx( tree ); } lastappear = (int *)malloc( sizeof( int ) * n ); if( lastappear == NULL ) { reporterr( "Cannot allocate lastappear\n" ); exit( 1 ); } for( i=0; i&2" ); // system( "df 1>&2" ); // system( "lfs df 1>&2" ); // system( "cat /proc/fs/lustre/lov/work2-clilov-*/target_obd 1>&2" ); reporterr( "error in fwrite(), retrying, lim=%d\n", lim ); sleep( 1 ); } reporterr( "error in myfwrite()" ); exit( 1 ); } #endif static int writehat3node_noaddress( int n, int i, int j, int ii, int jj, FILE **fpp, int tmpint, LocalHom *lh, int *uselh ) { // reporterr( "node=%d (*fpp=%p), i=%d, j=%d\n", n, *fpp, i+1, j+1 ); { LocalHom *tmpptr; double opt; int len; int size; char c; // if( i < j ) // iranai { // if( lh->opt == -1.0 || uselh[i] == 0 || uselh[j] == 0 ) if( uselh[i] == 0 && uselh[j] == 0 ) { return( 0 ); } #if DISPPAIRID fprintf( *fpp, "node %d: %d (%d) - %d (%d) (%c)\n", n, i+1, ii, j+1, jj, tmpint ); #endif opt = lh->opt; size = 0; for( tmpptr=lh; tmpptr; tmpptr=tmpptr->next ) size++; #if HAT3SORTED if( fwrite( &size, sizeof( int ), 1, *fpp ) != 1 || fwrite( &opt, sizeof( double ), 1, *fpp ) != 1 ) #else if( fwrite( &ii, sizeof( int ), 1, *fpp ) != 1 || fwrite( &jj, sizeof( int ), 1, *fpp ) != 1 || fwrite( &size, sizeof( int ), 1, *fpp ) != 1 || fwrite( &opt, sizeof( double ), 1, *fpp ) != 1 ) #endif { reporterr( "write error, n=%d\n", n ); return( -1 ); } for( tmpptr=lh; tmpptr; tmpptr=tmpptr->next ) { len = 
tmpptr->end1-tmpptr->start1; if( fwrite( &(tmpptr->start1), sizeof( int ), 1, *fpp ) != 1 || fwrite( &(tmpptr->start2), sizeof( int ), 1, *fpp ) != 1 || fwrite( &len, sizeof( int ), 1, *fpp ) != 1 ) { reporterr( "write error, n=%d\n", n ); return( -1 ); } // reporterr( "reg1:%d-%d, reg2:%d-%d, len=%d, score=%f\n", tmpptr->start1, tmpptr->start1+len, tmpptr->start2, tmpptr->start2+len, len, opt ); } c = '\n'; if( fwrite( &c, sizeof( char ), 1, *fpp ) != 1 ) { reporterr( "write error, n=%d\n", n ); return( -1 ); } } #if HAT3SORTED return( size * 3 * sizeof( int ) + sizeof( char ) + sizeof( int ) + sizeof( double ) ); #else return( size * 3 * sizeof( int ) + sizeof( char ) + 3 * sizeof( int ) + sizeof( double ) ); #endif } } #if 0 typedef struct _calcnearestthread_arg { char **bseq; int thread_no; int *posshared; int alloclen; int nlim; double *selfscore; double *mindists; int *neighbors; pthread_mutex_t *mutex; } calcnearestthread_arg_t; static void *calcnearestthread( void *arg ) { calcnearestthread_arg_t *targ = (calcnearestthread_arg_t *)arg; char **bseq = targ->bseq; int thread_no = targ->thread_no; int *posshared = targ->posshared; int alloclen = targ->alloclen; int nlim = targ->nlim; double *selfscore = targ->selfscore; double *mindists = targ->mindists; int *neighbors = targ->neighbors; pthread_mutex_t *mutex = targ->mutex; int pos; double tmpdist, mindist; int progress; int neighbor, i; double (*distfunc)( char *, char *, double, double, int ); if( alg == 'A' ) distfunc = distdp_noalign; else if( alg == 'L' ) distfunc = distdpL_noalign; else if( alg == 'N' ) distfunc = distdpN_noalign; while( 1 ) { pthread_mutex_lock( mutex ); // if( *posshared > nlim ) if( *posshared < 1 ) { pthread_mutex_unlock( mutex ); // reporterr( "freeing tmpseq1\n" ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11_noalign( NULL, NULL, NULL ); genL__align11( 
NULL, NULL, NULL, 0, NULL, NULL ); return( NULL ); } pos = *posshared; *posshared -= 1; pthread_mutex_unlock( mutex ); if( (nlim-pos) % 100 == 0 ) { //progress = ( (unsigned long long)pos * (unsigned long long)nlim - (unsigned long long)pos*((unsigned long long)pos-1.0)*0.5 ) / ( (unsigned long long)nlim * ((unsigned long long)nlim-1.0) *0.5 ) * 100; // progress = ( (double)pos * nlim - pos*(pos-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; progress = ( (double)(nlim-pos) * nlim - (nlim-pos)*((nlim-pos)-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; reporterr( "Step %d (%d%%), thread %d \r", (nlim-pos), progress, thread_no ); } mindist = 999.9; // for( i=pos+1; i-1; i-- ) { #if 0 tmpdist = 0.0; // test! #else tmpdist = distfunc( bseq[pos], bseq[i], selfscore[pos], selfscore[i], alloclen ); #endif if( mindist > tmpdist ) { mindist = tmpdist; neighbor = i; } } mindists[pos] = mindist; neighbors[pos] = neighbor; } } #endif typedef struct _calcnearestthread_arg_mpi { char **bseq; int thread_no; int *posshared; int alloclen; // int nlim; double *selfscore; double *mindists_partial; int *neighbors_partial; int start; int end; pthread_mutex_t *mutex; } calcnearestthread_arg_t_mpi; static void *calcnearestthread_mpi( void *arg ) { calcnearestthread_arg_t_mpi *targ = (calcnearestthread_arg_t_mpi *)arg; char **bseq = targ->bseq; // int thread_no = targ->thread_no; int *posshared = targ->posshared; int alloclen = targ->alloclen; // int nlim = targ->nlim; double *selfscore = targ->selfscore; double *mindists_partial = targ->mindists_partial; int *neighbors_partial = targ->neighbors_partial; int start = targ->start; // int end = targ->end; pthread_mutex_t *mutex = targ->mutex; int pos; double tmpdist, mindist; // int progress; int neighbor, i; double (*distfunc)( char *, char *, double, double, int ); if( alg == 'A' ) distfunc = distdp_noalign; else if( alg == 'L' ) distfunc = distdpL_noalign; else if( alg == 'N' ) distfunc = distdpN_noalign; while( 1 ) { pthread_mutex_lock( 
mutex ); if( *posshared < start ) { pthread_mutex_unlock( mutex ); // reporterr( "freeing tmpseq1\n" ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11_noalign( NULL, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); return( NULL ); } pos = *posshared; *posshared -= 1; pthread_mutex_unlock( mutex ); // if( (nlim-pos) % 100 == 0 ) // { // //progress = ( (unsigned long long)pos * (unsigned long long)nlim - (unsigned long long)pos*((unsigned long long)pos-1.0)*0.5 ) / ( (unsigned long long)nlim * ((unsigned long long)nlim-1.0) *0.5 ) * 100; // // progress = ( (double)pos * nlim - pos*(pos-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; // progress = ( (double)(nlim-pos) * nlim - (nlim-pos)*((nlim-pos)-1.0)*0.5 ) / ( nlim * (nlim-1.0) * 0.5 ) * 100; // reporterr( "Step %d (%d%%), thread %d \r", (nlim-pos), progress, thread_no ); // } mindist = 999.9; // for( i=pos+1; i-1; i-- ) { #if 0 tmpdist = 0.0; // test! #else tmpdist = distfunc( bseq[pos], bseq[i], selfscore[pos], selfscore[i], alloclen ); #endif if( mindist > tmpdist ) { mindist = tmpdist; neighbor = i; } } mindists_partial[pos-start] = mindist; neighbors_partial[pos-start] = neighbor; } } typedef struct _jobplan { int node; int start; int end; int subid; int divided; unsigned long long npairs; } Jobplan; static void flushlockinit( int my_rank ) { int namelen; int ntry; if( my_rank == 0 ) { #if 0 lockthisjobonly = 0; struct stat st; system( "lfs getstripe . > _lfscheck 2>/dev/null" ); stat( "_lfscheck", &st ); if( 1 || st.st_size == 0 ) { lockthisjobonly = 1; reporterr( "Use shm lock. 
This job only.\n" ); sprintf( lockfile, "/mafftflush-%s-%d", getenv( "HOSTNAME" ), getpid() ); // reporterr( "Use filesystem's lock\n" ); // lockfile[0]=0; // namelen = 0; namelen = strlen( lockfile ) + 1; } else { // lockthisjobonly = 1; // reporterr( "Use intra-job lock.\n" ); // sprintf( lockfile, "/tmp/mafftflush-%s-%d", getenv( "HOSTNAME" ), getpid() ); lockthisjobonly = 0; reporterr( "Use shm lock. Lock other jobs, too.\n" ); sprintf( lockfile, "/mafftflush-%s", getenv( "USER" ) ); namelen = strlen( lockfile ) + 1; } reporterr( "rank=%d, lockfile=%s\n", my_rank, lockfile ); #else lockthisjobonly = 1; sprintf( lockfile, "/mafftflush-%s-%d", getenv( "HOSTNAME" ), getpid() ); #ifdef enableatomic reporterr( "Use shm, semaphore, use stdatomic.h, key=%s\n", lockfile ); #else reporterr( "Use shm, pthread's shared mutex or semaphore, key=%s\n", lockfile ); #endif namelen = strlen( lockfile ) + 1; #endif } MPI_Bcast(&namelen, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(lockfile, namelen, MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Bcast(&lockthisjobonly, 1, MPI_INT, 0, MPI_COMM_WORLD); #ifdef enableatomic lockfd = shm_open( lockfile, O_RDWR|O_CREAT|O_EXCL, 0600 ); if( lockfd == -1 ) { lockfd = shm_open( lockfile, O_RDWR, 0600 ); // reporterr( "Trying to open existing shm %s on %s, without re-ftruncate, fixed mkdircommand allocation and handling fflush failure. Fixed a bug in flushlock\n", lockfile, getenv("HOSTNAME") ); if( lockfd == -1 ) { reporterr( "Failed to open %s on %s\n", lockfile, getenv("HOSTNAME") ); exit( 1 ); } } else reporterr( "Trying to create and initialize shm %s on %s\n", lockfile, getenv("HOSTNAME") ); // reporterr( "lockfd=%d, (rank=%d, host=%s)\n", lockfd, my_rank, getenv("HOSTNAME") ); if( ftruncate( lockfd, sizeof(ATOMICINT) ) != 0 ) // naito tomaru.. 
{ perror("ftruncate"); exit( 1 ); } lockaddr = (ATOMICINT *)mmap( NULL, sizeof(ATOMICINT), PROT_READ|PROT_WRITE, MAP_SHARED, lockfd, 0 ); if( lockaddr == MAP_FAILED ) { reporterr( "error in mmap on %s\n", getenv("HOSTNAME") ); exit( 1 ); } atomic_store( lockaddr, 0 ); // nenn no tame #else lockfd = shm_open( lockfile, O_RDWR|O_CREAT|O_EXCL, 0600 ); if( lockfd == -1 ) { // reporterr( "Use existing lockfile in %s. Initialize it in each process\n", getenv("HOSTNAME") ); lockfd = shm_open( lockfile, O_RDWR, 0600 ); if( lockfd == -1 ) { reporterr( "Failed to open %s on %s\n", lockfile, getenv("HOSTNAME") ); exit( 1 ); } createshm = 0; } else { #if SEMAPHORE reporterr( "trying to create shm %s for semaphore, NSTREAM=%d\n", lockfile, NSTREAM ); if( ftruncate(lockfd, sizeof(sem_t)) != 0 ) #else if( ftruncate(lockfd, sizeof(pthread_mutex_t)) != 0 ) #endif { perror( "ftruncate" ); exit( 1 ); } createshm = 1; reporterr( "Created and truncated lockfile exclusively in %s.\n", getenv("HOSTNAME") ); } for( ntry=0; ntry<10; ntry++ ) // mada ftruncate dekite naikamo shirenainode { // reporterr( "trying to mmap in %s\n", getenv("HOSTNAME") ); #if SEMAPHORE reporterr( "trying to mmap in %s for semaphore, NSTREAM=%d\n", getenv("HOSTNAME"), NSTREAM ); lockaddr = (sem_t *)mmap( NULL, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, lockfd, 0 ); #else lockaddr = (pthread_mutex_t *)mmap( NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, MAP_SHARED, lockfd, 0 ); #endif if( lockaddr == MAP_FAILED ) { perror( "mmap" ); sleep( 1 ); continue; } // reporterr( "succeeded to mmap, size= %d\n", sizeof(pthread_mutex_t) ); break; } if( ntry == 10 ) { reporterr( "Cannot mmap %s in %s\n", lockfile, getenv("HOSTNAME") ); exit( 1 ); } if( createshm ) // hokademo shokika sureba kantan ni naru { #if SEMAPHORE if( sem_init( lockaddr, 1, NSTREAM ) ) { perror("sem_init"); // exit( 1 ); // POSIX.1-2001 does not always return zero. 
} #else pthread_mutexattr_t attr; if(pthread_mutexattr_init(&attr)) { perror("pthread_mutexattr_init"); exit( 1 ); } if(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { perror("pthread_mutexattr_setpshared"); exit( 1 ); } if(pthread_mutex_init(lockaddr, &attr)) { perror("pthread_mutex_init"); // reporterr( "Probably because of duplicate initialization of shared mutex. Can be ignored.\n" ); exit( 1 ); } #endif } #endif MPI_Barrier(MPI_COMM_WORLD); // shokika no mae ni tsukawareruno wo fusegu. // reporterr( "Successfully mapped SHM %s on %s. val=%d\n", lockfile, getenv( "HOSTNAME" ), *lockaddr ); } static void flushlockremove() { if( lockaddr == NULL ) return; // nai #ifdef enableatomic if( munmap( lockaddr, sizeof(ATOMICINT) ) ) { reporterr( "Failed to unmap SHM\n" ); exit( 1 ); } #else #if SEMAPHORE if( createshm == 1 ) { reporterr( "sem_destroy in %s\n", getenv("HOSTNAME") ); if( sem_destroy(lockaddr) ) { perror("sem_destroy"); //exit( 1 ); // tajuu ni destroy. } } if( munmap( lockaddr, sizeof(sem_t) ) ) { reporterr( "Failed to unmap SHM\n" ); exit( 1 ); } #else // if( pthread_mutex_destroy(lockaddr) ) // { // perror("pthread_mutex_destroy"); // exit( 1 ); // } reporterr( "Did not destroy mutex.\n" ); if( munmap( lockaddr, sizeof(pthread_mutex_t) ) ) { reporterr( "Failed to unmap SHM\n" ); exit( 1 ); } #endif #endif if( close( lockfd ) ) { perror( "close" ); exit( 1 ); } if( lockthisjobonly ) { // if( shm_unlink( lockfile ) ) perror( "shm_unlink" ); shm_unlink( lockfile ); // No such file -> no problem reporterr( "Unmapped, closed and unlinked SHM.\n" ); } else reporterr( "Successfully unmapped and closed SHM, but did not unlink.\n" ); } static void flushlock() { if( lockaddr == NULL ) return; #ifdef enableatomic int interval = 10; while( 1 ) { if( atomic_fetch_add( lockaddr, 1 ) < NSTREAM ) // atomic return; atomic_fetch_sub( lockaddr, 1 ); // kouritsu warui? 
usleep( interval * 1000 ); interval += 10; if( interval > 5000 ) interval = 5000; } #else #if SEMAPHORE if( sem_wait(lockaddr) ) { perror("sem_wait"); exit( 1 ); } #else pthread_mutex_lock(lockaddr); #endif #endif } static void flushunlock() { if( lockaddr == NULL ) return; #ifdef enableatomic int tmpint; // atomic_fetch_sub( lockaddr, 1 ); if( (tmpint=atomic_fetch_sub( lockaddr, 1 )) > NSTREAM ) { reporterr( "Strange. *lockaddr == %d before subtraction (too large), %s\n", tmpint, getenv("HOSTNAME") ); atomic_store( lockaddr, NSTREAM-1 ); } else if( tmpint < 1 ) { reporterr( "Strange. *lockaddr == %d before subtraction (too small), %s\n", tmpint, getenv("HOSTNAME") ); atomic_store( lockaddr, 0 ); } #else #if SEMAPHORE if( sem_post(lockaddr) ) { perror("sem_post"); exit( 1 ); } #else pthread_mutex_unlock(lockaddr); #endif #endif } typedef struct _recalcpairs4thread_arg_mpi { // int thread_no; int nseq; // int numjob; Jobplan *jobplan; char **bseq; #if EXACTLYSAMEASPAIRLOCALALIGN char **dseq; #endif int *joborder; int *posshared; int *uselh; double *selfscore; int alloclen; int ***topol; Treedep *dep; unsigned long long *done; int start; int end; pthread_mutex_t *mutex; int my_rank; } recalcpairs4thread_arg_t_mpi; static void *recalcpairs4thread_mpi( void *arg )// no TLS { recalcpairs4thread_arg_t_mpi *targ = (recalcpairs4thread_arg_t_mpi *)arg; char **bseq = targ->bseq; int nseq=targ->nseq; // int numjob=targ->numjob; int *posshared = targ->posshared; int *joborder = targ->joborder; int *uselh = targ->uselh; // int thread_no = targ->thread_no; int ***topol = targ->topol; Jobplan *jobplan=targ->jobplan; Treedep *dep = targ->dep; int alloclen = targ->alloclen; double *selfscore = targ->selfscore;; unsigned long long *done = targ->done; int my_rank = targ->my_rank; int writetry, writeres; // int start = targ->start; int end = targ->end; pthread_mutex_t *mutex = targ->mutex; int i, j, m0, m1, m00, m11, n, step, istart, iend, n1, n0, subid; int prevn; char *tmpseq1, 
*tmpseq2; LocalHom *localhomtable; int *mem0, *mem1; FILE *localfp; char *fn; int progress = 0; unsigned long long totalpairs = (unsigned long long)nseq*(nseq-1)/2; int tmpnodepairs; double **dynamicmtx = NULL; double **mtxptr; double (*distfunc)( double **, char *, char *, LocalHom *, double, double, int ); int sizewritten; void *stbuf; if( alg == 'A' ) distfunc = distdp; else if( alg == 'L' ) distfunc = distdpL; else if( alg == 'N' ) distfunc = distdpN; else { reporterr( "alg %c is not yet supported\n", alg ); exit( 1 ); } #if EXACTLYSAMEASPAIRLOCALALIGN double tmpdist; char **dseq = targ->dseq; double (*distfunc_noalign)( char *, char *, double, double, int ); if( alg == 'A' ) distfunc_noalign = distdp_noalign; else if( alg == 'L' ) distfunc_noalign = distdpL_noalign; else if( alg == 'N' ) distfunc_noalign = distdpN_noalign; #endif mem0 = calloc( sizeof( int ), njob ); mem1 = calloc( sizeof( int ), njob ); tmpseq1 = calloc( sizeof( char ), alloclen ); tmpseq2 = calloc( sizeof( char ), alloclen ); localhomtable = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); freelocalhom1( localhomtable ); if( specificityconsideration > 0.0 ) { dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); mtxptr = dynamicmtx; } else mtxptr = n_dis_consweight_multi; prevn = -1; while( 1 ) { pthread_mutex_lock( mutex ); if( *posshared >= end ) // if( *posshared <= -1 ) { pthread_mutex_unlock( mutex ); // reporterr( "freeing tmpseq1\n" ); free( tmpseq1 ); tmpseq1 = NULL; free( tmpseq2 ); tmpseq2 = NULL; free( mem0 ); mem0 = NULL; free( mem1 ); mem1 = NULL; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; G__align11( NULL, NULL, NULL, 0, 0, 0 ); L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); #if EXACTLYSAMEASPAIRLOCALALIGN L__align11_noalign( NULL, NULL, NULL ); G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); #endif free( localhomtable ); if( dynamicmtx ) FreeDoubleMtx( 
dynamicmtx ); return( NULL ); } n = jobplan[step=joborder[*posshared]].node; // *posshared -= 1; *posshared += 1; *done += jobplan[step].npairs; // reporterr( "nodenum=%d, step=%d, npairs=%lld\n", n, step, jobplan[step].npairs ); pthread_mutex_unlock( mutex ); istart = jobplan[step].start; iend = jobplan[step].end; subid = jobplan[step].subid; #if EXACTLYSAMEASPAIRLOCALALIGN #else if( specificityconsideration > 0.0 ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[n].distfromtip ); #endif if( step%100==0 ) // if( *done % 100 == 0 ) { progress = (int)( (double)*done / totalpairs * 100 ); // reporterr( "Node %06d-%03d (%d%%) \r", n, subid, progress ); } if( n != prevn ) { prevn = n; #if N0LOOPFIRST #if RECURSIVETOP mem0[0] = -1; n0 = topolorderz( mem0, topol, dep, n, 1 ) - mem0; mem1[0] = -1; n1 = topolorderz( mem1, topol, dep, n, 0 ) - mem1; #else topolorder_mudaari( njob, &n1, &n0, mem1, mem0, topol, dep, n ); #endif #else #if RECURSIVETOP mem0[0] = -1; n0 = topolorderz( mem0, topol, dep, n, 0 ) - mem0; mem1[0] = -1; n1 = topolorderz( mem1, topol, dep, n, 1 ) - mem1; #else topolorder_mudaari( njob, &n0, &n1, mem0, mem1, topol, dep, n ); #endif #endif } stbuf = malloc( MYBUFSIZE ); if( !stbuf ) { reporterr( "Cannot allocate stbuf, size=d\n", MYBUFSIZE ); exit( 1 ); } fn = calloc( 100, sizeof( char ) ); sprintf( fn, "hat3dir/%d-/hat3node-%d-%d", (int)(n/HAT3NODEBLOCK)*HAT3NODEBLOCK, n, subid ); for( writetry=0; writetry<10; writetry++ ) { flushlock(); localfp = fopen( fn, "wb" ); if( !localfp ) { char *mkdircommand; mkdircommand = calloc( sizeof( char ), 1000 ); sprintf( mkdircommand, "mkdir -p hat3dir/%d-/", (int)(n/HAT3NODEBLOCK)*HAT3NODEBLOCK ); system( mkdircommand ); free( mkdircommand ); reporterr( "Failed to open %s but will retry (%d).\n", fn, writetry ); sleep( 1 ); flushunlock(); continue; } if( setvbuf( localfp, stbuf, _IOFBF, MYBUFSIZE ) ) { reporterr( "Cannot change the buffer size to %d\n", MYBUFSIZE ); flushunlock(); exit( 1 ); } flushunlock(); 
sizewritten = 0; writeres = 0; // nen no tame tmpnodepairs = 0; for( i=istart; i m11 ) { m0 = m11; m1= m00; } else { m0 = m00; m1= m11; } if( nadd ) { if( m1 < njob-nadd ) continue; // if( m0 >= njob-nadd || m1 < njob-nadd ) continue; // oosugi! } tmpnodepairs++; // reporterr( "node%d, %d x %d\n", n, m0+1, m1+1 ); strcpy( tmpseq1, bseq[m0] ); strcpy( tmpseq2, bseq[m1] ); #if EXACTLYSAMEASPAIRLOCALALIGN if( specificityconsideration > 0.0 ) { tmpdist = distfunc_noalign( dseq[m0], dseq[m1], selfscore[m0], selfscore[m1], alloclen ); makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * tmpdist ); // upgma ni awaseru. distfunc( dynamicmtx, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); } else { distfunc( n_dis_consweight_multi, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); } #else distfunc( mtxptr, tmpseq1, tmpseq2, localhomtable, selfscore[m0], selfscore[m1], alloclen ); #endif // reporterr( "tmpdist = %f\n", tmpdist ); #if N0LOOPFIRST writeres = writehat3node_noaddress( n, m0, m1, j, i, &localfp, 'n', localhomtable, uselh ); #else writeres = writehat3node_noaddress( n, m0, m1, i, j, &localfp, 'n', localhomtable, uselh ); #endif freelocalhom1( localhomtable ); if( writeres == -1 ) { reporterr( "Failed to write %s (%d). rank=%d\n", fn, writetry, my_rank ); reporterr( "sizewritten=%d, MYBUFSIZE=%d, writeres=%d\n", sizewritten, MYBUFSIZE, writeres ); sleep( 10 ); goto rewrite; } sizewritten += writeres; if( sizewritten > MYBUFSIZE - 4000 ) // avoid automatic flush { flushlock(); // reporterr( "flush at the middle of a file, MYBUFSIZE=%d, sizewritten=%d, writeres=%d, file=%s\n", MYBUFSIZE, sizewritten, writeres, fn ); // reporterr( "sizewritten=%d, flushing %s\n", sizewritten, fn ); sizewritten = 0; if( fflush( localfp ) ) { reporterr( "Failed to fflush in the middle of a file, rank=%d, hostname=%s\n", my_rank, getenv( "HOSTNAME" ) ); system( "date" ); sleep( 10 ); flushunlock(); // fclose no ato no houga anzen. 
continue; } flushunlock(); } } } flushlock(); if( fflush( localfp ) ) { reporterr( "Failed to fflush %s. rank=%d, hostname=%s\n", fn, my_rank, getenv( "HOSTNAME" ) ); system( "date" ); sleep( 10 ); //__fpurge( localfp ); flushunlock(); // fclose no ato no houga anzen. continue; } // flushunlock(); // fclose no ato no houga anzen. if( fclose( localfp ) ) { reporterr( "Failed to close %s (%d). rank=%d\n", fn, writetry, my_rank ); sleep( 10 ); flushunlock(); // fclose no ato no houga anzen. continue; } flushunlock(); // koko no houga anzen. // if( !writeres ) // { // char com[100]; // sprintf( com, "ls -ltr %s; hostname", fn ); // reporterr( "wrote %s successfully, rank=%d\n", fn, my_rank ); // system( com ); break; // } // reporterr( "node = %d, tmpnodepairs = %d\n", n, tmpnodepairs ); // rewrite: reporterr( "Will not close localfp. filenamd=%s, rank=%d\n", fn, my_rank ); ; } if( writetry == 10 ) { reporterr( "Failed to write %s 10 times in %s. Give up.\n", fn, getenv( "HOSTNAME" ) ); system( "hostname" ); exit( 1 ); } free( stbuf ); free( fn ); } } #if 0 static void calcnearest_para( int njob, double *selfscore, char **bseq, int alloclen, int *neighbors, double *mindists ) { int i; calcnearestthread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; int posshared; #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif // posshared = 0; posshared = njob-1; targ = calloc( nthread, sizeof( calcnearestthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; injob; const unsigned int chunksize = targ->chunksize; // const unsigned int num_of_chunks = targ->num_of_chunks; int *neighbors = targ->neighbors; double *mindists = targ->mindists; int *chunk_counter_pt = targ->chunk_counter_pt; pthread_mutex_t *mutex_count = targ->mutex_count; int i; int my_rank; int num_of_processes; MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); MPI_Comm_size(MPI_COMM_WORLD,&num_of_processes); enum 
{TODO,DONE,EXIT}; MPI_Status status; unsigned int mpi_chunk_balance = 0; // unsigned short *const chunks_assignment = (unsigned short *)malloc(num_of_chunks*sizeof(unsigned short)); // const int first_child = nthread>1?0:1; const int first_child = 1; pthread_mutex_lock( mutex_count ); // if( nthread > 1 ) // { // fprintf( stderr, "my_rank: %d, chunks_assignment: %d, 0\n", my_rank, *chunk_counter_pt); // (*chunk_counter_pt)--; // } for(i=first_child;injob?njob:(chunk_index+1)*chunksize+1);i++) fprintf( stderr, "%d, ", neighbors[i]); fprintf( stderr, "\n"); #endif MPI_Recv(&mindists[chunk_index*chunksize+1],num_of_jobs,MPI_DOUBLE,sender_id,DONE,MPI_COMM_WORLD,&status); #if ENABLEMPIDEBUG for(i=chunk_index*chunksize+1;i<((chunk_index+1)*chunksize>njob?njob:(chunk_index+1)*chunksize+1);i++) fprintf( stderr, "%lf, ", mindists[i]); fprintf( stderr, "\n"); #endif pthread_mutex_lock( mutex_count ); chunk_counter = (*chunk_counter_pt)--; pthread_mutex_unlock( mutex_count ); if(chunk_counter>=0){ MPI_Send(&chunk_counter,1,MPI_INT,sender_id,TODO,MPI_COMM_WORLD); // chunks_assignment[*chunk_counter_pt]=sender_id; fprintf( stderr, "my_rank: %d, chunks_assignment: %d, %d\n", my_rank, chunk_counter, sender_id); mpi_chunk_balance++; } mpi_chunk_balance--; } for(i=first_child;imy_rank; const int num_thread = my_rank==0?nthread-1:nthread; if(num_thread>0){ const int njob = sarg->njob; const unsigned int chunksize = sarg->chunksize; char **bseq = sarg->bseq; double *selfscore = sarg->selfscore; int alloclen = sarg->alloclen; int *neighbors = sarg->neighbors; double *mindists = sarg->mindists; int *chunk_counter_pt = sarg->chunk_counter_pt; pthread_mutex_t *mutex_count = sarg->mutex_count; enum {TODO,DONE,EXIT}; MPI_Status status; int *neighbors_partial = (int *)malloc(chunksize*sizeof(int)); double *mindists_partial = (double *)malloc(chunksize*sizeof(double)); int i; calcnearestthread_arg_t_mpi *targ; pthread_t *handle; pthread_mutex_t mutex; int posshared; int chunk_index; 
while(1){ if( my_rank == 0 ){ pthread_mutex_lock( mutex_count ); chunk_index = (*chunk_counter_pt)--; pthread_mutex_unlock( mutex_count ); if(chunk_index<0) break; fprintf( stderr, "my_rank: %d, chunks_assignment: %d, 0\n", my_rank, chunk_index); } else { MPI_Recv(&chunk_index,1,MPI_UNSIGNED,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status); if(status.MPI_TAG==EXIT) break; } int start = chunk_index*chunksize+1; int end = (chunk_index+1)*chunksize+1>njob?njob:(chunk_index+1)*chunksize+1; int num_of_jobs = end-start; #if ENABLEMPIDEBUG fprintf( stderr, "my_rank: %d, chunk_index: %d, start: %d, end: %d, nthread: %d\n", my_rank, chunk_index, start, end, nthread); #endif posshared = end-1; targ = calloc( num_thread, sizeof( calcnearestthread_arg_t_mpi ) ); handle = calloc( num_thread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i=0){ // chunk_index = (*chunk_counter_pt)--; // }else{ // rank0status = EXIT; // ??? // } // pthread_mutex_unlock( mutex_count ); // // if(rank0status!=EXIT) fprintf( stderr, "my_rank: %d, chunks_assignment: %d, 0\n", my_rank, chunk_index); } else { MPI_Send(&chunk_index,1,MPI_INT,0,DONE,MPI_COMM_WORLD); MPI_Send(&num_of_jobs,1,MPI_INT,0,DONE,MPI_COMM_WORLD); MPI_Send(neighbors_partial,num_of_jobs,MPI_UNSIGNED,0,DONE,MPI_COMM_WORLD); MPI_Send(mindists_partial,num_of_jobs,MPI_DOUBLE,0,DONE,MPI_COMM_WORLD); // MPI_Recv(&chunk_index,1,MPI_INT,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status); } } free(neighbors_partial); free(mindists_partial); } return( NULL ); } static void calcnearest_para_mpi(int njob, double *selfscore, char **bseq, int alloclen, int *neighbors, double *mindists ) { #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif int my_rank; int num_of_processes; MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); MPI_Comm_size(MPI_COMM_WORLD,&num_of_processes); if(num_of_processes*nthread < 2){ if(my_rank==0) reporterr( "(number of processes)*(number of threads) must be more than 1\n"); if(my_rank==0) 
reporterr( "number of processes: %d, number of threads: %d\n", num_of_processes, nthread ); exit(1); } const unsigned int chunksize = (njob-1)/(num_of_processes*GRANULARITY)==0?1:(njob-1)/(num_of_processes*GRANULARITY); const unsigned int num_of_chunks = (njob-1)%chunksize==0?(njob-1)/chunksize:(njob-1)/chunksize+1; int chunk_counter_pt[1]; pthread_mutex_t mutex_count; *chunk_counter_pt = num_of_chunks-1; calcthreads_dp1_arg_t args = { njob, chunksize, my_rank, bseq, selfscore, alloclen, neighbors, mindists, chunk_counter_pt, &mutex_count }; if(my_rank==0) { pthread_t th; fprintf( stderr, "my_rank: %d, chunksize: %u, num_of_chunks: %u\n", my_rank, chunksize, num_of_chunks); pthread_mutex_init( &mutex_count, NULL ); // calcthreads_dp1( (void *)&args ); pthread_create(&th, NULL, calcthreads_dp1, (void *)&args ); // sub thread. Does not use MPI when myrank==0 calcnearest_para_thread_rank0_mpi_arg_t arg = {chunksize, neighbors, mindists, chunk_counter_pt, &mutex_count}; // pthread_create(&th, NULL, calcnearest_para_thread_rank0_mpi, (void *)&arg ); calcnearest_para_thread_rank0_mpi( (void *)&arg ); // main thread. Uses MPI pthread_join( th, NULL ); pthread_mutex_destroy( &mutex_count ); } else { calcthreads_dp1( (void *)&args ); // main thread. 
Uses MPI } #if REPORTCOSTS reporterr( "\n" ); reporterr( "my_rank: %d, dp1, real = %f min\n", my_rank, (float)(time(NULL) - starttime)/60.0 ); reporterr( "my_rank: %d, dp1, user = %f min\n", my_rank, (float)(clock()-startclock)/CLOCKS_PER_SEC/60); reporterr( "my_rank: %d, dp1, ratio = %f", my_rank, (float)(clock()-startclock)/CLOCKS_PER_SEC / (time(NULL) - starttime) ); use_getrusage(); reporterr( "\n" ); #endif } #if 0 #if EXACTLYSAMEASPAIRLOCALALIGN static void recalcpairs_para4( int njob, int ***topol, Treedep *dep, char **bseq, char **dseq, double *selfscore, int alloclen, int *uselh, int *nfilesfornode ) #else static void recalcpairs_para4( int njob, int ***topol, Treedep *dep, char **bseq, double *selfscore, int alloclen, int *uselh, int *nfilesfornode ) #endif { int i; recalcpairs4thread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; int possharedn, numjob; int n, n0, n1, j, k, b, blocksize, blocksize0, i0, i1; int *mem0, *mem1, *addmem; char *mergeoralign; Jobplan *jobplan; double done, half; // int donen; unsigned long long doneull; int nallocated; int nnodesdivided; int *joborder; int subid; double sizeav; Pairnum *npairs; #if REPORTCOSTS time_t starttime, startclock; starttime = time(NULL); startclock = clock(); #endif nallocated = njob*2; mem0 = calloc( sizeof( int ), njob ); mem1 = calloc( sizeof( int ), njob ); jobplan = calloc( sizeof( Jobplan), nallocated ); mergeoralign = AllocateCharVec( njob ); npairs = calloc( sizeof( Pairnum ), njob ); sizeav = 0.0; if( nadd ) { addmem = AllocateIntVec( nadd+1 ); for( i=0; i-1; n-- ) for( k=0; k half ) // if( n0 * n1 < sizeav*LHBLOCKFACTOR ) { blocksize0 = n0; // bunkatsu shinai } else #endif { done += (double)npairs[k].npairs; // blocksize0 = LHBLOCKSIZE/n1; blocksize0 = (int)(sizeav * LHBLOCKFACTOR)/n1; if( blocksize0 == 0 ) blocksize0 = 1; } // reporterr( "blocksize0 = %d, n0=%d, n1=%d\n", blocksize0, n0, n1 ); if( numjob + n0/blocksize0+1 > nallocated ) { nallocated += n0/blocksize0+1; // reporterr( 
"new nallocated = %d\n", nallocated ); jobplan = realloc( jobplan, nallocated*sizeof( Jobplan ) ); } if( n0 > blocksize0 ) { nnodesdivided++; // reporterr( "node %d will be divided, because n0, %d > blocksize0, %d. nnodesdivided=%d, numjob=%d\n", n, n0, blocksize0, nnodesdivided, numjob ); } subid = 0; for( b=0; b blocksize0 ) jobplan[numjob].divided = 1; else jobplan[numjob].divided = 0; numjob++; subid++; } nfilesfornode[n] = subid; } reporterr( "numjob=%d, nnodesdivided=%d, njob=%d\n", numjob, nnodesdivided, njob ); reporterr( "Divided %d nodes to %d jobs\n", nnodesdivided, numjob-(njob-1) ); joborder = calloc( sizeof( int ), numjob ); for( i=0; inumjob; // const unsigned int chunksize = targ->chunksize; const unsigned int num_of_chunks = targ->num_of_chunks; int *chunk_counter_pt = targ->chunk_counter_pt; pthread_mutex_t *mutex_count = targ->mutex_count; int i; int my_rank; int num_of_processes; MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); MPI_Comm_size(MPI_COMM_WORLD,&num_of_processes); enum {TODO,DONE,EXIT}; MPI_Status status; unsigned int mpi_chunk_balance = 0; // int chunk_counter = 0; // unsigned short *const chunks_assignment = (unsigned short *)malloc(num_of_chunks*sizeof(unsigned short)); // const int first_child = nthread>1?0:1; const int first_child = 1; pthread_mutex_lock( mutex_count ); // if( nthread > 1 ) // { // fprintf( stderr, "my_rank: %d, chunks_assignment: %d, 0\n", my_rank, *chunk_counter_pt); // (*chunk_counter_pt)++; // } for(i=first_child;i=num_of_chunks) break; MPI_Send(chunk_counter_pt,1,MPI_UNSIGNED,i,TODO,MPI_COMM_WORLD); // chunks_assignment[*chunk_counter_pt]=i; fprintf( stderr, "my_rank: %d, chunks_assignment: %d, %d\n", my_rank, *chunk_counter_pt, i); (*chunk_counter_pt)++; mpi_chunk_balance++; } pthread_mutex_unlock( mutex_count ); while(mpi_chunk_balance){ unsigned int chunk_index; unsigned int chunk_counter; MPI_Recv(&chunk_index,1,MPI_UNSIGNED,MPI_ANY_SOURCE,DONE,MPI_COMM_WORLD,&status); int sender_id = status.MPI_SOURCE; 
mpi_chunk_balance--; pthread_mutex_lock( mutex_count ); chunk_counter = (*chunk_counter_pt)++; pthread_mutex_unlock( mutex_count ); if(chunk_countermy_rank; const int num_thread = my_rank==0?nthread-1:nthread; if(num_thread>0){ const int njob = sarg->njob; const int numjob = sarg->numjob; const unsigned int chunksize = sarg->chunksize; const unsigned int num_of_chunks = sarg->num_of_chunks; char **bseq = sarg->bseq; #if EXACTLYSAMEASPAIRLOCALALIGN char **bseq = sarg->dseq; #endif double *selfscore = sarg->selfscore; int alloclen = sarg->alloclen; int *joborder = sarg->joborder; int *uselh = sarg->uselh; int ***topol = sarg->topol; Treedep *dep = sarg->dep; Jobplan *jobplan = sarg->jobplan; int *chunk_counter_pt = sarg->chunk_counter_pt; pthread_mutex_t *mutex_count = sarg->mutex_count; enum {TODO,DONE,EXIT}; MPI_Status status; int i; int possharedn; unsigned long long doneull; recalcpairs4thread_arg_t_mpi *targ; pthread_t *handle; pthread_mutex_t mutex; int chunk_index; while(1){ if( my_rank == 0 ) { pthread_mutex_lock( mutex_count ); chunk_index = (*chunk_counter_pt)++; pthread_mutex_unlock( mutex_count ); if(chunk_index>=num_of_chunks) break; fprintf( stderr, "my_rank: %d, chunks_assignment: %d, 0\n", my_rank, chunk_index); } else { MPI_Recv(&chunk_index,1,MPI_UNSIGNED,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status); if (status.MPI_TAG==EXIT) break; } int start = chunk_index*chunksize; int end = (chunk_index+1)*chunksize>numjob?numjob:(chunk_index+1)*chunksize; #if ENABLEMPIDEBUG fprintf( stderr, "my_rank: %d, chunk_index: %d, start: %d, end: %d, nthread: %d\n", my_rank, chunk_index, start, end, nthread); #endif // possharedn = numjob-1; possharedn = start; doneull = 0; targ = calloc( num_thread, sizeof( recalcpairs4thread_arg_t_mpi ) ); handle = calloc( num_thread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 7.383 if( my_rank == 0 ) { // qsort( npairspt, njob-1, sizeof( Pairnum * ), compfuncpairpt ); // for( i=0; inum; for( i=0; inum; } 
MPI_Bcast( norder, njob-1, MPI_INT, 0, MPI_COMM_WORLD ); if( nadd ) free( addmem ); // for( i=0; i-1; n-- ) for( i=0; i (double)MINBLOCKLEN2 ) { blocksize0 = (int)( (double)MINBLOCKLEN2/n1/lenav/lenav ); if( blocksize0 == 0 ) blocksize0 = 1; // reporterr( "dividing node %d, size=%f, size*len2=%f > %f\n", n, (double)n0*n1, (double)n0*n1*lenav*lenav, MINBLOCKLEN2 ); // reporterr( "blocksize0=%d, n0=%d, n1=%d\n", blocksize0, n0, n1 ); } else { blocksize0 = n0; // reporterr( "did not divide node %d, size=n1*n2=%f, size*len2=%f < %f\n", n, (double)n0*n1, (double)n0*n1*lenav*lenav, MINBLOCKLEN2 ); } #endif } // reporterr( "blocksize0 = %d, n0=%d, n1=%d\n", blocksize0, n0, n1 ); if( numjob + n0/blocksize0+1 > nallocated ) { nallocated += n0/blocksize0+1; // reporterr( "new nallocated = %d\n", nallocated ); jobplan = realloc( jobplan, nallocated*sizeof( Jobplan ) ); } if( n0 > blocksize0 ) { nnodesdivided++; // reporterr( "node %d will be divided, because n0, %d > blocksize0, %d. nnodesdivided=%d, numjob=%d\n", n, n0, blocksize0, nnodesdivided, numjob ); } subid = 0; for( b=0; b blocksize0 ) jobplan[numjob].divided = 1; else jobplan[numjob].divided = 0; numjob++; subid++; } nfilesfornode[n] = subid; } reporterr( "my_rank: %d, numjob=%d, nnodesdivided=%d, njob=%d\n", my_rank, numjob, nnodesdivided, njob ); reporterr( "my_rank: %d, Divided %d nodes to %d jobs\n", my_rank, nnodesdivided, numjob-(njob-1) ); joborder = calloc( sizeof( int ), numjob ); for( i=0; inumjob, 2016/Mar/3 // for( i=0; i-1; i-- ) #else tmpdist = mindists[1]; treept[0].parent = treept+n; treept[1].parent = treept+n; treept[n].child0 = treept+0; treept[n].child1 = treept+1; treept[n].height = tmpdist * 0.5; treept[n].len0 = tmpdist * 0.5; treept[n].len1 = tmpdist * 0.5; treept[n].parent = NULL; treept[n].rep0 = 0; treept[n].rep1 = 1; root = treept+n; for( i=2; iparent; p!=NULL; b=p,p=p->parent ) { // reporterr( "checking %p->%p (height=%f)\n", b, p, p->height ); if( p->height > mindist * 0.5 ) break; } 
if( p == NULL ) { treept[n].parent = NULL; root = treept+n; } else if( p->child0 == b ) { p->child0 = treept+n; p->len0 = p->height-mindist*0.5; treept[n].parent = p; } else if( p->child1 == b ) { p->child1 = treept+n; p->len1 = p->height-mindist*0.5; treept[n].parent = p; } else { reporterr( "okashii\n" ); exit( 1 ); } treept[i].parent = treept+n; b->parent = treept+n; treept[n].child0 = b; treept[n].child1 = treept+i; treept[n].height = mindist * 0.5; treept[n].rep0 = b->rep0; treept[n].rep1 = treept[i].rep0; treept[n].len0 = mindist*0.5-b->height; treept[n].len1 = mindist*0.5; } reformattree( root, treept, njob, topol, len, dep, name, treeout, 0 ); // 2017/Mar/6 free( treept ); // free( neighbors ); // free( mindists ); } void compacttreedpdist( int njob, char **bseq, char **dseq, double *selfscore, int ***topol, double **len, char **name, Treedep *dep, int treeout, int alloclen, int *uselh, int *nfilesfornode, int treegiven ) { int i, neighbor, n; double tmpdist; double mindist; // int *commonanc; int my_rank; int num_of_processes; MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); MPI_Comm_size(MPI_COMM_WORLD,&num_of_processes); if( !treegiven ) { Treept *treept = NULL; Treept *p, *b; Treept *root; int *neighbors; double *mindists; neighbors = calloc( sizeof( int ), njob ); mindists = calloc( sizeof( double ), njob ); // calcnearest_para( njob, selfscore, dseq, alloclen, neighbors, mindists ); calcnearest_para_mpi( njob, selfscore, dseq, alloclen, neighbors, mindists ); MPI_Bcast(neighbors, njob, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(mindists, njob, MPI_DOUBLE, 0, MPI_COMM_WORLD); #if ENABLEMPIDEBUG if(my_rank==0){ for(i=0;i-1; i-- ) #else // commonanc = (int *)calloc( njob, sizeof( int ) ); tmpdist = mindists[1]; // reporterr( "tmpdist = %f\n", tmpdist ); // reporterr( "%f ?= %f\n", tmpdist, distfromfile( njob, njob-2, njob-1, hat2fp ) ); // reporterr( "%f ?= %f\n", tmpdist, mindists[njob-2] ); treept[0].parent = treept+n; treept[1].parent = treept+n; treept[n].child0 
= treept+0; treept[n].child1 = treept+1; treept[n].height = tmpdist * 0.5; treept[n].len0 = tmpdist * 0.5; treept[n].len1 = tmpdist * 0.5; treept[n].parent = NULL; treept[n].rep0 = 0; treept[n].rep1 = 1; root = treept+n; for( i=2; iparent; p!=NULL; b=p,p=p->parent ) { // reporterr( "checking %p->%p (height=%f)\n", b, p, p->height ); if( p->height > mindist * 0.5 ) break; } if( p == NULL ) { treept[n].parent = NULL; root = treept+n; } else if( p->child0 == b ) { p->child0 = treept+n; p->len0 = p->height-mindist*0.5; treept[n].parent = p; } else if( p->child1 == b ) { p->child1 = treept+n; p->len1 = p->height-mindist*0.5; treept[n].parent = p; } else { reporterr( "okashii\n" ); exit( 1 ); } treept[i].parent = treept+n; b->parent = treept+n; treept[n].child0 = b; treept[n].child1 = treept+i; treept[n].height = mindist * 0.5; treept[n].rep0 = b->rep0; treept[n].rep1 = treept[i].rep0; treept[n].len0 = mindist*0.5-b->height; treept[n].len1 = mindist*0.5; } reformattree( root, treept, njob, topol, len, dep, name, treeout, my_rank ); // 2017/Mar/6 free( treept ); free( neighbors ); free( mindists ); } else { reporterr( "treegiven\n" ); } #if EXACTLYSAMEASPAIRLOCALALIGN recalcpairs_para4_mpi( njob, topol, dep, bseq, dseq, selfscore, alloclen, uselh, nfilesfornode ); // recalcpairs_para4( njob, topol, dep, bseq, dseq, selfscore, alloclen, uselh, nfilesfornode ); #else recalcpairs_para4_mpi( njob, topol, dep, bseq, selfscore, alloclen, uselh, nfilesfornode ); // recalcpairs_para4( njob, topol, dep, bseq, selfscore, alloclen, uselh, nfilesfornode ); #endif #if HAT3SORTED // recalcpairs_para2( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); #else // recalcpairs_para3( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); #endif // recalcpairs( njob, topol, dep, bseq, selfscore, alloclen, hat3node, fd0, fd1, uselh ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 20130603 if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( 
commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; #if ENABLEMPIDEBUG reporterr("my_rank: %d, end compacttreedpdist\n", my_rank); #endif // free( commonanc ); // commonsextet_p( NULL, NULL ); // distdppairs_para( 0, 0, NULL, NULL, 0, 0, NULL, 0, NULL, NULL ); // distdppairsthread( NULL ); } void compacttree_memsaveselectable( int nseq, double **partmtx, int *nearest, double *mindist, int **pointt, int *tselfscore, char **seq, int **skiptable, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int howcompact, int memsave ) { int i, j, k; // int miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; // double tmpdouble; // double eff1, eff0; double *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? int nmemim, nmemjm; double minscore; char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? 
FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; int *singlettable1 = NULL; int *singlettable2 = NULL; double *newarr; void *(*distarrfunc)( void * ); void *(*resetnearestfunc)( void * ); int numfilled; int nthreadtree; compactdistarrthread_arg_t *distarrarg; resetnearestthread_arg_t *resetarg; int *joblist, nactive, posshared; double *result; sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( howcompact == 2 ) { if( seq ) { // distarrfunc = verycompactmsadistarrthread; distarrfunc = verycompactmsadistarrthreadjoblist; resetnearestfunc = NULL; } else { // distarrfunc = verycompactkmerdistarrthread; distarrfunc = verycompactkmerdistarrthreadjoblist; resetnearestfunc = NULL; } } else { if( seq ) { distarrfunc = msadistarrthreadjoblist; resetnearestfunc = msaresetnearestthread; } else { distarrfunc = kmerdistarrthreadjoblist; resetnearestfunc = kmerresetnearestthread; } } distarrarg = calloc( MAX( nthread, 1 ), sizeof( compactdistarrthread_arg_t ) ); resetarg = calloc( MAX( nthread, 1 ), sizeof( resetnearestthread_arg_t ) ); joblist = calloc( njob, sizeof( int ) ); if( howcompact != 2 ) result = calloc( njob, sizeof( double ) ); else result = NULL; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); if( treeout ) { treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi tree = AllocateCharMtx( njob, 0 ); } } if( treeout ) { for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "k=%d i=%d, 
mindist[i]=%f\n", k, i, mindist[i] ); if( mindist[i] < minscore ) // muscle { im = i; minscore = mindist[i]; } } // printf( "minscore=%f\n", minscore ); jm = nearest[im]; // printf( "im=%d\n", im ); // printf( "jm=%d\n", jm ); if( jm < im ) { j=jm; jm=im; im=j; } if( partmtx[im] == NULL && howcompact != 2 ) numfilled++; if( partmtx[jm] != NULL ) numfilled--; prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; if( memsave ) intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave else intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } if( memsave ) { *intpt++ = *pt11; *intpt = -1; } else { reporterr( "This version supports memsave=1 only\n" ); // fukkatsu saseru tokiha pt22 wo dainyu. exit( 1 ); for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; if( memsave ) intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave else intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // memsave if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } if( memsave ) { *intpt++ = *pt11; *intpt = -1; } else { reporterr( "This version supports memsave=1 only\n" ); // fukkatsu saseru tokiha pt22 wo dainyu. 
exit( 1 ); for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } } minscore *= 0.5; // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindist[im] = 999.9; if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); singlettable2 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[im] ); makecompositiontable_global( singlettable2, pointt[jm] ); } newarr = calloc( nseq, sizeof( double ) ); // nthreadtree = MAX( 1, nthread ); nthreadtree = nthread; for( acpti=ac,nactive=0; acpti!=NULL; acpti=acpti->next ) joblist[nactive++] = acpti->pos; // sukoshi muda... #ifdef enablemultithread if( nthreadtree > 0 ) { compactdistarrthread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; posshared = 0; // targ = calloc( nthreadtree, sizeof( compactdistarrthread_arg_t ) ); targ = distarrarg; handle = calloc( nthreadtree, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); if( k % 100 == 0 ) reporterr( " (%d threads, nactive=%d, nfilled=%d) \r", nthreadtree, nactive, numfilled ); for( i=0; inext ) // antei sei no tame { i = acpti->pos; if( i != im && i != jm ) { // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. 
// if( newarr[i] < mindist[i] ) // { // mindist[i] = newarr[i]; // nearest[i] = im; // } if( newarr[i] < mindist[im] ) { mindist[im] = newarr[i]; nearest[im] = i; } // if( nearest[i] == jm ) // { // nearest[i] = im; // } } } #endif } else #endif { if( k % 100 == 0 ) reporterr( " (serial, nactive=%d, nfilled=%d) \r", nactive, numfilled ); compactdistarrthread_arg_t *targ; posshared = 0; // targ = calloc( 1, sizeof( compactdistarrthread_arg_t ) ); targ = distarrarg; for( i=0; i<1; i++ ) { targ[i].para = 0; targ[i].njob = nactive; // targ[i].thread_no = i; targ[i].im = im; targ[i].jm = jm; targ[i].tselfscore = tselfscore; targ[i].nlen = nlen; targ[i].seq = seq; targ[i].skiptable = skiptable; targ[i].pointt = pointt; targ[i].table1 = singlettable1; targ[i].table2 = singlettable2; targ[i].joblist = joblist; targ[i].posshared = &posshared; targ[i].mindist = mindist; targ[i].nearest = nearest; targ[i].newarr = newarr; targ[i].partmtx = partmtx; distarrfunc( targ+i ); // pthread_create( handle, NULL, distarrfunc, (void *)(targ) ); } // free( targ ); } for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // antei sei no tame { i = acpti->pos; if( i != im && i != jm ) { // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. // if( newarr[i] < mindist[i] ) // { // mindist[i] = newarr[i]; // nearest[i] = im; // } if( newarr[i] < mindist[im] ) { mindist[im] = newarr[i]; nearest[im] = i; } // if( nearest[i] == jm ) // { // nearest[i] = im; // } } } // printf( "im=%d, jm=%d\n", im, jm ); #if 0 printf( "matrix = \n" ); for( i=0; inext = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; #if 0 // muscle seems to miss this. // int nwork = 0; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "reset nearest? 
i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindist[i] ); if( nearest[i] == im ) { // printf( "reset nearest, i=%d, k=%d\n", i, k ); if( partmtx[im][i] > mindist[i] ) { // nwork++; // printf( "go\n" ); if( pointt ) // kmer { singlettable1 = (int *)calloc( tsize, sizeof( int ) ); makecompositiontable_global( singlettable1, pointt[i] ); } resetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, pointt, nlen, singlettable1 ); if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer if( pointt ) commonsextet_p( NULL, NULL ); } } } // reporterr( "nwork = %d\n", nwork ); #else if( howcompact == 2 ) continue; #if 0 if( 0 && nthreadtree > 0 ) { resetnearestthread_arg_t *targ; pthread_t *handle; pthread_mutex_t mutex; Bchain *acshared; acshared = ac; // targ = calloc( nthreadtree, sizeof( resetnearestthread_arg_t ) ); targ = resetarg; handle = calloc( nthreadtree, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); printf( "\n" ); #endif } if( treeout ) { fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); } for( im=0; im _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindisfrom[i] ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } // printf( "minscore=%f\n", minscore ); jm = nearest[im]; // printf( "im=%d\n", im ); // printf( "jm=%d\n", jm ); if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int 
*)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 // memsave *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 // memsave *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } minscore *= 0.5; // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = 
(clusterfuncpt[0])( eff0, eff1 ); // printf( "tmpdouble=%f, eff0=%f, eff1=%f\n", tmpdouble, eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // printf( "im=%d, jm=%d\n", im, jm ); #if 0 printf( "matrix = \n" ); for( i=0; ij ) { minijm=j; maxijm=i; } else { minijm=i; maxijm=j; } printf( "%f ", eff[minijm][maxijm-minijm] ); } printf( "\n" ); } #endif treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; // Ato de fukkatsu } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; // printf( "reset nearest? 
i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindisfrom[i] ); if( nearest[i] == im ) { // printf( "reset nearest, i=%d, k=%d\n", i, k ); if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) { // printf( "go\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } } #else reporterr( "CHUUI!\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); if( treeout == 2 ) { int *mem = calloc( sizeof( int ), nseq ); fprintf( fp, "\nDensity:" ); for( k=0; k-1; i++ ) fprintf( fp, " %03d", topol[k][0][i]+1 ); fprintf( fp, "%d:", getdensest( mem, density )+1 ); for( i=0; mem[i]>-1; i++ ) fprintf( fp, " %d", mem[i]+1 ); fprintf( fp, "\n" ); topolorderz( mem, topol, dep, k, 1 ); // fprintf( fp, "len1 = %f\n", len[k][1] ); // for( i=0; topol[k][1][i]>-1; i++ ) fprintf( fp, " %03d", topol[k][1][i]+1 ); fprintf( fp, "%d:", getdensest( mem, density )+1 ); for( i=0; mem[i]>-1; i++ ) fprintf( fp, " %d", mem[i]+1 ); fprintf( fp, "\n" ); } free( mem ); } fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); if( treeout == 2 ) free( density ); } void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; double *tmptmplen = NULL; //static? int *hist = NULL; //static? Bchain *ac = NULL; //static? int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; //static? 
int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess char **tree; //static? char *treetmp; //static? char *nametmp, *nameptr, *tmpptr; //static? FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( 
intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); 
tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #else reporterr( "chuui!\n" ); #endif #if 0 printf( "\nooSTEP-%03d:\n", k+1 ); printf( "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); printf( "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); free( tree[0] ); free( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double 
(*clusterfuncpt[1])(double,double); char namec; sueff1 = 1.0 - sueff_global; sueff05 = sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? 
treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; nmemim = nmemar[im]; // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = 
jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpdouble = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim] > mindisfrom[i] ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\nvSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti, *acptj; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; double minscore; int *nearest = NULL; // by D.Mathog, a guess double *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp, *nameptr, *tmpptr; FILE *fp; double (*clusterfuncpt[1])(double,double); char namec; int *testtopol, **inconsistent; int **inconsistentpairlist; int ninconsistentpairs; int *warned; int allinconsistent; int firsttime; increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq ); sueff1 = 1 - sueff_global; sueff05 = 
sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } #if 0 if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*150 ); nametmp = AllocateCharVec( 91 ); tree = AllocateCharMtx( njob, njob*150 ); } for( i=0; i _ no tame sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } #else if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateDoubleVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateDoubleVec( njob ); nearest = AllocateIntVec( njob ); // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? 
treetmp = NULL; // kentou 2013/06/12 nametmp = AllocateCharVec( 1000 ); // nagasugi // tree = AllocateCharMtx( njob, njob*600 ); tree = AllocateCharMtx( njob, 0 ); testtopol = AllocateIntVec( njob + 1 ); inconsistent = AllocateIntMtx( njob, njob ); // muda // inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda inconsistentpairlist = AllocateIntMtx( 1, 2 ); warned = AllocateIntVec( ngroup ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } #endif for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } } else { minscore = 999.9; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore ) { minscore = tmpdouble; im = i; jm = j; } } for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) { j = acptj->pos; if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore ) { minscore = tmpdouble; im = j; jm = i; } } } } allinconsistent = 1; for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { if( inconsistent[acpti->pos][acptj->pos] == 0 ) { allinconsistent = 0; goto exitloop_d; } } } exitloop_d: if( allinconsistent ) { reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); exit( 1 ); } #if 1 intpt = testtopol; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; } else { for( 
intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; } else { for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) *intpt++ = *intpt2++; } *intpt = -1; // reporterr( "testtopol = \n" ); // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); // reporterr( "\n" ); #endif for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); // reporterr( "\n" ); if( overlapmember( testtopol, groups[i] ) ) { if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) { if( !warned[i] ) { warned[i] = 1; reporterr( "\n###################################################################\n" ); reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); reporterr( "###################################################################\n" ); } inconsistent[im][jm] = 1; inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 ); inconsistentpairlist[ninconsistentpairs][0] = im; inconsistentpairlist[ninconsistentpairs][1] = jm; ninconsistentpairs++; break; } } } if( i == ngroup ) { // reporterr( "OK\n" ); break; } } prevnode = hist[im]; nmemim = nmemar[im]; // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); intpt = 
topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; eff[im][jm] = 999.9; // eff[im][jm-im] = 999.9; // bug?? for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 tmpdouble = eff[miniim][maxiim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpdouble = eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif #if 1 if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } #endif } } #if 0 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #else treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); 
} strcpy( tree[im], treetmp ); #endif acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim] > mindisfrom[i] ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "\ncSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); fclose( fp ); #if 0 FreeCharMtx( tree ); #else free( tree[0] ); free( tree ); #endif free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); free( testtopol ); FreeIntMtx( inconsistent ); FreeIntMtx( inconsistentpairlist ); free( warned ); } void fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt; double tmpdouble; double eff1, eff0; double *tmptmplen = NULL; // static TLS -> local, 2012/02/25 int *hist = NULL; // static TLS -> local, 2012/02/25 Bchain *ac = NULL; // static TLS -> local, 2012/02/25 int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11; int *nmemar; // static TLS -> local, 2012/02/25 int nmemim, nmemjm; double minscore; int *nearest = NULL; // by 
Mathog, a guess double *mindisfrom = NULL; // by Mathog, a guess double (*clusterfuncpt[1])(double,double); sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > 
*pt2 ) { pt11 = pt2; // pt22 = pt1; } else { pt11 = pt1; // pt22 = pt2; } #if 1 *intpt++ = *pt11; *intpt = -1; #else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; #endif } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpdouble = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. 
for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; double *tmptmplen = NULL; // static TLS -> local, 2012/02/25 int *hist = NULL; // static TLS -> local, 2012/02/25 Bchain *ac = NULL; // static TLS -> local, 2012/02/25 int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; int *nmemar; // static TLS -> local, 2012/02/25 int nmemim, nmemjm; double minscore; int *nearest = NULL; // by Mathog, a guess double *mindisfrom = NULL; // by Mathog, a guess double (*clusterfuncpt[1])(double,double); sueff1 = 1 - (double)sueff_global; sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = 
AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; if( dep ) dep[k].child1 = prevnode; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( dep ) dep[k].distfromtip = minscore; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = 
eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpdouble = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpdouble < mindisfrom[i] ) { mindisfrom[i] = tmpdouble; nearest[i] = im; } if( tmpdouble < mindisfrom[im] ) { mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; if( efffree ) { free( (void *)eff[jm] ); eff[jm] = NULL; } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { if( i < im ) { miniim = i; maxiim = im; } else { miniim = im; maxiim = i; } if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double eff1, eff0; int *hist = NULL; Achain *ac = NULL; double minscore; char **tree; char *treetmp; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; FILE *fp; int node[2]; double lenfl[2]; char *nametmp, *nameptr, *tmpptr; //static? 
char namec; fp = fopen( "_guidetree", "r" ); if( !fp ) { reporterr( "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { // treetmp = AllocateCharVec( njob*50 ); treetmp = NULL; // tree = AllocateCharMtx( njob, njob*50 ); tree = AllocateCharMtx( njob, 0 ); nametmp = AllocateCharVec( 1000 ); // nagasugi hist = AllocateIntVec( njob ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i _ no tame tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 if( tree[i] == NULL ) { reporterr( "Cannot allocate tree!\n" ); exit( 1 ); } sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } for( i=0; i nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { reporterr( "\n\nCheck the guide tree.\n" ); reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } // reporterr( "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); if( lenfl[0] == -1.0 || lenfl[1] == -1.0 ) { reporterr( "\n\nWARNING: Branch length is not given.\n" ); exit( 1 ); } if( lenfl[0] < 0.0 ) lenfl[0] = 0.0; if( lenfl[1] < 0.0 ) lenfl[1] = 0.0; #endif // reporterr( "im=%d, jm=%d\n", im, jm ); intpt = topol[k][0]; prevnode = hist[im]; if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; #if 0 len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore 
- tmptmplen[jm]; #else len[k][0] = lenfl[0]; len[k][1] = lenfl[1]; #endif hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + ( eff0 + eff1 ) * 0.5 * sueff_global; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo if( !treetmp ) { reporterr( "Cannot allocate treetmp\n" ); exit( 1 ); } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); free( tree[im] ); free( tree[jm] ); tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); tree[jm] = NULL; if( tree[im] == NULL ) { reporterr( "Cannot reallocate tree!\n" ); exit( 1 ); } strcpy( tree[im], treetmp ); // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); // strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s;\n", treetmp ); // fprintf( fp, "by veryfastsupg_double_loadtree\n" ); fclose( fp ); #if 1 reporterr( "\n" ); free( hist ); free( (char *)ac ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif #if 0 // reporterr( "reconstructing eff[][]\n" ); // Tsune ni hat2 ha aru node koreha iranai. 
for( k=0; k-1; i++ ) { reporterr( " %03d", im ); } fprintf( stdout, "\n" ); for( i=0; (jm=topol[k][1][i])>-1; i++ ) { reporterr( " %03d", jm ); } for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ ) { eff[im][jm] += len[k][0] + len[k][1]; eff[jm][im] += len[k][0] + len[k][1]; } } #endif } #if 0 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + ( eff0 + eff1 ) * 0.5 * sueff_global; } } acjmprev = ac[jm].prev; 
acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 reporterr( "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } #endif void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; static char *nametmp; FILE *fpout; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; double (*clusterfuncpt[1])(double,double); sueff1 = 1 - sueff_global; sueff05 = sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); nametmp = AllocateCharVec( 31 ); } // for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == 
-1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fpout = fopen( "infile.tree", "w" ); fprintf( fpout, "%s;\n", treetmp ); // fprintf( fpout, "by veryfastsupg_double_outtree\n" ); fclose( fpout ); #if 1 reporterr( "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif } void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int tmpint; int eff1, eff0; static double *tmptmplen = NULL; static int **eff = NULL; static int *hist = NULL; 
static Achain *ac = NULL; int minscore; double minscoref; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateIntMtx( njob, njob ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } #else intpt = topol[k][0]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; #endif len[k][0] = minscoref - tmptmplen[im]; len[k][1] = minscoref - tmptmplen[jm]; tmptmplen[im] = minscoref; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int?? ( eff0 + eff1 ) * 0.5 * sueff_global; // int?? 
} } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } void fastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static double *tmplen; int *intpt; double tmpdouble; double eff1, eff0; static double **eff = NULL; static char **pair = NULL; static Achain *ac; double minscore; int im = -1, jm = -1; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); ac = (Achain *)calloc( njob, sizeof( Achain ) ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; minscore /= 2.0; len[k][0] = (double)minscore - tmplen[im]; len[k][1] = (double)minscore - tmplen[jm]; tmplen[im] = (double)minscore; for( i=0; i 0 ); for( i=0; i-1; i++ ) reporterr( " %03d", topol[k][0][i] ); reporterr( "\n" ); reporterr( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] ); reporterr( "\n" ); #endif } reporterr( "\n" ); // FreeFloatMtx( eff ); // FreeCharMtx( pair ); // FreeFloatVec( tmplen ); // free( ac ); } void supg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static double *tmplen; int *intpt; double **doubleptpt; double 
*doublept; double tmpdouble; double eff1, eff0; static double **eff = NULL; static char **pair = NULL; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } void spg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else double **eff = NULL; char **pair = NULL; if( !eff ) { eff = AllocateDoubleMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); } #endif for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } double ipower( double x, int n ) /* n > 0 */ { double r; r = 1; while( n != 0 ) { if( n & 1 ) r *= x; x *= x; n >>= 1; } return( r ); } void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { int i, j, k, s1, s2; static double rootnode[M]; if( nseq-2 < 0 ) { reporterr( "Too few sequence for countnode: nseq = %d\n", nseq ); exit( 1 ); } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; 
topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } } void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ { int i, j, k, s1, s2; int rootnode[M]; for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=localmem[1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } free( localmem[0] ); free( localmem[1] ); } free( localmem ); free( memhist[nseq-2] ); free( memhist ); for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; 
j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(unsigned char)seq2[++k]) == '-' ) tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } return( tmpscore ); } double score_calc1( char *seq1, char *seq2 ) /* method 1 */ { int k; double score = 0.0; int count = 0; int len = strlen( seq1 ); for( k=0; k 1 ) { if( utree == 0 ) { for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco1+i, "%c", ch+0x61 ); } sco1[len] = 0; for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); tmp = ( tmp - 400 * !scoremtx ) * 2; if( tmp < 0 ) tmp = 0; ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco2+i, "%c", ch+0x61 ); sco[i] = tmp; } sco2[len] = 0; for( i=WIN; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog while( str2 >= bk ) { *str2-- = *str1--; } } int isaligned( int nseq, char **seq ) { int i; int len = strlen( seq[0] ); for( i=1; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } score += 
(double)tmpscore / (double)c; #if DEBUG printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); printf( "tmpscore / c = %f\n", tmpscore/(double)c ); #endif } } reporterr( "raw score = %f\n", score ); score /= (double)nseq * ( nseq-1.0 ) / 2.0; score += 400.0; #if DEBUG printf( "score in mltaln9.c = %f\n", score ); #endif return( (double)score ); } void doublencpy( double *vec1, double *vec2, int len ) { while( len-- ) *vec1++ = *vec2++; } double score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ { int i, j, k; int gb1, gb2, gc1, gc2; int cob; int nglen; int len = strlen( seq[0] ); double score; score = 0; nglen = 0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore; } } return( score ); } #define SEGMENTSIZE 150 int searchAnchors( int nseq, char **seq, Segment *seg ) { int i, j, k, kcyc; int status; double score; int value = 0; int len; int length; static double *stra = NULL; static int alloclen = 0; double cumscore; static double threshold; len = strlen( seq[0] ); if( alloclen < len ) { if( alloclen ) { FreeDoubleVec( stra ); } else { threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; } stra = AllocateDoubleVec( len ); alloclen = len; } for( i=0; iskipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; length = 0; /* modified at 01/09/11 */ for( j=0; j threshold ) reporterr( "YES\n" ); else reporterr( "NO\n" ); #endif if( score > threshold ) { if( !status ) { status = 1; seg->start = i; length = 0; cumscore = 0.0; } length++; cumscore += score; } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); #endif if( length > SEGMENTSIZE ) { 
(seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } length = 0; cumscore = 0.0; status = 0; value++; seg++; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } return( value ); } void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; i= j ) continue; for( ptr=localhom[i]+j-i; ptr; ptr=ptr->next ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 ptr->importance = ptr->opt / ptr->overlapaa; // ptr->fimportance = (double)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } free( nogaplen ); } void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, nseq1; 
LocalHom *ptr; #if 1 #else int *nogaplen; nogaplen = AllocateIntVec( nseq ); for( i=0; inext ) { // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 // ptr->importance = ptr->opt / ptr->overlapaa; ptr->importance = ptr->opt * 0.5; // tekitou // ptr->fimportance = (double)ptr->importance; // reporterr( "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt ); #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } #if 1 #else free( nogaplen ); #endif } void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr, int alloclen ) { int i, j, pos, len, ti, tj; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( alloclen ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif // for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! 
i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B } #endif // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( ti=0; tinext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%30.25f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // reporterr( "average?\n" ); // for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = tmpptr2->importance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", 
tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } for( ti=0; tinext ) { if( tmpptr1->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance ); // imp = 1.0 * ( tmpptr1->importance ); tmpptr1->importance = imp; // tmpptr1->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( ti=0; tinext ) { if( tmpptr->end1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } //exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom, int alloclen ) { int i, j, pos, len; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( alloclen ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! 
i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } else { for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } } else { if( localhom[j][i-j].opt == -1.0 ) continue; for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->rimportance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } } // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( i=0; inext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } else { printf( "%d-%d\n", i, j ); for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / 
tmpptr->overlapaa, eff[i] * tmpptr->rimportance, tmpptr->opt ); } } } #endif #if 1 // reporterr( "average?\n" ); for( i=0; inext) { if( tmpptr1->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr1->rimportance ); tmpptr1->importance = tmpptr1->rimportance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( i=0; inext ) { if( tmpptr->end1 && tmpptr->start1 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } else for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->end2 && tmpptr->start2 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, pos, len; double *importance; // static -> local, 2012/02/25 double tmpdouble; double *ieff, totaleff; // counteff_simple_double ni utsusu kamo int *nogaplen; // static -> local, 2012/02/25 LocalHom *tmpptr; importance = AllocateDoubleVec( nlenmax ); nogaplen = AllocateIntVec( nseq ); ieff = AllocateDoubleVec( nseq ); totaleff = 0.0; for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif 
for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { #if 1 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); importance[pos] += ieff[j]; #else importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } } #if 0 reporterr( "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->fimportance = (double)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B } #endif // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 printf( "before averaging:\n" ); for( i=0; inext ) { printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // reporterr( "average?\n" ); for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = 
tmpptr2->importance = imp; // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; // reporterr( "## importance = %f\n", tmpptr1->importance ); } #if 0 // commented out, 2012/02/10 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { reporterr( "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 printf( "after averaging:\n" ); for( i=0; inext ) { if( tmpptr->end1 && tmpptr->start1 != -1 ) printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } exit( 1 ); #endif free( importance ); free( nogaplen ); free( ieff ); } static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) { // dokka machigatteru if( pt != lh ) // susumeru { pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); pt = pt->next; pt->next = NULL; lh->last = pt; } else // sonomamatsukau { lh->last = pt; } lh->nokori++; // reporterr( "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); pt->start1 = sti; pt->start2 = stj; pt->end1 = eni; pt->end2 = enj; pt->opt = opt; pt->extended = interm; pt->overlapaa = overlp; #if 0 reporterr( "i: %d-%d\n", sti, eni ); reporterr( "j: %d-%d\n", stj, enj ); reporterr( "opt=%f\n", opt ); reporterr( "overlp=%d\n", overlp ); #endif } void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) { int overlp, plim; int i, j, k; int pi, pj, pk, len; int status, sti, stj; int *ipt; int co; static int *ini = NULL; static int *inj = NULL; LocalHom *pt; sti = 0; // by D.Mathog, a guess stj = 0; // by D.Mathog, a guess if( ini == NULL ) { ini = AllocateIntVec( nlenmax+1 ); inj = AllocateIntVec( nlenmax+1 ); } for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; ipt = ini; co = nlenmax+1; while( co-- ) *ipt++ = -1; 
ipt = inj; co = nlenmax+1; while( co-- ) *ipt++ = -1; overlp = 0; { for( pt=localhom[i]+k; pt; pt=pt->next ) { // reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); if( pt->opt == -1 ) { reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pi = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = ini + pk; while( len-- ) *ipt++ = pi++; } } { for( pt=localhom[j]+k; pt; pt=pt->next ) { if( pt->opt == -1 ) { reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pj = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = inj + pk; while( len-- ) *ipt++ = pj++; } } #if 0 reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; reporterr( " %d", inj[pk] ); } reporterr( "\n" ); reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; reporterr( " %d", ini[pk] ); } reporterr( "\n" ); #endif overlp = 0; plim = nlenmax+1; for( pk = 0; pk < plim; pk++ ) if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; status = 0; plim = nlenmax+1; for( pk=0; pknext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); pt = localhom[j][i].last; // reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); // reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); // reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); } } if( !status ) // else deha arimasenn. 
{ if( ini[pk] == -1 || inj[pk] == -1 ) continue; sti = ini[pk]; stj = inj[pk]; // reporterr( "start here!\n" ); status = 1; } } // if( status ) reporterr( "end here\n" ); // exit( 1 ); // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #if 0 for( pt=localhomtable[i]+j; pt; pt=pt->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #endif } } } int makelocal( char *s1, char *s2, int thr ) { int start, maxstart, maxend; char *pt1, *pt2; double score; double maxscore; pt1 = s1; pt2 = s2; maxend = 0; // by D.Mathog, a guess // reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); maxscore = 0.0; score = 0.0; start = 0; maxstart = 0; while( *pt1 ) { // reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); if( *pt1 == '-' || *pt2 == '-' ) { // reporterr( "penalty = %d\n", penalty ); score += penalty; while( *pt1 == '-' || *pt2 == '-' ) { pt1++; pt2++; } continue; } score += ( amino_dis[(unsigned char)*pt1++][(unsigned char)*pt2++] - thr ); // score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); if( score > maxscore ) { // reporterr( "score = %f\n", score ); maxscore = score; maxstart = start; // reporterr( "## max! 
maxstart = %d, start = %d\n", maxstart, start ); } if( score < 0.0 ) { // reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart ); if( start == maxstart ) { maxend = pt1 - s1; // reporterr( "maxend = %d\n", maxend ); } score = 0.0; start = pt1 - s1; } } if( start == maxstart ) maxend = pt1 - s1 - 1; // reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); s1[maxend+1] = 0; s2[maxend+1] = 0; return( maxstart ); } void resetlocalhom( int nseq, LocalHom **lh ) { int i, j; LocalHom *pt; for( i=0; inext ) pt->opt = 1.0; } } void gapireru( char *res, char *ori, char *gt ) { char g; while( (g = *gt++) ) { if( g == '-' ) { *res++ = *newgapstr; } else { *res++ = *ori++; } } *res = 0; } void getkyokaigap( char *g, char **s, int pos, int n ) { // char *bk = g; // while( n-- ) *g++ = '-'; while( n-- ) *g++ = (*s++)[pos]; // reporterr( "bk = %s\n", bk ); } void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) #if 0 { int i, j, gc, gb; double feff; for( i=0; i", i, gaplen, k, (*fpt)[k].freq ); (*fpt)[k].freq += feff; // reporterr( "%f\n", (*fpt)[k].freq ); gaplen = 0; } } fpt++; } } #if 1 for( j=0; jnext = ac; acori->pos = -1; ac[0].prev = acori; // for( i=0; i tmpmin ) { minscore = tmpmin; nearest = i; } } nearesto = nearest; minscoreo = minscore; // for( i=0; i-1; j++ ) { reporterr( "%d ", topol[i][0][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][0] ); reporterr( "group1 = \n" ); for( j=0; topol[i][1][j]>-1; j++ ) { reporterr( "%d ", topol[i][1][j]+1 ); } reporterr( "\n" ); reporterr( "len=%f\n", len[i][1] ); reporterr( "\n\n\nminscore = %f ? 
%f\n", minscore, dep[i].distfromtip*2 ); reporterr( "i = %d\n", i ); if( leaf2node[nearest] == -1 ) { reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] ); } else { reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] ); } #endif nearestnode = leaf2node[nearest]; if( nearestnode == -1 ) reflen = nogaplen[nearest]; else reflen = alnleninnode[nearestnode]; // reflen = alnleninnode[i]; // BUG!! if( noalign ) seqlengthcondition = 1; else seqlengthcondition = ( nogaplentoadd <= reflen ); //seqlengthcondition = 1; // CHUUI //seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku. // if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau. { // reporterr( "INSERT HERE, %d-%d\n", nearest, norg ); // reporterr( "nearest = %d\n", nearest ); // reporterr( "\n\n\nminscore = %f\n", minscore ); // reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 ); // reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); if( nearestnode == -1 ) { // reporterr( "INSERTING to 0!!!\n" ); // reporterr( "lastlength = %d\n", nogaplen[norg] ); // reporterr( "reflength = %d\n", nogaplen[nearest] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][0][0] = nearest; topolc[posinnew][0][1] = -1; addedlen = lenc[posinnew][0] = minscore / 2; } else { // reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); // reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] ); // reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 
1 ) * sizeof( int ) ) ); // reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); intcpy( topolc[posinnew][0], topol[nearestnode][0] ); intcat( topolc[posinnew][0], topol[nearestnode][1] ); // addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!! addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10 // fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] ); } neighbor = lastmem( topolc[posinnew][0] ); if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " distance: %f\n", minscore ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "INSERTING to 1!!!\n" ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) ); topolc[posinnew][1][0] = norg; topolc[posinnew][1][1] = -1; lenc[posinnew][1] = minscore / 2; // reporterr( "STEP %d (newnew)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 ); // reporterr( "\n 
len=%f\n", lenc[posinnew][1] ); repnorg = nearest; // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f\n", lenc[i][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[i][1] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); posinnew++; } // reporterr( "minscore = %f\n", minscore ); // reporterr( "distfromtip = %f\n", dep[i].distfromtip ); // reporterr( "Modify matrix, %d-%d\n", nearest, norg ); eff0 = iscorec[mem0][norg-mem0]; eff1 = iscorec[mem1][norg-mem1]; // iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 ); iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local; iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda acprev = ac[mem1].prev; acnext = ac[mem1].next; acprev->next = acnext; if( acnext != NULL ) acnext->prev = acprev; if( ( nearest == mem1 || nearest == mem0 ) ) { minscore = 9999.9; // for( j=0; j iscorec[j][norg-j] ) // { // minscore = iscorec[j][norg-j]; // nearest = j; // } // } // reporterr( "searching on modified ac " ); for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda { // reporterr( "." 
); j = acpt->pos; tmpmin = iscorec[j][norg-j]; if( minscore > tmpmin ) { minscore = tmpmin; nearest = j; } } // reporterr( "done\n" ); } // reporterr( "posinnew = %d\n", posinnew ); if( topol[i][0][0] == repnorg ) { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); intcat( topolc[posinnew][0], additionaltopol ); lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] ); addedlen = 0.0; } else { topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topol[i][0] ); lenc[posinnew][0] = len[i][0]; } if( topol[i][1][0] == repnorg ) { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); intcat( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip ); // fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] ); // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] ); addedlen = 0.0; repnorg = topolc[posinnew][0][0]; // juuyou } else { topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][1], topol[i][1] ); lenc[posinnew][1] = len[i][1]; } // reporterr( "\nSTEP %d (new)\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", 
topolc[posinnew][1][j]+1 ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); // reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] ); for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i; // reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] ); // im = topolc[posinnew][0][0]; // jm = topolc[posinnew][1][0]; // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); // strcpy( tree[im], treetmp ); // // reporterr( "%s\n", treetmp ); posinnew++; } if( nstep ) { i--; topolo0 = topol[i][0]; topolo1 = topol[i][1]; } else { // i = 0; // free( topol[i][0] );//? // free( topol[i][1] );//? // topol[i][0] = calloc( 2, sizeof( int ) ); // topol[i][1] = calloc( 1, sizeof( int ) ); // topol[i][0][0] = 0; // topol[i][0][1] = -1; // topol[i][1][0] = -1; topoldum0 = calloc( 2, sizeof( int ) ); topoldum1 = calloc( 1, sizeof( int ) ); topoldum0[0] = 0; topoldum0[1] = -1; topoldum1[0] = -1; topolo0 = topoldum0; topolo1 = topoldum1; } if( repnorg == -1 ) { // topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); // intcpy( topolc[posinnew][0], topol[i][0] ); // intcat( topolc[posinnew][0], topol[i][1] ); topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) ); intcpy( topolc[posinnew][0], topolo0 ); intcat( topolc[posinnew][0], topolo1 ); // lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 
2014/06/07 ni hakken if( nstep ) lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21 else lenc[posinnew][0] = minscore / 2; // reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip ); // reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] ); topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); intcpy( topolc[posinnew][1], additionaltopol ); lenc[posinnew][1] = minscore / 2; // neighbor = lastmem( topolc[posinnew][0] ); neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji if( treeout ) { #if 0 fp = fopen( "infile.tree", "a" ); // kyougou!! if( fp == 0 ) { reporterr( "File error!\n" ); exit( 1 ); } fprintf( fp, "\n" ); fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); fprintf( fp, " cousin: " ); for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); fprintf( fp, "\n" ); fclose( fp ); #else addtree[iadd].nearest = nearesto; addtree[iadd].dist1 = minscoreo; addtree[iadd].dist2 = minscore; neighborlist[0] = 0; npt = neighborlist; for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); npt += strlen( npt ); } addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); strcpy( addtree[iadd].neighbors, neighborlist ); #endif } // reporterr( "STEP %d\n", posinnew ); // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); // reporterr( "\n len=%f", lenc[posinnew][0] ); // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); // reporterr( "\n len=%f\n", lenc[posinnew][1] ); } if( topoldum0 ) free( topoldum0 ); if( topoldum1 ) free( topoldum1 ); free( leaf2node ); free( additionaltopol ); free( ac ); free( acori ); if( treeout ) free( neighborlist ); #if 0 // create a newick tree for CHECK char **tree; char *treetmp; int im, jm; treetmp = AllocateCharVec( 
njob*150 ); tree = AllocateCharMtx( njob, njob*150 ); for( i=0; i-1; ) { if( mem[i++] != cand[j++] ) return( 0 ); } if( cand[j] == -1 ) { return( 1 ); } else { return( 0 ); } } #else int samemember( int *mem, int *cand ) { int i, j; int nm, nc; nm = 0; for( i=0; mem[i]>-1; i++ ) nm++; nc = 0; for( i=0; cand[i]>-1; i++ ) nc++; if( nm != nc ) return( 0 ); for( i=0; mem[i]>-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } if( mem[i] == -1 ) { #if 0 reporterr( "mem = " ); for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] ); reporterr( "\n" ); reporterr( "cand = " ); for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] ); reporterr( "\n" ); #endif return( 1 ); } else { return( 0 ); } } #endif int samemembern( int *mem, int *cand, int nc ) { int i, j; int nm; nm = 0; for( i=0; mem[i]>-1; i++ ) { nm++; if( nm > nc ) return( 0 ); } if( nm != nc ) return( 0 ); for( i=0; mem[i]>-1; i++ ) { for( j=0; j-1; i++ ) { for( j=0; cand[j]>-1; j++ ) if( mem[i] == cand[j] ) break; if( cand[j] == -1 ) return( 0 ); } // reporterr( "INCLUDED! 
mem[0]=%d\n", mem[0] ); return( 1 ); } int overlapmember( int *mem1, int *mem2 ) { int i, j; for( i=0; mem1[i]>-1; i++ ) for( j=0; mem2[j]>-1; j++ ) if( mem1[i] == mem2[j] ) return( 1 ); return( 0 ); } void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; double fr; // for( i=0; i %f\n", i, freq[i] ); } // reporterr( "\n" ); return; } void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; double fr; // for( i=0; i 0.0 ) val = 0.0; return val; } void makedynamicmtx( double **out, double **in, double offset ) { int i, j, ii, jj; double av; offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2 // if( offset > 0.0 ) offset = 0.0; // reporterr( "dynamic offset = %f\n", offset ); for( i=0; i%f\n", rep0, distfromtip0, distfromtip[rep0] ); #if 0 for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%3d ", topol[i][1][j] ); reporterr( "\n" ); reporterr( "len=%f\n", len[i][1] ); #endif rep1 = topol[i][1][0]; distfromtip1 = distfromtip[rep1]; distfromtip[rep1] += len[i][1]; // reporterr( "distfromtip[%d] = %f->%f\n", rep1, distfromtip1, distfromtip[rep1] ); if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] ) { // reporterr( "HIT 0!\n" ); *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); for( j=0, nmem=0; (mem=topol[i][0][j])!=-1; j++ ) nmem++; // reporterr( "allocating %d\n", nmem+1 ); (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); (*tablept)[*nsubpt+1] = NULL; intcpy( (*tablept)[*nsubpt], topol[i][0] ); if( *maxmempt < nmem ) *maxmempt = nmem; *nsubpt += 1; } if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] ) { // reporterr( "HIT 1!\n" ); *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); for( j=0, nmem=0; (mem=topol[i][1][j])!=-1; j++ ) nmem++; // reporterr( "allocating %d\n", nmem+1 ); (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); (*tablept)[*nsubpt+1] = NULL; intcpy( (*tablept)[*nsubpt], 
topol[i][1] ); if( *maxmempt < nmem ) *maxmempt = nmem; *nsubpt += 1; } } if( distfromtip[0] <= threshold ) { free( distfromtip ); return( 1 ); } free( distfromtip ); return( 0 ); } double sumofpairsscore( int nseq, char **seq ) { double v = 0; int i, j; for( i=1; i 10 ) value = 10.0; // 2015/Mar/17 return( value ); } } double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 ) { double longer, shorter, lenfac, value; if( len1 > len2 ) { longer=(double)len1; shorter=(double)len2; } else { longer=(double)len2; shorter=(double)len1; } lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // reporterr( "lenfac=%f\n", lenfac ); // reporterr( "commonsextet_p()=%d\n", commonsextet_p( table1, point2 ) ); // reporterr( "ss1=%d, ss2=%d\n", ss1, ss2 ); // reporterr( "val=%f\n", (1.0-(double)commonsextet_p( table1, point2 )/ss1) ); if( ss1 == 0 || ss2 == 0 ) return( 2.0 ); value = ( 1.0 - (double)commonsextet_p( table1, point2 ) / MIN(ss1,ss2) ) * lenfac * 2.0; return( value ); // 2013/Oct/17 -> 2bai } static void movereg( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) { char *pt; int tmpint; pt = seq1; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } *start1pt = (int)( pt - seq1 ) - 1; if( tmpptr->start1 == tmpptr->end1 ) *end1pt = *start1pt; else { while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } *end1pt = (int)( pt - seq1 ) - 1; } pt = seq2; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } *start2pt = (int)( pt - seq2 ) - 1; if( tmpptr->start2 == tmpptr->end2 ) *end2pt = *start2pt; else { while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } *end2pt = (int)( pt - seq2 ) - 1; } } static void 
movereg_swap( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) { char *pt; int tmpint; pt = seq1; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } *start1pt = (int)( pt - seq1 ) - 1; if( tmpptr->start2 == tmpptr->end2 ) *end1pt = *start1pt; else { while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } *end1pt = (int)( pt - seq1 ) - 1; } pt = seq2; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } *start2pt = (int)( pt - seq2 ) - 1; if( tmpptr->start1 == tmpptr->end1 ) *end2pt = *start2pt; else { while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } *end2pt = (int)( pt - seq2 ) - 1; } } void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 ) { int i, j, k1, k2, start1, start2, end1, end2; double effij, effijx, effij_kozo; char *pt1, *pt2; LocalHom *tmpptr; void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); #if 0 fprintf( stderr, "eff1 in _init_strict = \n" ); for( i=0; iorinum2[j] ) movefunc = movereg_swap; else movefunc = movereg; } // effij = eff1[i] * eff2[j] * effijx; effij = eff1[i] * eff2[j] * effijx; effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; tmpptr = localhom[i][j]; while( tmpptr ) { // fprintf( stderr, "start1 = %d\n", tmpptr->start1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); // fprintf( stderr, "start1 = %d (%c), end1 = 
%d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; // impmtx[k1][k2] += tmpptr->fimportance * effij; if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->importance * effij_kozo; else impmtx[k1][k2] += tmpptr->importance * effij; // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } #if 0 printf( "orinum1=%d, orinum2=%d\n", *orinum1, *orinum2 ); if( *orinum1 == 0 ) { fprintf( stdout, "impmtx = \n" ); for( k2=0; k2nokori++ > 0 ) { tmpptr1 = localhomtable->last; tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); tmpptr1 = tmpptr1->next; tmpptr1->extended = -1; tmpptr1->next = NULL; localhomtable->last = tmpptr1; } else { tmpptr1 = localhomtable; } tmpptr1->start1 = st1; tmpptr1->start2 = st2; tmpptr1->end1 = st1 + len; tmpptr1->end2 = st2 + len; // tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600; // tmpptr1->opt = opt; tmpptr1->opt = ( 
(double)opt + 0.00 ) / 5.8 * 600; tmpptr1->importance = ( (double)opt + 0.00 ) / 5.8 * 600; // C0 to itchi shinai tmpptr1->overlapaa = len; // tsukau toki ha chuui tmpptr1->korh = 'h'; // fprintf( stderr, " %f %d-%d %d-%d \n", tmpptr1->opt, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2 ); } free( m ); fread( &c, sizeof( char ), 1, fp ); if( c != '\n' ) { reporterr( "\n\nError in binary hat3 \n" ); exit( 1 ); } } static int readlocalhomfromfile_autofid( LocalHom *lhpt, int nodeid, FILE *fp, int o1, int o2 ) // for hat3node { // pthread_mutex_t *filemutex = h3i->filemutex; // int fidcheck; int k1, k2; // int *fds = h3i->fds; int swap; // unsigned long long k1tri; lhpt->start1 = -1; lhpt->end1 = -1; lhpt->start2 = -1; lhpt->end2 = -1; lhpt->overlapaa = -1.0; lhpt->opt = -1.0; lhpt->importance = -1.0; lhpt->next = NULL; lhpt->nokori = 0; lhpt->extended = -1; lhpt->last = lhpt; lhpt->korh = 'h'; #if 0 // specific target ni taiousuru! if( h3i->specifictarget ) { int *targetmap = h3i->targetmap; if( targetmap[o1] == -1 && targetmap[o2] == -1 ) return( -1 ); if( targetmap[o1] == -1 ) { k1 = targetmap[o2]; k2 = o1; swap = 1; } else { k1 = targetmap[o1]; k2 = o2; swap = 0; } k1tri = 0; } else #endif { if( o2 > o1 ) { k1 = o1; k2 = o2-o1; swap = 0; } else { k1 = o2; k2 = o1-o2; swap = 1; } // k1tri = (unsigned long long)k1*(k1-1)/2; } if( fp ) { readlocalhomtable2_single_bin_noseek( fp, lhpt ); } return( swap ); } static int whichpair( int *ipt, int *jpt, FILE *fp ) { if( fread( ipt, sizeof( int ), 1, fp ) < 1 ) return( 1 ); if( fread( jpt, sizeof( int ), 1, fp ) < 1 ) return( 1 ); // <1 ha nai return( 0 ); } typedef struct _readloopthread_arg { // int thread_no; int nodeid; int nfiles; double **impmtx; char **seq1; char **seq2; int *orinum1; int *orinum2; double *eff1; double *eff2; unsigned long long *ndone; int *subidpt; pthread_mutex_t *mutex; } readloopthread_arg_t; static void *readloopthread( void *arg ) { readloopthread_arg_t *targ = 
(readloopthread_arg_t *)arg; int nodeid = targ->nodeid; // int thread_no = targ->thread_no; double **impmtx = targ->impmtx; char **seq1 = targ->seq1; char **seq2 = targ->seq2; int *orinum1 = targ->orinum1; int *orinum2 = targ->orinum2; double *eff1 = targ->eff1; double *eff2 = targ->eff2; unsigned long long *ndone = targ->ndone; int *subidpt = targ->subidpt; int nfiles = targ->nfiles; int subid = -1; pthread_mutex_t *mutex = targ->mutex; int i, j, k1, k2, start1, start2, end1, end2; double effij, effijx; char *pt1, *pt2; LocalHom *tmpptr; FILE *fp = NULL; char *fn; LocalHom lhsingle; int res; void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); initlocalhom1( &lhsingle ); effijx = 1.0 * fastathreshold; #if 0 int block; if( nfiles > 10*nthreadreadlh ) block=10; else block=1; #endif while( 1 ) { if( subid == -1 || whichpair( &i, &j, fp ) ) { while( 1 ) { if( fp ) fclose( fp ); #if 0 if( (subid+1)%block==0 ) { if( mutex ) pthread_mutex_lock( mutex ); subid = (*subidpt); (*subidpt) += block; if( mutex ) pthread_mutex_unlock( mutex ); } else subid++; #else if( mutex ) pthread_mutex_lock( mutex ); subid = (*subidpt)++; if( mutex ) pthread_mutex_unlock( mutex ); #endif if( subid >= nfiles ) return( NULL ); fn = calloc( 100, sizeof( char ) ); sprintf( fn, "hat3dir/%d-/hat3node-%d-%d", (int)(nodeid/HAT3NODEBLOCK)*HAT3NODEBLOCK, nodeid, subid ); // reporterr( "fopen %s by thread %d\n", fn, thread_no ); fp = fopen( fn, "rb" ); if( fp == NULL ) { reporterr( "Cannot open %s\n", fn ); exit( 1 ); } free( fn ); if( !whichpair( &i, &j, fp ) ) break; } } (*ndone)++; { // effij = eff1[i] * eff2[j] * effijx; effij = eff1[i] * eff2[j] * effijx; // effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; res = readlocalhomfromfile_autofid( &lhsingle, nodeid, fp, orinum1[i], orinum2[j] ); if( res == -1 ) tmpptr = NULL; else if( res == 1 ) { movefunc = movereg_swap; // h3i ga arutoki swaplist ha mushi tmpptr = &lhsingle; } else { movefunc = movereg; // h3i ga arutoki 
swaplist ha mushi tmpptr = &lhsingle; } while( tmpptr ) { // fprintf( stderr, "start1 = %d\n", tmpptr->start1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); // if( end1 - start1 != end2 - start2 ) // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { impmtx[k1][k2] += tmpptr->importance * effij; k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } freelocalhom1( &lhsingle ); } } } void fillimp_file( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2, int *uselh, int *seedinlh1, int *seedinlh2, int nodeid, int nfiles ) { int i, j, k1, k2, start1, start2, end1, end2, m0, m1, m2; double effijx, effij_kozo; char *pt1, *pt2; LocalHom *tmpptr; unsigned long long npairs; // LocalHom lhsingle; // FILE *fp = NULL; // char *fn; // int subid, res; void (*movefunc)(char *, char 
*, LocalHom *, int *, int *, int *, int * ); pthread_t *handle; readloopthread_arg_t *targ; pthread_mutex_t mutex; double ***localimpmtx; int nth; unsigned long long *localndone; unsigned long long ndone; int subid; #if 0 fprintf( stderr, "eff1 in _init_strict = \n" ); for( i=0; i m2 ) { m0=m1; m1=m2; m2=m0; } if( m2 >= njob-nadd && ( uselh==NULL || uselh[m1] || uselh[m2] ) ) // saikentou { // reporterr( "%d x %d\n", m1, m2 ); npairs++; } } #if REPORTCOSTS reporterr( "node %d, npairs = %d, nfiles = %d\n", nodeid, npairs, nfiles ); #endif } else if( uselh ) { // npairs = (unsigned long long)clus1 * clus2; npairs = 0; for( i=0; iorinum2[j] ) movefunc = movereg_swap; else movefunc = movereg; while( tmpptr ) { movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { if( tmpptr->korh == 'k' ) impmtx[k1][k2] += tmpptr->importance * effij_kozo; else // naihazu { reporterr( "okashii\n" ); exit( 1 ); } // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } tmpptr = tmpptr->next; } } } } #if 0 if( 0 || nfiles < 2 ) { unsigned long long nread; readloop_serial( nodeid, impmtx, seq1, seq2, orinum1, orinum2, eff1, eff2, &nread ); npairs -= nread; } else #endif { nth = MIN(nthreadreadlh,nfiles); subid = 0; if( nth > 1 ) { localndone = calloc( sizeof(unsigned long long), nth ); localimpmtx = calloc( 
sizeof(double **), nth ); for( i=0; i 1 ) { targ[i].ndone = localndone+i; targ[i].impmtx = localimpmtx[i]; targ[i].mutex = &mutex; pthread_create( handle+i, NULL, readloopthread, (void *)(targ+i) ); } else { targ[i].ndone = &ndone; targ[i].impmtx = impmtx; targ[i].mutex = NULL; readloopthread( targ+i ); } } if( nth > 1 ) { for( j=0; j 1 ) { for( i=0; inaln; apt = lastresx->aln; if( naln == 0 ) return; while( naln-- ) { rpt1 = apt->reg1; rpt2 = apt->reg2; nreg = apt->nreg; isumscore = 0; sumoverlap = 0; while( nreg-- ) { if( nlocalhom++ > 0 ) { // fprintf( stderr, "reallocating ...\n" ); tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // fprintf( stderr, "done\n" ); tmppt = tmppt->next; tmppt->next = NULL; } tmppt->start1 = rpt1->start; tmppt->start2 = rpt2->start; tmppt->end1 = rpt1->end; tmppt->end2 = rpt2->end; tmppt->korh = 'h'; if( rpt1 == apt->reg1 ) localhompt0 = tmppt; // ? // fprintf( stderr, "in putlocalhom, reg1: %d-%d (nreg=%d)\n", rpt1->start, rpt1->end, lastresx->nreg ); // fprintf( stderr, "in putlocalhom, reg2: %d-%d (nreg=%d)\n", rpt2->start, rpt2->end, lastresx->nreg ); len = tmppt->end1 - tmppt->start1 + 1; // fprintf( stderr, "tmppt->start1=%d\n", tmppt->start1 ); // fprintf( stderr, "tmppt->start2=%d\n", tmppt->start2 ); // fprintf( stderr, "s1+tmppt->start1=%*.*s\n", len, len, s1+tmppt->start1 ); // fprintf( stderr, "s2+tmppt->start2=%*.*s\n", len, len, s2+tmppt->start2 ); pt1 = s1 + tmppt->start1; pt2 = s2 + tmppt->start2; iscore = 0; while( len-- ) { iscore += n_dis[(int)amino_n[(unsigned char)*pt1++]][(int)amino_n[(unsigned char)*pt2++]]; // - offset $B$O$$$i$J$$$+$b(B // fprintf( stderr, "len=%d, %c-%c, iscore(0) = %d\n", len, *(pt1-1), *(pt2-1), iscore ); } if( divpairscore ) { tmppt->overlapaa = tmppt->end2-tmppt->start2+1; tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600; } else { isumscore += iscore; sumoverlap += tmppt->end2-tmppt->start2+1; } rpt1++; rpt2++; } #if 0 fprintf( stderr, "iscore (1)= %d\n", iscore ); 
/* Return the number of ',' characters in the NUL-terminated string s. */
static int countcomma( char *s )
{
	int ncomma = 0;
	char *p;

	for( p=s; *p; p++ )
	{
		if( *p == ',' ) ncomma++;
	}
	return( ncomma );
}
// fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); free( aln1 ); free( aln2 ); return( value ); } static void block2reg( char *block, Reg *reg1, Reg *reg2, int start1, int start2 ) { Reg *rpt1, *rpt2; char *tpt, *npt; int pos1, pos2; int len, glen1, glen2; pos1 = start1; pos2 = start2; rpt1 = reg1; rpt2 = reg2; while( block ) { block++; // fprintf( stderr, "block = %s\n", block ); tpt = strchr( block, ':' ); npt = strchr( block, ',' ); if( !tpt || tpt > npt ) { len = atoi( block ); reg1->start = pos1; reg2->start = pos2; pos1 += len - 1; pos2 += len - 1; reg1->end = pos1; reg2->end = pos2; // fprintf( stderr, "in loop reg1: %d-%d\n", reg1->start, reg1->end ); // fprintf( stderr, "in loop reg2: %d-%d\n", reg2->start, reg2->end ); reg1++; reg2++; } else { sscanf( block, "%d:%d", &glen1, &glen2 ); pos1 += glen1 + 1; pos2 += glen2 + 1; } block = npt; } reg1->start = reg1->end = reg2->start = reg2->end = -1; while( rpt1->start != -1 ) { // fprintf( stderr, "reg1: %d-%d\n", rpt1->start, rpt1->end ); // fprintf( stderr, "reg2: %d-%d\n", rpt2->start, rpt2->end ); rpt1++; rpt2++; } // *apt1 = *apt2 = 0; // fprintf( stderr, "aln1 = %s\n", aln1 ); // fprintf( stderr, "aln2 = %s\n", aln2 ); } static void readlastresx_singleq( FILE *fp, int n1, int nameq, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, 
&alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; if( name2 != nameq ) { fprintf( stderr, "BUG!!!\n" ); exit( 1 ); } // if( lastresx[name1][name2].score ) continue; // dame!!!! prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 1 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 1 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 1 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 1 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // 
lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void readlastresx_group( FILE *fp, Lastresx **lastresx ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #endif #endif static void readlastresx( FILE *fp, int n1, int n2, Lastresx **lastresx, char **seq1, char **seq2 ) { char *gett; Aln *tmpaln; int prevnaln, naln, nreg; #if 0 int i, pstart, pend, end1, end2; #endif int score, name1, start1, alnSize1, seqSize1; int name2, start2, alnSize2, seqSize2; char strand1, strand2; int includeintoscore; gett = calloc( 10000, sizeof( char ) ); // fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); // fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); while( 1 ) { fgets( gett, 9999, fp ); if( feof( fp ) ) break; if( gett[0] == '#' ) continue; // fprintf( stdout, "gett = %s\n", gett ); if( gett[strlen(gett)-1] != '\n' ) { fprintf( stderr, "Too long line?\n" ); exit( 1 ); } sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d", &score, &name1, &start1, &alnSize1, &strand1, &seqSize1, &name2, &start2, &alnSize2, &strand2, &seqSize2 ); if( alg == 'R' && name2 <= name1 ) continue; // if( lastresx[name1][name2].score ) continue; // dame!!!! 
prevnaln = lastresx[name1][name2].naln; #if 0 for( i=0; i 3 ) break; if( pstart <= end1 && end1 <= pend && end1 - pstart > 3 ) break; pstart = lastresx[name1][name2].aln[i].reg2[0].start + 0; pend = lastresx[name1][name2].aln[i].reg2[nreg-1].end - 0; end2 = start2 + alnSize2; // fprintf( stderr, "pstart = %d, pend = %d\n", pstart, pend ); if( pstart <= start2 && start2 <= pend && pend - start2 > 3 ) break; if( pstart <= end2 && end2 <= pend && end2 - pstart > 3 ) break; } includeintoscore = ( i == prevnaln ); #else if( prevnaln ) includeintoscore = 0; else includeintoscore = 1; #endif if( !includeintoscore && !lastsubopt ) continue; naln = prevnaln + 1; lastresx[name1][name2].naln = naln; // fprintf( stderr, "OK! add this alignment to hat3, %d-%d, naln = %d->%d\n", name1, name2, prevnaln, naln ); if( ( tmpaln = (Aln *)realloc( lastresx[name1][name2].aln, (naln) * sizeof( Aln ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" ); exit( 1 ); } else lastresx[name1][name2].aln = tmpaln; nreg = countcomma( gett )/2 + 1; lastresx[name1][name2].aln[prevnaln].nreg = nreg; // lastresx[name1][name2].aln[naln].nreg = -1; // lastresx[name1][name2].aln[naln].reg1 = NULL; // lastresx[name1][name2].aln[naln].reg2 = NULL; // fprintf( stderr, "name1=%d, name2=%d, nreg=%d, prevnaln=%d\n", name1, name2, nreg, prevnaln ); if( ( lastresx[name1][name2].aln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } if( ( lastresx[name1][name2].aln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL ) // yoyu nashi { fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" ); exit( 1 ); } // lastresx[name1][name2].aln[prevnaln].reg1[0].start = -1; // iranai? // lastresx[name1][name2].aln[prevnaln].reg2[0].start = -1; // iranai? 
block2reg( strrchr( gett, '\t' ), lastresx[name1][name2].aln[prevnaln].reg1, lastresx[name1][name2].aln[prevnaln].reg2, start1, start2 ); if( includeintoscore ) { if( lastresx[name1][name2].score ) score += penalty; lastresx[name1][name2].score += score; } // fprintf( stderr, "score(%d,%d) = %d\n", name1, name2, lastresx[name1][name2].score ); } free( gett ); } #ifdef enablemultithread #if 0 static void *lastcallthread_group( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; int qstart, qend, shou, amari; char kd[1000]; if( nthread ) { shou = nq / nthread; amari = nq - shou * nthread; fprintf( stderr, "shou: %d, amari: %d\n", shou, amari ); qstart = thread_no * shou; if( thread_no - 1 < amari ) qstart += thread_no; else qstart += amari; qend = qstart + shou - 1; if( thread_no < amari ) qend += 1; fprintf( stderr, "%d: %d-%d\n", thread_no, qstart, qend ); } k = -1; while( 1 ) { if( nthread ) { if( qstart > qend ) break; if( k == thread_no ) break; fprintf( stderr, "\n%d-%d / %d (thread %d) \n", qstart, qend, nq, thread_no ); k = thread_no; } else { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { fprintf( stderr, "Not supported\n" ); exit( 1 ); } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } for( i=qstart; i<=qend; i++ ) fprintf( lfp, ">%d\n%s\n", i, qseq[i] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, 
k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_group( lfp, lastresx ); fclose( lfp ); } return( NULL ); } #endif #endif static void *lastcallthread( void *arg ) { lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg; int k, i; int nq = targ->nq; int nd = targ->nd; #ifdef enablemultithread int thread_no = targ->thread_no; int *kshare = targ->kshare; #endif Lastresx **lastresx = targ->lastresx; char **dseq = targ->dseq; char **qseq = targ->qseq; char command[5000]; FILE *lfp; int msize; int klim; char kd[1000]; k = -1; while( 1 ) { #ifdef enablemultithread if( nthread ) { pthread_mutex_lock( targ->mutex ); k = *kshare; if( k == nq ) { pthread_mutex_unlock( targ->mutex ); break; } fprintf( stderr, "\r%d / %d (thread %d) \r", k, nq, thread_no ); ++(*kshare); pthread_mutex_unlock( targ->mutex ); } else #endif { k++; if( k == nq ) break; fprintf( stderr, "\r%d / %d \r", k, nq ); } if( alg == 'R' ) // if 'r' -> calllast_fast { klim = MIN( k, njob-nadd ); // klim = k; // dochira demo yoi if( klim == k ) { sprintf( command, "_db%dd", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _db." 
); exit( 1 ); } for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); // sprintf( command, "md5sum _db%dd > /dev/tty", k ); // system( command ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _db%dd _db%dd", whereispairalign, k, k ); else sprintf( command, "%s/lastdb -p _db%dd _db%dd", whereispairalign, k, k ); system( command ); sprintf( kd, "%d", k ); } else // calllast_fast de tsukutta nowo riyou { kd[0] = 0; // fprintf( stderr, "klim=%d, njob=%d, nadd=%d, skip!\n", klim, njob, nadd ); } } else // 'r' { kd[0] = 0; } sprintf( command, "_q%d", k ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open %s", command ); exit( 1 ); } fprintf( lfp, ">%d\n%s\n", k, qseq[k] ); fclose( lfp ); // if( alg == 'R' ) msize = MAX(10,k+nq); // else msize = MAX(10,nd+nq); if( alg == 'R' ) msize = MAX(10,k*lastm); else msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, k=%d\n", msize, k ); // sprintf( command, "grep '>' _db%sd", kd ); // system( command ); sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k ); if( system( command ) ) exit( 1 ); sprintf( command, "_lastres%d", k ); lfp = fopen( command, "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres%d", k ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); // fprintf( stderr, "Reading lastres\n" ); readlastresx_singleq( lfp, nd, k, lastresx ); fclose( lfp ); } return( NULL ); } static void calllast_fast( int nd, char **dseq, int nq, char **qseq, Lastresx **lastresx ) { int i, j; FILE *lfp; char command[1000]; lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } if( dorp == 'd' ) { fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; 
j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } else { fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } } fclose( lfp ); // if( alg == 'r' ) // if 'R' -> lastcallthread, kokonoha nadd>0 no toki nomi shiyou { sprintf( command, "_dbd" ); lfp = fopen( command, "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _dbd" ); exit( 1 ); } if( alg == 'R' ) j = njob-nadd; else j = nd; for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) sprintf( command, "%s/lastdb _dbd _dbd", whereispairalign ); else sprintf( command, "%s/lastdb -p _dbd _dbd", whereispairalign ); system( command ); } #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex; lastcallthread_arg_t *targ; int *ksharept; targ = (lastcallthread_arg_t *)calloc( nthread, sizeof( lastcallthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); ksharept = calloc( 1, sizeof(int) ); *ksharept = 0; pthread_mutex_init( &mutex, NULL ); for( i=0; i%d\n%s\n", i, dseq[i] ); fclose( lfp ); if( dorp == 'd' ) { sprintf( command, "%s/lastdb _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<4; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<4; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); #if 0 sprintf( command, "lastex -s 2 -a %d -b %d -p _scoringmatrixforlast -E 10000 _db.prj _db.prj > _lastex", -penalty, -penalty_ex ); system( command ); lfp = fopen( "_lastex", "r" ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); fgets( command, 4999, lfp ); laste = atoi( 
command ); fclose( lfp ); fprintf( stderr, "laste = %d\n", laste ); sleep( 10 ); #else // laste = 5000; #endif } else { sprintf( command, "%s/lastdb -p _db _db", whereispairalign ); system( command ); lfp = fopen( "_scoringmatrixforlast", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _scoringmatrixforlast" ); exit( 1 ); } fprintf( lfp, " " ); for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] ); fprintf( lfp, "\n" ); for( i=0; i<20; i++ ) { fprintf( lfp, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] ); fprintf( lfp, "\n" ); } fclose( lfp ); // fprintf( stderr, "Not written yet\n" ); } lfp = fopen( "_q", "w" ); if( !lfp ) { fprintf( stderr, "Cannot open _q" ); exit( 1 ); } for( i=0; i%d\n%s\n", i, qseq[i] ); } fclose( lfp ); msize = MAX(10,nd*lastm); // fprintf( stderr, "Calling lastal from calllast_once, msize=%d\n", msize ); sprintf( command, "%s/lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", whereispairalign, msize, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", 1, laste, -penalty, -penalty_ex ); // sprintf( command, "lastal -v -e 40 -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", -penalty, -penalty_ex ); res = system( command ); if( res ) { fprintf( stderr, "LAST aborted\n" ); exit( 1 ); } lfp = fopen( "_lastres", "r" ); if( !lfp ) { fprintf( stderr, "Cannot read _lastres" ); exit( 1 ); } // readlastres( lfp, nd, nq, lastres, dseq, qseq ); fprintf( stderr, "Reading lastres\n" ); readlastresx( lfp, nd, nq, lastresx, dseq, qseq ); fclose( lfp ); } static void callfoldalign( int nseq, char **mseq ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, 
"Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; // for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static double recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; // int res; static char com[10000]; double value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); t2u( ori1 ); t2u( ori2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. 
return( value ); } static double calldafs_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; double value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_dafsinorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsinorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_dafsinorg > %s/_dafsin", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "_dafssh%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/dafs --mafft-in _bpp _dafsin > _dafsout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); sprintf( com, "tr -d '\\r' < _dafssh%s > _dafssh%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _dafssh%s.unix 2>_dum%s", dirname, dirname ); res = system( com ); if( res ) { fprintf( stderr, "Error in dafs\n" ); exit( 1 ); } sprintf( com, "%s/_dafsout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_dafsout\n", dirname ); exit( 1 ); } myfgets( com, 999, fp ); // nagai kanousei ga arunode 
fgets( com, 999, fp ); myfgets( com, 999, fp ); // nagai kanousei ga arunode fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static double callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j ) { FILE *fp; int res; char *com; double value; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); sprintf( dirname, "_%d-%d", i, j ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); sprintf( com, "%s/_bpporg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname ); system( com ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); sprintf( com, "%s/_mxscarnainorg", dirname ); fp = fopen( com, "w" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnainorg\n", dirname ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); sprintf( com, "tr -d '\\r' < %s/_mxscarnainorg > %s/_mxscarnain", dirname, dirname ); system( com ); // for cygwin, wakaran #if 0 sprintf( com, "cd %s; %s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", dirname, whereispairalign 
); #else sprintf( com, "_mxscarnash%s", dirname ); fp = fopen( com, "w" ); fprintf( fp, "cd %s\n", dirname ); fprintf( fp, "%s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum\n", whereispairalign ); fprintf( fp, "exit $tatus\n" ); fclose( fp ); //sleep( 10000 ); sprintf( com, "tr -d '\\r' < _mxscarnash%s > _mxscarnash%s.unix", dirname, dirname ); system( com ); // for cygwin, wakaran sprintf( com, "sh _mxscarnash%s.unix 2>_dum%s", dirname, dirname ); #endif res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } sprintf( com, "%s/_mxscarnaout", dirname ); fp = fopen( com, "r" ); if( !fp ) { fprintf( stderr, "Cannot open %s/_mxscarnaout\n", dirname ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (double)naivepairscore11( *mseq1, *mseq2, penalty ); #if 0 sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); usleep( 2000 ); system( com ); } #endif free( dirname ); free( com ); return( value ); } static void readhat4( FILE *fp, char ***bpp ) { char oneline[1000]; int bppsize; int onechar; // double prob; // int posi, posj; bppsize = 0; // fprintf( stderr, "reading hat4\n" ); onechar = getc(fp); // fprintf( stderr, "onechar = %c\n", onechar ); if( onechar != '>' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } ungetc( onechar, fp ); fgets( oneline, 999, fp ); while( 1 ) { onechar = getc(fp); ungetc( onechar, fp ); if( onechar == '>' || onechar == EOF ) { // fprintf( stderr, "Next\n" ); *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = NULL; break; } fgets( oneline, 999, fp ); // fprintf( stderr, "oneline=%s\n", oneline ); // sscanf( oneline, "%d %d %lf", &posi, &posj, &prob ); // fprintf( stderr, "%d %d -> %f\n", posi, posj, prob ); 
*bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = calloc( 100, sizeof( char ) ); strcpy( (*bpp)[bppsize], oneline ); bppsize++; } } static void preparebpp( int nseq, char ***bpp ) { FILE *fp; int i; fp = fopen( "hat4", "r" ); if( !fp ) { fprintf( stderr, "Cannot open hat4\n" ); exit( 1 ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { // reporterr( "(*argv)[0] in while loop = %s\n", (*argv) ); while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; // fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; // fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; // fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; #if 0 case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; #else case 'l': if( atof( *++argv ) < 0.00001 ) store_localhom = 0; --argc; goto nextoption; #endif case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", 
whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); // fprintf( stderr, "nthread = %d\n", nthread ); --argc; #ifndef enablemultithread nthread = 0; #endif goto nextoption; case 'I': nadd = myatoi( *++argv ); // fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'w': lastm = myatoi( *++argv ); fprintf( stderr, "lastm = %d\n", lastm ); --argc; goto nextoption; case 'e': laste = myatoi( *++argv ); fprintf( stderr, "laste = %d\n", laste ); --argc; goto nextoption; case 'u': specificityconsideration = (double)myatof( *++argv ); // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame. break; case 'c': stdout_dist = 1; break; case 'n': stdout_align = 1; break; case 'x': store_localhom = 0; store_dist = 0; break; #if 1 case 'a': fmodel = 1; break; #endif #if 0 case 'r': fmodel = -1; break; #endif case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 0 case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #endif #if 0 case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; case 'a': alg = 'a'; break; case 'S': alg = 'S'; break; #endif case 'U': lastonce = 1; break; case 'S': lastsubopt = 1; break; case 't': alg = 't'; store_localhom = 0; break; case 'L': alg = 'L'; break; case 'Y': alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L; break; case 'Z': usenaivescoreinsteadofalignmentscore = 1; break; case 's': alg = 's'; break; case 'G': alg = 'G'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'r': alg = 'r'; // nadd>0 no toki nomi. 
moto no hairetsu to atarashii hairetsuno alignmnt -> R, last break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; case '=': specifictarget = 1; break; case ':': nwildcard = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'o': // foldalignopt = *++argv; strcat( foldalignopt, " " ); strcat( foldalignopt, *++argv ); fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); --argc; goto nextoption; #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; #endif default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "pairlocalalign options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } }

/*
 * Count the characters other than '-' among the first `end` characters
 * of s.
 *
 * s   : character buffer; at least `end` characters must be readable
 *       (the scan does not stop at a terminating NUL).
 * end : number of characters to examine.  Zero or a negative value
 *       yields 0; a negative value previously made the loop run past
 *       the buffer.
 *
 * Returns the number of non-'-' characters seen.
 */
int countamino( char *s, int end )
{
	int val = 0;

	while( end-- > 0 )
	{
		if( *s++ != '-' )
			val++;
	}
	return( val );
}

/*
 * Convert a pairwise alignment score into a distance.
 *
 * The smaller of the two self-scores is used as the denominator:
 *   - denominator == 0.0         -> 2.0
 *   - denominator <  pscore      -> 0.0 (flagged by the original author
 *                                   as a known problem case)
 *   - otherwise                  -> ( 1 - pscore / denominator ) * 2
 *
 * pscore     : score of the pair.
 * selfscore1 : score of the first sequence against itself.
 * selfscore2 : score of the second sequence against itself.
 */
static double score2dist( double pscore, double selfscore1, double selfscore2)
{
	double denom = MIN( selfscore1, selfscore2 );

	if( denom == 0.0 )
		return( 2.0 );
	if( denom < pscore ) // known problem case (original note: "mondai ari")
		return( 0.0 );
	return( ( 1.0 - pscore / denom ) * 2.0 );
}

#if enablemultithread
static void *athread( void *arg ) // alg='R', alg='r' -> tsukawarenai. 
{ thread_arg_t *targ = (thread_arg_t *)arg; int i, ilim, j, jst; int off1, off2, dum1, dum2, thereisx; int intdum; double pscore = 0.0; // by D.Mathog double *effarr1; double *effarr2; char **mseq1, **mseq2, **distseq1, **distseq2, **dumseq1, **dumseq2; char **aseq; double **dynamicmtx = NULL; double dist; double scoreoffset; // thread_arg int thread_no = targ->thread_no; int njob = targ->njob; Jobtable *jobpospt = targ->jobpospt; char **name = targ->name; char **seq = targ->seq; char **dseq = targ->dseq; int *thereisxineachseq = targ->thereisxineachseq; LocalHom **localhomtable = targ->localhomtable; double **distancemtx = targ->distancemtx; double *selfscore = targ->selfscore; char ***bpp = targ->bpp; Lastresx **lastresx = targ->lastresx; int alloclen = targ->alloclen; int *targetmap = targ->targetmap; double **expdist = targ->expdist; // fprintf( stderr, "thread %d start!\n", thread_no ); effarr1 = AllocateDoubleVec( 1 ); effarr2 = AllocateDoubleVec( 1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); if( alg == 'N' ) { dumseq1 = AllocateCharMtx( 1, alloclen+10 ); dumseq2 = AllocateCharMtx( 1, alloclen+10 ); } distseq1 = AllocateCharMtx( 1, 0 ); distseq2 = AllocateCharMtx( 1, 0 ); aseq = AllocateCharMtx( 2, alloclen+10 ); if( specificityconsideration > 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd; else ilim = njob - 1; while( 1 ) { pthread_mutex_lock( targ->mutex_counter ); j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; if( alg == 'Y' || alg == 'r' ) jst = njob - nadd; else jst = i + 1; j = jst; if( i == ilim ) { // fprintf( stderr, "thread %d end!\n", thread_no ); pthread_mutex_unlock( targ->mutex_counter ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; if( commonJP ) FreeIntMtx( commonJP ); commonJP = NULL; Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 
20130603 G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 ); L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL ); L__align11_noalign( NULL, NULL, NULL ); genL__align11( NULL, NULL, NULL, 0, NULL, NULL ); free( effarr1 ); free( effarr2 ); free( mseq1 ); free( mseq2 ); if( alg == 'N' ) { FreeCharMtx( dumseq1 ); FreeCharMtx( dumseq2 ); } free( distseq1 ); free( distseq2 ); FreeCharMtx( aseq ); if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); return( NULL ); } } jobpospt->j = j; jobpospt->i = i; pthread_mutex_unlock( targ->mutex_counter ); // if( j == i+1 || j % 100 == 0 ) if( j == i+1 && i % 10 == 0 ) { fprintf( stderr, "% 5d / %d (by thread %3d) \r", i, njob-nadd, thread_no ); // fprintf( stderr, "% 5d - %5d / %d (thread %d)\n", i, j, njob, thread_no ); } if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 ) { if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = 3.0; else distancemtx[i][j-i] = 3.0; } if( stdout_dist) { pthread_mutex_lock( targ->mutex_stdout ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 3.0 ); pthread_mutex_unlock( targ->mutex_stdout ); } continue; } strcpy( aseq[0], seq[i] ); strcpy( aseq[1], seq[j] ); // clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 ); // clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 ); // fprintf( stderr, "Skipping conjuction..\n" ); effarr1[0] = 1.0; effarr2[0] = 1.0; mseq1[0] = aseq[0]; mseq2[0] = aseq[1]; thereisx = thereisxineachseq[i] + thereisxineachseq[j]; // strcpy( distseq1[0], dseq[i] ); // nen no tame // strcpy( distseq2[0], dseq[j] ); // nen no tame distseq1[0] = dseq[i]; distseq2[0] = dseq[j]; // fprintf( stderr, "mseq1 = %s\n", mseq1[0] ); // fprintf( stderr, "mseq2 = %s\n", mseq2[0] ); #if 0 fprintf( stderr, "group1 = %.66s", indication1 ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); fprintf( stderr, "\n" ); #endif // for( l=0; l 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( 
pscore, selfscore[i], selfscore[j] ); if( ( scoreoffset = dist2offset( dist ) ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!! break; case( 'Y' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { if( store_localhom ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } else pscore = 0.0; break; case( 'A' ): if( usenaivescoreinsteadofalignmentscore ) { G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // if( store_localhom ) if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) ) { pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap ); if( thereisx ) pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); // dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20 
if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap ); } // pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 ); } #endif } else pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki } off1 = off2 = 0; break; case( 'N' ): if( usenaivescoreinsteadofalignmentscore ) { genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) { strcpy( dumseq1[0], distseq1[0] ); strcpy( dumseq2[0], distseq2[0] ); pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki } #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. 
strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } break; case( 't' ): off1 = off2 = 0; // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // tsuneni distseq shiyou break; case( 's' ): pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'G' ): pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; #if 0 case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen ); off1 = off2 = 0; break; case( 'K' ): pscore = genG__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 'H' ): pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen ); break; case( 'B' ): case( 'T' ): pscore = recalllara( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 'M' ): pscore = MSalign11( mseq1, mseq2, alloclen ); break; #endif default: ErrorExit( "\n\nERROR IN SOURCE FILE\n\n" ); } } if( alg == 't' || ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) // 't' no jouken ha iranai to omou. if( ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) { #if SCOREOUT fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j ); #endif // if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ????? 
else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( !specifictarget && alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else // putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); { if( targetmap[i] != -1 && targetmap[j] != -1 ) { putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); // sukoshi muda. } else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); else if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); #if 0 if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); #endif else { reporterr( "okashii\n" ); exit( 1 ); } } } pscore = score2dist( pscore, selfscore[i], selfscore[j] ); // pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // pscore = score2dist( pscore, selfscore[i], selfscore[j] ); // reporterr( "->pscore = %f\n", pscore ); } else { pscore = 2.0; } #if 1 // mutex if( stdout_align ) { pthread_mutex_lock( targ->mutex_stdout ); if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, 
mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } pthread_mutex_unlock( targ->mutex_stdout ); } if( stdout_dist ) { pthread_mutex_lock( targ->mutex_stdout ); if( j == i+1 ) fprintf( stdout, "%d %d d=%.3f\n", i+1, i+1, 0.0 ); fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); pthread_mutex_unlock( targ->mutex_stdout ); } #endif // mutex if( store_dist ) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j-i] = pscore; } } } #endif static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **dseq, int *thereisxineachseq, char **mseq1, char **mseq2, int alloclen, Lastresx **lastresx, double **distancemtx, LocalHom **localhomtable, double **expdist, int ngui ) { int i, j, ilim, jst, jj; int off1, off2, dum1, dum2, thereisx; double pscore = 0.0; // by D.Mathog FILE *hat2p, *hat3p; // double **distancemtx; double *selfscore; double *effarr1; double *effarr2; char *pt; char *hat2file = "hat2"; // LocalHom **localhomtable = NULL, LocalHom *tmpptr; int intdum; char ***bpp = NULL; // mxscarna no toki dake char **distseq1, **distseq2; char **dumseq1, **dumseq2; double dist; double scoreoffset; int ntarget; int *targetmap, *targetmapr; if( specifictarget ) { targetmap = calloc( njob, sizeof( int ) ); ntarget = 0; for( i=0; i_focus_' to the title lines of the sequences to be focused on.\n\n" ); exit( 1 ); } else { reporterr( "nfocus = %d \n", ntarget ); } } else { ntarget = njob; targetmap = calloc( njob, sizeof( int ) ); targetmapr = calloc( njob, sizeof( int ) ); for( i=0; i 0 ) // alg=='r' || alg=='R' -> nthread:=0 (sukoshi ue) { Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex_counter; pthread_mutex_t mutex_stdout; thread_arg_t *targ; if( alg == 'Y' || alg == 'r' ) jobpos.j = njob - nadd - 1; else jobpos.j = 0; jobpos.i = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( 
&mutex_counter, NULL ); pthread_mutex_init( &mutex_stdout, NULL ); for( i=0; i 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd; else ilim = njob - 1; for( i=0; i 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); // dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20 if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap ); } // pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 ); } #endif } else pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki } off1 = off2 = 0; break; case( 'N' ): // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen ); if( usenaivescoreinsteadofalignmentscore ) { genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) { strcpy( dumseq1[0], distseq1[0] ); strcpy( dumseq2[0], distseq2[0] ); pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki } #if 1 if( specificityconsideration > 0.0 ) { // fprintf( stderr, "dist = %f\n", score2dist( pscore, selfscore[i], selfscore[j] ) ); if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( dist2offset( dist ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. 
strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } break; case( 'R' ): if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi pscore = 0.0; else pscore = (double)lastresx[i][j].score; // all pair break; case( 'r' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) pscore = (double)lastresx[i][j-(njob-nadd)].score; else pscore = 0.0; break; case( 'L' ): if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi pscore = 0.0; else { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { // if( store_localhom ) if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); // all pair if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair #if 1 if( specificityconsideration > 0.0 ) { if( expdist ) dist = expdist[i][j]; else dist = score2dist( pscore, selfscore[i], selfscore[j] ); if( ( scoreoffset = dist2offset( dist ) ) < 0.0 ) { makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru. strcpy( mseq1[0], seq[i] ); strcpy( mseq2[0], seq[j] ); L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 ); } } #endif } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair } } // pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!! 
break; case( 'Y' ): if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan { if( usenaivescoreinsteadofalignmentscore ) { L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki } else { if( store_localhom ) { pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki } else pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); } } else pscore = 0.0; break; case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, 1, 1, alloclen ); off1 = off2 = 0; break; #if 0 case( 'K' ): pscore = genG__align11( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; #endif case( 'H' ): pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen ); break; case( 'B' ): case( 'T' ): pscore = recalllara( mseq1, mseq2, alloclen ); off1 = off2 = 0; break; case( 's' ): pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'G' ): pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j ); off1 = off2 = 0; break; case( 'M' ): pscore = MSalign11( mseq1, mseq2, alloclen ); break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } if( alg == 't' || ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) // 't' no jouken ha iranai to omou. 
if( ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) { #if SCOREOUT fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j ); #endif // if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) // x-ins-i de seido teika if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) { if( !store_localhom ) ; else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1) ; else if( alg == 'R' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' ); else if( alg == 'r' ) putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ????? else if( alg == 'H' ) putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( alg == 'Y' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else if( !specifictarget && alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else { if( targetmap[i] != -1 && targetmap[j] != -1 ) { putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); // sukoshi muda. 
} else if( targetmap[j] != -1 ) putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); else if( targetmap[i] != -1 ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); else { reporterr( "okashii\n" ); exit( 1 ); } } } pscore = score2dist( pscore, selfscore[i], selfscore[j] ); } else { pscore = 2.0; } if( stdout_align ) { if( alg != 't' ) { fprintf( stdout, "sequence %d - sequence %d, pairwise distance = %10.5f\n", i+1, j+1, pscore ); fprintf( stdout, ">%s\n", name[i] ); write1seq( stdout, mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } } if( stdout_dist ) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore ); if( store_dist) { if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore; else distancemtx[i][j-i] = pscore; } } } if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); } if( store_dist && ngui == 0 ) { hat2p = fopen( hat2file, "w" ); if( !hat2p ) ErrorExit( "Cannot open hat2." ); if( alg == 'Y' || alg == 'r' ) WriteHat2_part_pointer( hat2p, njob, nadd, name, distancemtx ); else // WriteHat2_pointer( hat2p, njob, name, distancemtx ); WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, distancemtx ); // jissiha double fclose( hat2p ); } hat3p = fopen( "hat3", "w" ); if( !hat3p ) ErrorExit( "Cannot open hat3." 
); if( store_localhom && ngui == 0 ) { fprintf( stderr, "\n\n##### writing hat3\n" ); if( alg == 'Y' || alg == 'r' ) ilim = njob-nadd; else if( specifictarget ) ilim = ntarget; else ilim = njob-1; for( i=0; inext ) { // fprintf( stderr, "j=%d, jj=%d\n", j, jj ); if( tmpptr->opt == -1.0 ) continue; // tmptmptmptmptmp // if( alg == 'B' || alg == 'T' ) // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); // else if( targetmap[j] == -1 || targetmap[i] < targetmap[j] ) fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", targetmapr[i], j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2+1, tmpptr->end2+1 ); // zettai dame!!!! } } } // if( ngui == 0 ) // { #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif if( alg == 'Y' || alg == 'r' ) FreeLocalHomTable_part( localhomtable, (njob-nadd), nadd ); else if( specifictarget ) FreeLocalHomTable_part( localhomtable, ntarget, njob ); else FreeLocalHomTable_half( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. 
FreeLocalHomTable\n" ); #endif // } } fclose( hat3p ); if( alg == 's' ) { char **ptpt; for( i=0; i M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } } if( ( alg == 'r' || alg == 'R' ) && dorp == 'p' ) { fprintf( stderr, "Not yet supported\n" ); exit( 1 ); } alloclen = nlenmax*2; if( ngui ) { seq = seqgui; name = namegui; } else { seq = AllocateCharMtx( njob, alloclen+10 ); name = AllocateCharMtx( njob, B ); } aseq = AllocateCharMtx( 2, alloclen+10 ); bseq = AllocateCharMtx( njob, alloclen+10 ); dseq = AllocateCharMtx( njob, alloclen+10 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); nlen = AllocateIntVec( njob ); thereisxineachseq = AllocateIntVec( njob ); if( alg == 'R' ) { lastresx = calloc( njob+1, sizeof( Lastresx * ) ); for( i=0; i 1 ) { fprintf( stderr, "\nThe order of distances is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself, using sort -n -k 2 | sort -n -k 1 -s\n" ); } if( stdout_align && nthread > 1 ) { fprintf( stderr, "\nThe order of pairwise alignments is not identical to that in the input file, because of the parallel calculation. 
Reorder them by yourself.\n" ); } #if 1 if( lastresx ) { for( i=0; lastresx[i]; i++ ) { for( j=0; lastresx[i][j].naln!=-1; j++ ) { for( k=0; k&2\n sleep 2\n done\n" ); // reporterr( "before bcast\n" ); // system( "hostname 1>&2" ); MPI_Barrier(MPI_COMM_WORLD); // reporterr( "after bcast\n" ); // system( "hostname 1>&2" ); #endif if(my_rank==0){ fn = calloc( 100, sizeof( char ) ); // system( "rm -rf hat3dir/" ); // toriaezu // system( "mkdir -p hat3dir/" ); // toriaezu for( i=0; i&2" ); } #if 0 // -> mltalln9_mpi.c // synchronize all processes to make sure hat3dir/ is cached in all nodes MPI_Barrier(MPI_COMM_WORLD); // system( "echo 3 > /proc/sys/vm/drop_caches 1>&2" ); // only for root // system( "ls -ltrR hat3dir/ 1>&2" ); // system( "hostname 1>&2" ); fn = calloc( 100, sizeof( char ) ); for( i=0; i0; i-- ) { if( alignmentlength != strlen( seq[i] ) ) { fprintf( stderr, "#################################################################################\n" ); fprintf( stderr, "# ERROR! \n" ); fprintf( stderr, "# For the --add option, the original%4d sequences must be aligned \n", njob-nadd ); fprintf( stderr, "#################################################################################\n" ); exit( 1 ); } } } if( specifictarget ) { reporterr( "specifictarget\n" ); ntarget = 0; for( i=0; i_focus_' to the title lines of the sequences to be focused on.\n\n" ); exit( 1 ); } else { reporterr( "nfocus = %d \n", ntarget ); } } else { ntarget = njob; // targetmap = calloc( njob, sizeof( int ) ); // targetmapr = calloc( njob, sizeof( int ) ); // for( i=0; i&2" 0 15 if [ ! $MAFFT_N_THREADS_PER_PROCESS ]; then echo "set MAFFT_N_THREADS_PER_PROCESS, the number of cores to be used by a process." 1>&2 exit 1 fi if [ ! 
"$MAFFT_MPIRUN" ]; then MAFFT_MPIRUN="mpirun" fi command="$* -C $MAFFT_N_THREADS_PER_PROCESS" #rm -rf hat3dir # To avoid cache, don't touch hat3dir here $MAFFT_MPIRUN $command 1>&2 sync; if [ -s hat3dir/tree ]; then #if [ -s infile.tree ]; then val=0 else echo "error in MPI" 1>&2 val=1 fi trap - 0 15 exit $val mafft-7.505-without-extensions/license0000644000175000017500000000334313066343607017433 0ustar nileshnilesh============================================================ If you have the './extensions' directory, please also see license.extensions file. ============================================================ The codes in the './core' directory is distributed with the BSD license. MAFFT: multiple sequence alignment program Copyright (c) 2009 Kazutaka Katoh Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- mafft-7.505-without-extensions/readme0000644000175000017500000000775214224477543017262 0ustar nileshnilesh----------------------------------------------------------------------- MAFFT: a multiple sequence alignment program version 7.505, 2022/Apr/10 http://mafft.cbrc.jp/alignment/software/ katoh@ifrec.osaka-u.ac.jp ----------------------------------------------------------------------- 1. COMPILE % cd core % make clean % make % cd .. If you have the './extensions' directory, which is for RNA alignments, % cd extensions % make clean % make % cd .. 2. INSTALL (select 2a or 2b) 2a. Install to /usr/local/ using root account # cd core # make install # cd .. If you have the './extensions' directory, # cd extensions # make install # cd .. By this procedure (2a), programs are installed into /usr/local/bin/. Some binaries, which are not directly used by a user, are installed into /usr/local/libexec/mafft/. If the MAFFT_BINARIES environment variable is set to /somewhare/else/, the binaries in the /somewhere/else/ directory are used, instead of those in /usr/local/libexec/mafft/. 2b. 
Install to non-default location (root account is not necessary) % cd core/ Edit the first line of Makefile From: PREFIX = /usr/local To: PREFIX = /home/your_home/somewhere Edit the third line of Makefile From: BINDIR = $(PREFIX)/bin To: BINDIR = /home/your_home/bin (or elsewhere in your command-search path) % make clean % make % make install If you have the './extensions' directory, % cd ../extensions/ Edit the first line of Makefile From: PREFIX = /usr/local To: PREFIX = /home/your_home/somewhere % make clean % make % make install The MAFFT_BINARIES environment variable *must not be* set. If the MAFFT_BINARIES environment variable is set to /somewhere/else/, it overrides the setting of PREFIX (/home/your_home/somewhere/ in the above example) in Makefile. 3. CHECK % cd test % rehash # if necessary % mafft sample > test.fftns2 # FFT-NS-2 % mafft --maxiterate 100 sample > test.fftnsi # FFT-NS-i % mafft --globalpair sample > test.gins1 # G-INS-1 % mafft --globalpair --maxiterate 100 sample > test.ginsi # G-INS-i % mafft --localpair sample > test.lins1 # L-INS-1 % mafft --localpair --maxiterate 100 sample > test.linsi # L-INS-i % diff test.fftns2 sample.fftns2 % diff test.fftnsi sample.fftnsi % diff test.gins1 sample.gins1 % diff test.ginsi sample.ginsi % diff test.lins1 sample.lins1 If you have the './extensions' directory, % mafft-qinsi samplerna > test.qinsi # Q-INS-i % mafft-xinsi samplerna > test.xinsi # X-INS-i % diff test.qinsi samplerna.qinsi % diff test.xinsi samplerna.xinsi If you use the multithread version, the results of iterative refinement methods (*-*-i) are not always identical. Try this test with the single- thread mode (--thread 0). 4. INPUT FORMAT fasta format. The type of input sequences (nucleotide or amino acid) is automatically recognized based on the frequency of A, T, G, C, U and N. 5. USAGE % /usr/local/bin/mafft input > output See also http://mafft.cbrc.jp/alignment/software/ 6. 
UNINSTALL # rm -r /usr/local/libexec/mafft # rm /usr/local/bin/mafft # rm /usr/local/bin/fftns # rm /usr/local/bin/fftnsi # rm /usr/local/bin/nwns # rm /usr/local/bin/nwnsi # rm /usr/local/bin/linsi # rm /usr/local/bin/ginsi # rm /usr/local/bin/mafft-* # rm /usr/local/share/man/man1/mafft* 7. LICENSE See the './license' file. If you have the extensions, see also the './license.extensions' file, mafft-7.505-without-extensions/binaries/0000755000175000017500000000000014224477667017672 5ustar nileshnileshmafft-7.505-without-extensions/binaries/.gitkeep0000644000175000017500000000000013146214456021274 0ustar nileshnileshmafft-7.505-without-extensions/test/0000755000175000017500000000000014200672476017042 5ustar nileshnileshmafft-7.505-without-extensions/test/sample.parttree0000644000175000017500000007264014224501721022072 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MN-------------------GTE------GDNFYVPF----------------SNKTGL -----------ARSPYEYPQY-----YLAEPWK--------------------Y------ ------------------SALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNL AMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIER YIVICKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSC GPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAAQQ------------------------------------ESASTQKAEKEVTRM VVLMVIGFLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMN KQFRNCMITTLC----C---GKNPLGD-DE--SGASTSKTEV------------------ ------------------------------------------------------------ ---SSVS-------TSPVSP-A---------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MN-------------------GTE------GPNFYVPF----------------SNITGV -----------VRSPFEQPQY-----YLAEPWQ--------------------F------ ------------------SMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNL AVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIER 
YVVVCKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSC GIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------ESATTQKAEKEVTRM VIIMVIFFLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMN KQFRNCMLTSLC----C---GKNPLGD-DE--ASATASKTE------------------- ------------------------------------------------------------ --------------TSQVAP-A---------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MN-------------------GTE------GINFYVPM----------------SNKTGV -----------VRSPFEYPQY-----YLAEPWK--------------------Y------ ------------------RLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNL AVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIER YIVVCKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSC GPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------ESATTQKAEKEVTRM VILMVLGFMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMN KQFRNCMITTIC----C---GKNPFGD-EDVSSTVSQSKTEV------------------ ------------------------------------------------------------ ---SSVS-------SSQVSP-A---------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MN-------------------GTE------GKNFYVPM----------------SNRTGL -----------VRSPFEYPQY-----YLAEPWQ--------------------F------ ------------------KILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNL AVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIER YIVVCKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSC GPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------DSASTQKAEREVTKM 
VILMVFGFLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLN KQFRNCMLTTIF----C---GKNPLGD-DE-SSTVSTSKTEV------------------ ------------------------------------------------------------ ---SS------------VSP-A---------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MN-------------------GTE------GNNFYVPL----------------SNRTGL -----------VRSPFEYPQY-----YLAEPWQ--------------------F------ ------------------KLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNL AVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIER YIVVCKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSC GPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------DSASTQKAEREVTKM VILMVLGFLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLN KQFRSCMLTTLF----C---GKNPLGD-EE-SSTVSTSKTEV------------------ ------------------------------------------------------------ ---SS------------VSP-A---------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MK-------------------QVPE----FHEDFYIPIPL------------DINNLSAY -------------SPFLVPQD-----HLGNQGI--------------------F------ ------------------MAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNL SIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFER WLVICKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSC GPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAKAQA------------------------------------DSASTQKAEREVTKM VVVMVLGFLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMN KQFRSCMM-KMV----C---GKN-IEE-DE--ASTSSQVTQV------------------ ------------------------------------------------------------ ---SS------------VAPEK---------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MR-------------------KMS------EEEFYL-----------------FKNISSV 
-------------GPWDGPQY-----HIAPVWA--------------------F------ ------------------YLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNV SFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFER YIVICKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSC GPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAAQQQ------------------------------------ESATTQKAEREVSRM VVVMVGSFCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMN KQFQACIM-KMV----C---GKA-MTD-ES--DTCSSQKTEV------------------ ------------------------------------------------------------ ---STVS-------STQVGP-N---------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WLVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMVLAFC--------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------HLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNL AVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WMVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- 
------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMVLAFCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMN RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ ------------------------------------------------------------ ---SSV---------SSVSP-A---------- > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------HLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNL AVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WLVVCKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMIFAYCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMN RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ ------------------------------------------------------------ ---SSV---------SSVSP-A---------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD-DDTTR------GSVFTYTN----------------TNNT-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------NLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNL AFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWER WFVVCKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAAQQK------------------------------------ESESTQKAEREVSRM VVVMIVAFCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMN RQFRNCIMQLF---------GKK-VDD-GS--EASTTSRTEV------------------ ------------------------------------------------------------ 
---SSVS-------NSSVAP-A---------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWEAAFAARRRHEE--EDTTR------DSVFTYTN----------------SNNT-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------NLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNL AVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWER WFVVCKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAAQQK------------------------------------ESESTQKAEKEVSRM VVVMIVAYCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMN RQFRNCILQLF---------GKK-VDD-GS--EVST-SRTEV------------------ ------------------------------------------------------------ ---SSVS-------NSSVSP-A---------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS-----------------SNSSQ------AP----------------------PNGT-- ------------PGPFDGPQW----PYQAPQST--------------------Y------ ------------------VGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNL AVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALER YVVVCKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSC GPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQK------------------------------------EADTTQRAEREVTRM VIVMVMAFLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMN KQFQSCLLEMLC----CGYQPQR-TGK-AS--PGTPGPHADV------------------ ------------------------------------------------------------ ---TAAG------LRNKVMP-AH-------PV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNV---------------SSSLFGNVST-ALRPEARLSA---ETRLLGW-------- ------------NVPPEELR------HIPEHWL-----------TYPEPPESMN------ ------------------YLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINL 
AFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDR FNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSC TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LRDQAKK----------MNVESL----------------RSNVDKNKETAEIRIAKA AITICFLFFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISH PRYRMELQKRCP----WLALNEKAPE--SS-AVASTSTTQEP------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNV---------------SSVL-GNVSS-VLRPDARLSA---ESRLLGW-------- ------------NVPPDELR------HIPEHWL-----------IYPEPPESMN------ ------------------YLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINL AFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDR YNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSC TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LRDQAKK----------MNVDSL----------------RSNVDKSKEAAEIRIAKA AITICFLFFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISH PRYRMELQKRCP----WLAISEKAPE--SR-AAISTSTTQEQ------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME---------------------PL-CNASEPPLRPEAR-SSGNGDLQFLGW-------- ------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ ------------------YMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNL AVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDR YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC SFDYLS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ 
---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISH PRYRLELQKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD---------------------AL-CNASEPPLRPEARMSSGSDELQFLGW-------- ------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ ------------------YMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNL AVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDR YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC SFDYLS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISH PRYRMELQKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ------------------ ------------------------------------------------------------ --------------TQQTSA-A---------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT-------------------------NATGPQMAYYGAASMD------FGYP-EGVSIV -----------DFVRPEIKP------YVHQHWY-----------NYPPVNPMWH------ ------------------YLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNL ALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDR YNIICNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSC SYDYLT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRAQAKK----------MNVSTL----------------RS-NEADAQRAEIRIAKT ALVNVSLWFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISH PKYRLAITQHLP----WFCVHETETKS-ND-DSQSNSTVAQ------------------- ------------------------------------------------------------ ------------------DK-A---------- > 19== D50583 1 Hemigrapsus sanguineus opsin 
BcRh1 [J.Exp.Biol.1 MA-------------------------NVTGPQMAFYGSGAAT------FGYP-EGMTVA -----------DFVPDRVKH------MVLDHWY-----------NYPPVNPMWH------ ------------------YLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNL ALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDR YNIICNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSC SYDYFT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRAQAKK----------MNVTNL----------------RS-NEAETQRAEIRIAKT ALVNVSLWFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISH PKFRLAITQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ------------------- ------------------------------------------------------------ ------------------EK-S---------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV -----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ ------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSC GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV -----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ ------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSC 
GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MD-------------------SFAAVATQLGPQFAAPS-----------------NGSVV -----------DKVTPDMAH------LISPYWD-----------QFPAMDPIWA------ ------------------KILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSC GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDADKSAEGKLAKV ALVTISLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSH--------------LPETPFDLAHSGPRFQAQSSG---------------NGSVL -----------DNVLPDMAH------LVNPYWS-----------RFAPMDPMMS------ ------------------KILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNL AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDR YNVIVKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTAC SIDYMT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDCDKSAEGKLAKV ALTTISLWFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH 
PKYRIVLKEKCP----MCVFGNTDEPKPDA-PASDTETTSEA------------------ ------------------------------------------------------------ ---D--------------SK-A---------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSL--------------LPEPPLAMALLGPRFEAQTGG---------------NRSVL -----------DNVLPDMAP------LVNPHWS-----------RFAPMDPTMS------ ------------------KILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNL AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDR YNVIVKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTAC SIDYMT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRDQAKK----------MNVKSL----------------RS-SEDCDKSAENKLAKV ALTTISLWFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH PNDRLVLKEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA------------------ ------------------------------------------------------------ ---E--------------SK-D---------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI-------------------------AVSGPSYEAFSYGGQA-----RF----NNQTVV -----------DKVPPDMLH------LIDANWY-----------QYPPLNPMWH------ ------------------GILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINL AISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDR YNVIVKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTAC GTDYFN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVASL----------------RS-SENQNTSAECKLAKV ALMTISLWFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISH PKYRAALFAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT------------------ ------------------------------------------------------------ ---DNEK-----------SN-A---------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M---------------------------ANQLSYSSLGWPYQP------------NASVV 
-----------DTMPKEMLY------MIHEHWY-----------AFPPMNPLWY------ ------------------SILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNL AFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDR YNVIVRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSC TVDYLT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LREQAKK----------MNVASL----------------RANADQQKQSAECRLAKV AMMTVGLWFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISH PRYKAALYQRFP----SLACGSGESGS-DV-KSEASATTTME------------------ ------------------------------------------------------------ ---EKPK----------IPE-A---------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MVESTTL------------VNQT--------------------------WWY---NPTVD ---------------------------IHPHWA-----------KFDPIPDAVY------ ------------------YSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINL AMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDR YNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSC SFDYLS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MAAMAKR----------LNAKEL----------------R--KAQAGASAEMKLAKI SMVIITQFMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSH PKFREAIQTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAM MQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAP PQVEAPQGAPPQGVDNQAYQ-A---------- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGRDLR-------------DNET--------------------------WWY---NPSIV ---------------------------VHPHWR-----------EFDQVPDAVY------ ------------------YSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINL AFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDR YNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNC SFDYIS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE----------- 
------------------------------------------------------------ ------------------------------------------------------------ ---MAAMAKR----------LNAKEL----------------R--KAQAGANAEMRLAKI SIVIVSQFLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSH PKFREAISQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAM MQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAP PQ-GAPPAAPPQGVDNQAYQ-A---------- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- --------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ -------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- ---------PGF---------------------------------PRVQPESVI------ ----------------SLNGVVK----------------------LQKEVEECAN----- ---LSRLLKH--------------------------------ER-KNISIFKREQKAATT LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN RDLRPTSRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ -LKLA------------------------------------------------------- ---ERPE------RSEFVLQNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- --------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ -------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- ---------PGF---------------------------------PRVQPESVI------ ----------------SLNGVVK----------------------LQKEVEECAN----- ---LSRLLKH--------------------------------ER-KNISIFKREQKAATT LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN 
RDLRTTYRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ -LKLA------------------------------------------------------- ---ERPE------RSEFVLQNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] M-DVLS-------------PGQ--------GNNTTSPPAPFETG----------GNTTGI -------------------------SDVTVSYQ--------------------------- ------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- ----------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGS RNWRLGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP- ----ASFERK-----------NERNA----------------EA-KRKMALARERKTVKT LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFN KDFQNAFKKIIK--------CKFCR----------------------------------- ------------------------------------------------------------ -------------------------------Q > 33=p A35181 serotonin receptor class 1A - rat M-DVFS-------------FGQ--------GNNTTASQEPFGTG----------GNVTSI -------------------------SDVTFSYQ--------------------------- ------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- ----------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGS GDWRRCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP- ----ACLERK-----------NERNA----------------EA-KRKMALARERKTVKT LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFN KDFQNAFKKIIK--------CKFCR----------------------------------- ------------------------------------------------------------ -------------------------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT 
NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- --------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS PEFRSAFQKILF--------GKYRRG---------------------------------- ------------------------------------------------------------ ------------------------------HR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- --------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS PEFRSAFQKILF--------GKYRRG---------------------------------- ------------------------------------------------------------ ------------------------------HR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi M-EG-A-------------EGQEELD----WEALYLRLPLQNCS----------WNSTGW EPNW------------------NVTVVPNTTWW----------------QASAPFDTPAA LVRA--------------AAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSL AVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDR YWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RC VVS--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR----------- ----------------------------------RRGATARGGVGPPPVP---------- 
-----------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNV SSNNTSPEKQSCANGLEA---DPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKT LAIITGAFVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFS PEFRHAFQRLLC--------GRRVRRR----R---------------------------- ------------------------------------------------------------ -----------------------------APQ mafft-7.505-without-extensions/test/samplerna0000644000175000017500000000273614224501721020745 0ustar nileshnilesh>AJ006331.1_1230 ccauggcguuaguau gagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagu >Z84287.1_1250 uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu >AF064490.1_2296 ugagu gucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg 
mafft-7.505-without-extensions/test/samplerna.xinsi0000644000175000017500000000357414224501721022077 0ustar nileshnilesh>AJ006331.1_1230 c--------------------------------ca-------------------uggcgu uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcga--------------------- ----------------------------------------------------------gu >Z84287.1_1250 u-------------------------------ucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgaga--------------------- ----------------------------------------------------------gu >AF064490.1_2296 u----------------------------------------------------------- ------gagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 u-------------------------------ucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgaga--------------------- ----------------------------------------------------------gu >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag 
accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg mafft-7.505-without-extensions/test/sample.gins10000644000175000017500000007231014224501721021257 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] --------------------MN-------------------------------------- -----------------------GTE--GDNFY------------VPFSNKTG------- ---------------LARSPYEYPQY-------------YLAEPWKYSALAAYMFFLILV GFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FG PTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMAL ACAAP-PLVG-WS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVI IFFCYGRLLCTVKEAAAAQQESA------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEKEVTRMVVLM VIGFLVCWVPYASVAFYIFT-HQGS-D-FGATFMTLPAFFAKSSALYNPVIYILMNKQFR NCMITTLCC----GKNPLGDDE-SGA-STSKTEVSSVS-TSPV----------------- ------------------------------------------------------------ --------------------SPA--- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] --------------------MN-------------------------------------- -----------------------GTE--GPNFY------------VPFSNITG------- ---------------VVRSPFEQPQY-------------YLAEPWQFSMLAAYMFLLIVL GFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FG PTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMAL ACAAP-PLVG-WS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIV IFFCYGQLVFTVKEAAAQQQESA------------------------------------- ------------------------------------------------------------ --------------------------------------------TTQKAEKEVTRMVIIM VIFFLICWLPYASVAMYIFT-HQGS-N-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFR NCMLTSLCC----GKNPLGDDE-ASA-TASKTE------TSQV----------------- ------------------------------------------------------------ --------------------APA--- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 --------------------MN-------------------------------------- 
-----------------------GTE--GINFY------------VPMSNKTG------- ---------------VVRSPFEYPQY-------------YLAEPWKYRLVCCYIFFLIST GLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FG PVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAF SCAAP-PLFG-WS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVV IFFSYGRLICKVREAAAQQQESA------------------------------------- ------------------------------------------------------------ --------------------------------------------TTQKAEKEVTRMVILM VLGFMLAWTPYAVVAFWIFT-NKGA-D-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFR NCMITTICC----GKNPFGDEDVSSTVSQSKTEVSSVS-SSQV----------------- ------------------------------------------------------------ --------------------SPA--- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish --------------------MN-------------------------------------- -----------------------GTE--GKNFY------------VPMSNRTG------- ---------------LVRSPFEYPQY-------------YLAEPWQFKILALYLFFLMSM GLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LG PTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMAL ACAAP-PLFG-WS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAV IFFTYGRLVCTVKAAAAQQQDSA------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEREVTKMVILM VFGFLIAWTPYATVAAWIFF-NKGA-D-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFR NCMLTTIFC----GKNPLGDDE-SSTVSTSKTEVSS------V----------------- ------------------------------------------------------------ --------------------SPA--- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish --------------------MN-------------------------------------- -----------------------GTE--GNNFY------------VPLSNRTG------- ---------------LVRSPFEYPQY-------------YLAEPWQFKLLAVYMFFLICL GLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LG PTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAM ACAAP-PLVG-WS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTI 
IFFTYGRLVCTVKAAAAQQQDSA------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEREVTKMVILM VLGFLVAWTPYATVAAWIFF-NKGA-A-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFR SCMLTTLFC----GKNPLGDEE-SSTVSTSKTEVSS------V----------------- ------------------------------------------------------------ --------------------SPA--- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- --------------------MK-------------------------------------- -----------------------QVPEFHEDFY------------IPIPLDINN------ --------------LSAYSPFLVPQD-------------HLGNQGIFMAMSVFMFFIFIG GASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FG ATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISAL AASLP-PLFG-WS-----RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGT IVFCYGQLLITLKLAAKAQADSA------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEREVTKMVVVM VLGFLVCWAPYASFSLWIVS-HRGE-E-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFR SCMM-KMVC----GKN-IEEDE-AST-SSQVTQVSS------V----------------- ------------------------------------------------------------ --------------------APEK-- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] --------------------MR-------------------------------------- -----------------------KMS--EEEFY------------L-FKNISS------- ---------------V--GPWDGPQY-------------HIAPVWAFYLQAAFMGTVFLI GFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FG RHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGI GVSIP-PFFG-WS-----RFIPEGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSL ICFSYTQLLRALKAVAAQQQESA------------------------------------- ------------------------------------------------------------ --------------------------------------------TTQKAEREVSRMVVVM VGSFCVCYVPYAAFAMYMVN-NRNH-G-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQ ACIM-KMVC----GKA-MTDES-DTC-SSQKTEVSTVS-STQV----------------- 
------------------------------------------------------------ --------------------GPN--- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------DLAETVIA-STISIVNQVS-GYFV--LG HPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAA VWTAP-PIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSI IVLCYLQVWLAIRAVAKQQKESE------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEKEVTRMVVVM VLAFC------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWS-LQRLAGRHPQDSYED-------------------------------------- -----------------------STQ--SSIFT------------YTNSNSTR------- ------------------GPFEGPNY-------------HIAPRWVYHLTSVWMIFVVIA SVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LG HPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAA VWTAP-PIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSI IVLCYLQVWLAIRAVAKQQKESE------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEKEVTRMVVVM VLAFCFCWGPYAFFACFAAA-NPGY-P-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFR NCIL-QLF-----GKK-VDDGS-ELS-SASKTEVSSV---SSV----------------- ------------------------------------------------------------ --------------------SPA--- > 10== Z68193 1 human Red Opsin <>[] MAQQWS-LQRLAGRHPQDSYED-------------------------------------- -----------------------STQ--SSIFT------------YTNSNSTR------- ------------------GPFEGPNY-------------HIAPRWVYHLTSVWMIFVVTA SVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LG 
HPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSA VWTAP-PIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAI IMLCYLQVWLAIRAVAKQQKESE------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEKEVTRMVVVM IFAYCVCWGPYTFFACFAAA-NPGY-A-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFR NCIL-QLF-----GKK-VDDGS-ELS-SASKTEVSSV---SSV----------------- ------------------------------------------------------------ --------------------SPA--- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD--DD-------------------------------------- -----------------------TTR--GSVFT------------YTNTNNTR------- ------------------GPFEGPNY-------------HIAPRWVYNLVSFFMIIVVIA SCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LG HPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAW GWSAP-PIFG-WS-----RYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFI IIVCYLQVWMAIRAVAAQQKESE------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEREVSRMVVVM IVAFCICWGPYASFVSFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFR NCIM-QLF-----GKK-VDDGS-EAS-TTSRTEVSSVS-NSSV----------------- ------------------------------------------------------------ --------------------APA--- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE--ED-------------------------------------- -----------------------TTR--DSVFT------------YTNSNNTR------- ------------------GPFEGPNY-------------HIAPRWVYNLTSVWMIFVVAA SVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQIS-GYFI--LG HPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSC AWTAP-PIFG-WS-----RYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAI IILCYLQVWLAIRAVAAQQKESE------------------------------------- ------------------------------------------------------------ --------------------------------------------STQKAEKEVSRMVVVM 
IVAYCFCWGPYTFFACFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFR NCIL-QLF-----GKK-VDDGS-EVS-T-SRTEVSSVS-NSSV----------------- ------------------------------------------------------------ --------------------SPA--- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] ------------------------------------------------------------ -------------------------M--SSNSS------------QAPPNGTP------- ------------------GPFDGPQWP------------YQAPQSTYVGVAVLMGTVVAC ASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FG RRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWAL LWSAP-PLLG-WS-----SYVPEGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSL ILFSYTNLLLTLRAAAAQQKEAD------------------------------------- ------------------------------------------------------------ --------------------------------------------TTQRAEREVTRMVIVM VMAFLLCWLPYSTFALVVAT-HKGI-I-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQ SCLL-EMLCCGYQPQR-TGKAS-PGT-PGPHADVTAAGLRNKV----------------- ------------------------------------------------------------ --------------------MPAHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] --------------------MESGNVS--------------------------------- -------------------SSLFGNV--STALR-------------PEARLSA---E--- ---TRLLGWNVPPEELRHIPEHWLTY-------------PEPPESMNYLLGTLYIFFTLM SMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSFH-QGYA--LG HLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYAT PWVVA-CYTETWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTM ITYYYSQIVGHVFSHEKALRDQAKKMNVE------------------SLRS--------- ------------------------------------------------------------ ----------------------------------------NVDKNKETAEIRIAKAAITI CFLFFCSWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACACKMVACIDPFVYAISHPRYR MELQ-KRCPWL--ALNEKAPES-SAV-ASTST---TQE-PQQT----------------- ------------------------------------------------------------ --------------------TAA--- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 --------------------MEYHNVS--------------------------------- 
-------------------SVL-GNV--SSVLR-------------PDARLSA---E--- ---SRLLGWNVPPDELRHIPEHWLIY-------------PEPPESMNYLLGTLYIFFTVI SMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSFH-QGYA--LG HLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYAT PWVVA-CYTESWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTM ITYYYSQIVGHVFSHEKALRDQAKKMNVD------------------SLRS--------- ------------------------------------------------------------ ----------------------------------------NVDKSKEAAEIRIAKAAITI CFLFFASWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACTCKMVACIDPFVYAISHPRYR MELQ-KRCPWL--AISEKAPES-RAA-ISTST---TQE-QQQT----------------- ------------------------------------------------------------ --------------------TAA--- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] --------------------ME-------------------------------------- --------------------PLCNAS--EPPLR-------------PEAR-SSGNGD--- ---LQFLGWNVPPDQIQYIPEHWLTQ-------------LEPPASMHYMLGVFYIFLFCA STVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSFH-RGFAIYLG NTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCT PWVVL-PLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLM ILYYYSQIVGHVFSHEKALREQAKKMNVE------------------SLRS--------- ------------------------------------------------------------ ----------------------------------------NVDKSKETAEIRIAKAAITI CFLFFVSWTPYGVMSLIGAF-GDKS-L-LTQGATMIPACTCKLVACIDPFVYAISHPRYR LELQ-KRCPWL--GVNEKSGEI-SSA-QSTTT---QEQ--QQT----------------- ------------------------------------------------------------ --------------------TAA--- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 --------------------MD-------------------------------------- --------------------ALCNAS--EPPLR-------------PEARMSSGSDE--- ---LQFLGWNVPPDQIQYIPEHWLTQ-------------LEPPASMHYMLGVFYIFLFFA STLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSFH-RGFA--LG NTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCT PWVVL-PLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLM 
ILYYYSQIVGHVFNHEKALREQAKKMNVE------------------SLRS--------- ------------------------------------------------------------ ----------------------------------------NVDKSKETAEIRIAKAAITI CFLFFVSWTPYGVMSLIGAF-GDKS-L-LTPGATMIPACTCKLVACIEPFVYAISHPRYR MELQ-KRCPWL--GVNEKSGEA-SSA-QSTTT---QEQ-TQQT----------------- ------------------------------------------------------------ --------------------SAA--- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------MT-------------------------------------- -----------------------NAT--GPQMAY-----------YGAASMDFGYPE--- ---GVSIVDFVRPEIKPYVHQHWYNY-------------PPVNPMWHYLLGVIYLFLGTV SIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FS PQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAI GCALP-PFFG-WG-----NYILEGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAI IVFSYVFIVKAIFAHEAAMRAQAKKMNVS------------------TLRS--------- ------------------------------------------------------------ -----------------------------------------NEADAQRAEIRIAKTALVN VSLWFICWTPYALISLKGVM-GDTS-G-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYR LAIT-QHLPWF--CVHETETKS-NDD-SQSNS---TVA-Q-------------------- ------------------------------------------------------------ --------------------DKA--- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------MA-------------------------------------- -----------------------NVT--GPQMAF-----------YGSGAATFGYPE--- ---GMTVADFVPDRVKHMVLDHWYNY-------------PPVNPMWHYLLGVVYLFLGVI SIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FS GTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISV GWSLP-PFFG-WG-----SYTLEGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASV IVFSYVFIVKAIFAHEAAMRAQAKKMNVT------------------NLRS--------- ------------------------------------------------------------ -----------------------------------------NEAETQRAEIRIAKTALVN VSLWFICWTPYAAITIQGLL-GNAE-G-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFR LAIT-QHLPWF--CVHEKDPND-VEE-NQSSN---TQT-Q-------------------- 
------------------------------------------------------------ --------------------EKS--- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] --------------------MESF------------------------------------ -------------------AVAAAQL--GPHFA----------------PLS-------- ---NGSVVDKVTPDMAHLISPYWNQF-------------PAMDPIWAKILTAYMIMIGMI SWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LG PMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKM-------- -------------------YVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFL ICYSYWFIIAAVSAHEKAMREQAKKMNVK------------------SLRS--------- ------------------------------------------------------------ -----------------------------------------SEDAEKSAEGKLAKVALVT ITLWFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYR LALK-EKCPCC--VFGKVDDGK-SSD-AQSQA-TASEA-E-------------------- ------------------------------------------------------------ --------------------SKA--- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] --------------------MESF------------------------------------ -------------------AVAAAQL--GPHFA----------------PLS-------- ---NGSVVDKVTPDMAHLISPYWNQF-------------PAMDPIWAKILTAYMIMIGMI SWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LG PMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSS IW-CLAPAFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFL ICYSYWFIIAAVSAHEKAMREQAKKMNVK------------------SLRS--------- ------------------------------------------------------------ -----------------------------------------SEDAEKSAEGKLAKVALVT ITLWFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYR LALK-EKCPCC--VFGKVDDGK-SSD-AQSQA-TASEA-E-------------------- ------------------------------------------------------------ --------------------SKA--- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' --------------------MDSF------------------------------------ -------------------AAVATQL--GPQFA----------------APS-------- ---NGSVVDKVTPDMAHLISPYWDQF-------------PAMDPIWAKILTAYMIIIGMI 
SWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LG PMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMST IWCCLAPVFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFL ICYSYWFIIAAVSAHEKAMREQAKKMNVK------------------SLRS--------- ------------------------------------------------------------ -----------------------------------------SEDADKSAEGKLAKVALVT ISLWFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYR LALK-EKCPCC--VFGKVDDGK-SSE-AQSQA-TTSEA-E-------------------- ------------------------------------------------------------ --------------------SKA--- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] --------------------MERSHLP--------------------------------- -----------------ETPFDLAHS--GPRFQ----------------AQSSG------ ---NGSVLDNVLPDMAHLVNPYWSRF-------------APMDPMMSKILGLFTLAIMII SCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LG PLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAV FWTVM-PLIG-WS-----AYVPEGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFL ICYSYWFIIAAVAAHEKAMREQAKKMNVK------------------SLRS--------- ------------------------------------------------------------ -----------------------------------------SEDCDKSAEGKLAKVALTT ISLWFMAWTPYLVICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYR IVLK-EKCPMC--VFGNTDEPK-PDA-PASDTETTSEA-D-------------------- ------------------------------------------------------------ --------------------SKA--- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 --------------------MERSLLP--------------------------------- -----------------EPPLAMALL--GPRFE----------------AQTGG------ ---NRSVLDNVLPDMAPLVNPHWSRF-------------APMDPTMSKILGLFTLVILII SCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LG PLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAV FWTIM-PLIG-WS-----SYVPEGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFM ICYSYWFIIATVAAHEKAMRDQAKKMNVK------------------SLRS--------- ------------------------------------------------------------ 
-----------------------------------------SEDCDKSAENKLAKVALTT ISLWFMAWTPYLIICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDR LVLK-EKCPMC--VCGTTDEPK-PDA-PPSDTETTSEA-E-------------------- ------------------------------------------------------------ --------------------SKD--- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------MI-------------------------------------- -----------------------AVS--GPSYE----------------AFSYGGQAR-- -FNNQTVVDKVPPDMLHLIDANWYQY-------------PPLNPMWHGILGFVIGMLGFV SAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LG PLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSL GWTIA-PMFG-WN-----RYVPEGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFL IIYSYWFIIQAVAAHEKNMREQAKKMNVA------------------SLRS--------- ------------------------------------------------------------ -----------------------------------------SENQNTSAECKLAKVALMT ISLWFMAWTPYLVINFSGIF-NL-V-K-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYR AALF-AKFPSL--AC-AAEPSS-DAV-STTSG-TTTVT-DNEK----------------- ------------------------------------------------------------ --------------------SNA--- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] --------------------M--------------------------------------- ------------------------AN--QLSYS----------------SLGWPYQP--- ---NASVVDTMPKEMLYMIHEHWYAF-------------PPMNPLWYSILGVAMIILGII CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LG PFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSG GWTIL-PFFG-WS-----RYVPEGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLIT MIYCYFFIVHAVAEHEKQLREQAKKMNVA------------------SLRA--------- ------------------------------------------------------------ ----------------------------------------NADQQKQSAECRLAKVAMMT VGLWFMAWTPYLIISWAGVF-SSGT-R-LTPLATIWGSVFAKANSCYNPIVYGISHPRYK AALY-QRFPSL--ACGSGESGS-DVK-SEASA-TTTME-EKPK----------------- ------------------------------------------------------------ -------------------IPEA--- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] 
--------------------MV-------------------------------------- -----------------------EST--TL------------------VNQTWWY----- ---NPTVD----------IHPHWAKF-------------DPIPDAVYYSVGIFIGVVGII GILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FG KVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSI VWSVG-PVFN-WG-----AYVPEGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIII IAFCYFNIVMSVSNHEKEMAAMAKRLNAK------------------ELRK--------- ------------------------------------------------------------ -----------------------------------------AQ-AGASAEMKLAKISMVI ITQFMLSWSPYAIIALLAQF-GPAE-W-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFR EAIQ-TTFPWLLTCCQFDEKEC-EDA-NDAEE---EVV-ASER--GGESRDAAQMKEMMA MMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQG APPQVEAPQGAPPQGVDNQAYQA--- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] --------------------MG-------------------------------------- -----------------------RDL--R-------------------DNETWWY----- ---NPSIV----------VHPHWREF-------------DQVPDAVYYSLGIFIGICGII GCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FG FAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSV LWAIG-PIFG-WG-----AYTLEGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILI IFFCYFNIVMSVSNHEKEMAAMAKRLNAK------------------ELRK--------- ------------------------------------------------------------ -----------------------------------------AQ-AGANAEMRLAKISIVI VSQFLLSWSPYAVVALLAQF-GPLE-W-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFR EAIS-QTFPWVLTCCQFDDKET-EDD-KDAET---EIP-AGESSDAAPSADAAQMKEMMA MMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQG APP------AAPPQGVDNQAYQA--- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ ------------------------------------------------------MPHLLS GFLEVTAS----------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLL TIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FG 
HFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSA SITLP-PLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSV MLFMYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG--------- -------------------------------------------VVKLQKE---------- --------VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGII VGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLR PTSR-SLLQ----CQYRNINRKLSAA-GMHEA--LKLA-ERPE----RSEFVLQNSDHCG ------------------------------------------------------------ --------------------KKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ ------------------------------------------------------MPHLLS GFLEVTAS----------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLL TIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FG HFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSA SITLP-PLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSV MLFMYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG--------- -------------------------------------------VVKLQKE---------- --------VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGII VGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLR TTYR-SLLQ----CQYRNINRKLSAA-GMHEA--LKLA-ERPE----RSEFVLQNSDHCG ------------------------------------------------------------ --------------------KKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] --------------------MDVLSPG--------------------------------- -----------------------------------------------QGNNT-------- ---TSPPAPF-------ETGGNTTGISD-------------VTVSYQVITSLLLGTLIFC AVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LG QVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGF LISIP-PMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLL MLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNW RLGVESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS- -----------------FERKNERNA-------------EAKRKMALARERKTVKTLGII MGTFILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQ 
NAFK-KIIK----CKFCR------------------------------------------ ------------------------------------------------------------ -------------------------Q > 33=p A35181 serotonin receptor class 1A - rat --------------------MDVFSFG--------------------------------- -----------------------------------------------QGNNT-------- ---TASQEPF-------GTGGNVTSISD-------------VTFSYQVITSLLLGTLIFC AVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LG QVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGF LISIP-PMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLL MLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDW RRCAENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC- -----------------LERKNERNA-------------EAKRKMALARERKTVKTLGII MGTFILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQ NAFK-KIIK----CKFCR------------------------------------------ ------------------------------------------------------------ -------------------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] --------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG LSPTGLVTSDFNDSYGLTGQFINGSH--SSRSRD-----------NASANDT-------- ---SATNM----------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLC CIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LH SEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVAL FISIP-PLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLV MMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTE KKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCA EEASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAII TGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFR SAFQ-KILF----GKYRR----GH------------------------------------ ------------------------------------------------------------ -------------------------R > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail --------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG LSPTGLVTSDFNDSYGLTGQFINGSH--SSRSRD-----------NASANDT-------- 
---SATNM----------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLC CIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LH SEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVAL FISIP-PLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLV MMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTE KKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCA EEASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAII TGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFR SAFQ-KILF----GKYRR----GH------------------------------------ ------------------------------------------------------------ -------------------------R > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi --------------------ME-------------------------------------- -----------------------GAE--GQEELDWEALYLRLPLQNCSWNSTGWEPN--- --WNVTVV----------PNTTWWQASAPFDT--------PAALVRAAAKAVVLGLLILA TVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LG PELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSF FVCIA-QLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPVLI ILILYWRIYQTARKRIRR------RRGATARGGVGPPPVPAGGALV-AGGG--------- -------------------SGGIAAAV---------VAVIGRPLPTISETTTTGFTNVSS NNTSPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAII TGAFVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFR HAFQ-RLLC----GRRVRR-RRAP------------------------------------ ------------------------------------------------------------ -------------------------Q mafft-7.505-without-extensions/test/sample.ginsi.allowshift0000644000175000017500000012064414224501721023526 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] M----------------------------------------------------------- ---NG------------TEGDNFYVPFSNKTGLAR------------------------- ----------------------------------SP---------------------YEY --------------------PQY---------------------YLAEPW---------- -----------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANL 
FMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVIC KPMGNFRF-GNTHAIMGVAFTWIMALAC-AAPPLV-GWSR-------YIPEGMQCSCGPD YYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-HQG-SD-FG ATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------------------- ------------------------------------------------------------ -----------------------------------------GKN-PLGDDE-SGA----S TSKTE------------------VSSVSTSPV-------------------------S-- --------------------------------------PA > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] M----------------------------------------------------------- ---NG------------TEGPNFYVPFSNITGVVR------------------------- ----------------------------------SP---------------------FEQ --------------------PQY---------------------YLAEPW---------- -----------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADL FMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVC KPMSNFRF-GENHAIMGVAFTWVMALAC-AAPPLV-GWSR-------YIPEGMQCSCGID YYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-HQG-SN-FG PIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------------------- ------------------------------------------------------------ -----------------------------------------GKN-PLGDDE-ASA----T ASKTE-----------------------TSQV-------------------------A-- --------------------------------------PA > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 
M----------------------------------------------------------- ---NG------------TEGINFYVPMSNKTGVVR------------------------- ----------------------------------SP---------------------FEY --------------------PQY---------------------YLAEPW---------- -----------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADL FMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVC KPMGNFRF-SATHAMMGIAFTWVMAFSC-AAPPLF-GWSR-------YMPEGMQCSCGPD YYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-NKG-AD-FT ATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------------------- ------------------------------------------------------------ -----------------------------------------GKN-PFGDEDVSST---VS QSKTE------------------VSSVSSSQV-------------------------S-- --------------------------------------PA > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish M----------------------------------------------------------- ---NG------------TEGKNFYVPMSNRTGLVR------------------------- ----------------------------------SP---------------------FEY --------------------PQY---------------------YLAEPW---------- -----------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGT IMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVC KPMGSFKF-SSSHAFAGIAFTWVMALAC-AAPPLF-GWSR-------YIPEGMQCSCGPD YYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-NKG-AD-FS AKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------------------- 
------------------------------------------------------------ -----------------------------------------GKN-PLGDDE-SST---VS TSKTE------------------VSS-----V-------------------------S-- --------------------------------------PA > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish M----------------------------------------------------------- ---NG------------TEGNNFYVPLSNRTGLVR------------------------- ----------------------------------SP---------------------FEY --------------------PQY---------------------YLAEPW---------- -----------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGA IMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVC KPMGSFKF-SSTHASAGIAFTWVMAMAC-AAPPLV-GWSR-------YIPEGIQCSCGPD YYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-NKG-AA-FS AQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------------------- ------------------------------------------------------------ -----------------------------------------GKN-PLGDEE-SST---VS TSKTE------------------VSS-----V-------------------------S-- --------------------------------------PA > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MK---------------------------------------------------------- -----------------------------------QVPEF-------------------- ---------------HEDFYIPIPL-DINNLSAYSP---------------------FLV --------------------PQD---------------------HLGNQG---------- -----------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANL FVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVIC KPLGNFTF-KTPHAIAGCILPWISALAA-SLPPLF-GWSR-------YIPEGLQCSCGPD WYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA--------- ------------------------------------------------------------ 
------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-HRG-EE-FD LRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMVC---------------------- ------------------------------------------------------------ -----------------------------------------GKN--IEEDE-AST----S SQVTQ------------------VSSVAP------------------------------- --------------------------------------EK > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] M----------------------------------------------------------- ----------------------------------RKMSE--------------------- ----------------EEFYL------FKNISSVGP---------------------WDG --------------------PQY---------------------HIAPVW---------- -----------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGF LLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVIC KPFGNFRF-SSKHALTVVLATWTIGIGV-SIPPFF-GWSR-------FIPEGLQCSCGPD WYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------TTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-NRN-HG-LD LRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMVC---------------------- ------------------------------------------------------------ -----------------------------------------GKA--MTDES-DTC----S SQKTE------------------VSTVSSTQV-------------------------G-- --------------------------------------PN > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ 
----------------------------------------------------------DL AETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVC KPFGNVRF-DAKLAIVGIAFSWIWAAVW-TAPPIF-GWSR-------YWPHGLKTSCGPD VFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEKEVTRMVVVMVLAFC------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLA------GRHPQDSYEDSTQS-------------------SIFTYTNS-- ---NS------------TR----------------------------------------- ----------------------------------GP---------------------FEG --------------------PNY---------------------HIAPRW---------- -----------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL AETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVC KPFGNVRF-DAKLAIVGIAFSWIWAAVW-TAPPIF-GWSR-------YWPHGLKTSCGPD VFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-NPG-YP-FH PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----------------------- ------------------------------------------------------------ -----------------------------------------GKK--VDDGS-ELS----S ASKTE------------------VSSV--SSV-------------------------S-- --------------------------------------PA > 10== Z68193 1 human Red 
Opsin <>[] MAQQWSLQRLA------GRHPQDSYEDSTQS-------------------SIFTYTNS-- ---NS------------TR----------------------------------------- ----------------------------------GP---------------------FEG --------------------PNY---------------------HIAPRW---------- -----------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADL AETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVC KPFGNVRF-DAKLAIVGIAFSWIWSAVW-TAPPIF-GWSR-------YWPHGLKTSCGPD VFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-NPG-YA-FH PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----------------------- ------------------------------------------------------------ -----------------------------------------GKK--VDDGS-ELS----S ASKTE------------------VSSV--SSV-------------------------S-- --------------------------------------PA > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNV--------------------------AVFAARRSRDDDDTTRGSVFTYTNT-- ---NN------------TR----------------------------------------- ----------------------------------GP---------------------FEG --------------------PNY---------------------HIAPRW---------- -----------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDL VETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVC KPFGNIKF-DSKLAIIGIVFSWVWAWGW-SAPPIF-GWSR-------YWPHGLKTSCGPD VFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA-NPG-YA-FH PLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----------------------- 
------------------------------------------------------------ -----------------------------------------GKK--VDDGS-EAS----T TSRTE------------------VSSVSNSSV-------------------------A-- --------------------------------------PA > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA----------------------------AWEAAFAARRRHEEEDTTRDSVFTYTNS-- ---NN------------TR----------------------------------------- ----------------------------------GP---------------------FEG --------------------PNY---------------------HIAPRW---------- -----------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADL GETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVC KPFGNIKF-DGKLAVAGILFSWLWSCAW-TAPPIF-GWSR-------YWPHGLKTSCGPD VFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------STQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-NPG-YA-FH PLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----------------------- ------------------------------------------------------------ -----------------------------------------GKK--VDDGS-EVS----T -SRTE------------------VSSVSNSSV-------------------------S-- --------------------------------------PA > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS-----------------------------------------------------SNSSQ APPNG------------TP----------------------------------------- ----------------------------------GP---------------------FDG --------------------PQW--------------------PYQAPQS---------- -----------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADL LVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVC KPLGDFQF-QRRHAVSGCAFTWGWALLW-SAPPLL-GWSS-------YVPEGLRTSCGPN WYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD--------- ------------------------------------------------------------ 
------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------TTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-HKG-II-IQ PVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCGYQ------------------ ------------------------------------------------------------ -------------------------------------PQRTGK----------------- ---------------------------------------------------------ASP GTPGPHADVTAAGLRNKVMP----------------AHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] ME-----------------------------------------------------SGNVS SSLFG------------NV----STA---------------------------------- -------------------------------------LRPEARLSA--------ETRL-- --------------LGW-----------------NV--PPEELRHIPEHWLTYPEPPE-- ----------SMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDF MMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVIT RPMEGK-M-THGKAIAMIIFIYMYATPW-VVACYTETWGR-------FVPEGYLTSCTFD YLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVESLRSN-------------------------------------------------- -----------VDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-GDK-TL-LT PGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLA------------------- ------------------------------------------------------------ ----------------------------------------LNE--KAPES--SAV---AS TSTTQ----------------------EPQQT-------------------------T-- --------------------------------------AA > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNVSSVLGNVSSV-------------------------------------------- ------------------------------------------------------------ -------------------------------------LRPDARLSA--------ESRL-- --------------LGW-----------------NV--PPDELRHIPEHWLIYPEPPE-- 
----------SMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDF MMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVIT RPMEGK-M-THGKAIAMIIFIYLYATPW-VVACYTESWGR-------FVPEGYLTSCTFD YLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVDSLRSN-------------------------------------------------- -----------VDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF-GDK-TL-LT PGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLA------------------- ------------------------------------------------------------ ----------------------------------------ISE--KAPES--RAA---IS TSTTQ----------------------EQQQT-------------------------T-- --------------------------------------AA > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME---------------------------------------------------------- -------------------------PLC-------NAS---------------------- ----------------------------------EPPLRPEAR-SSG-----NGDLQF-- --------------LGW-----------------NV--PPDQIQYIPEHWLTQLEPPA-- ----------SMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL IMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVIT KPMNRN-M-TFTKAVIMNIIIWLYCTPW-VVLPLTQFWDR-------FVPEGYLTSCSFD YLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVESLRSN-------------------------------------------------- -----------VDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDK-SL-LT QGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLG------------------- ------------------------------------------------------------ ----------------------------------------VNE--KSGEI--SSA---QS TTTQE----------------------Q-QQT-------------------------T-- --------------------------------------AA > 17== X65880 1 
Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD---------------------------------------------------------- -------------------------ALC-------NAS---------------------- ----------------------------------EPPLRPEARMSSG-----SDELQF-- --------------LGW-----------------NV--PPDQIQYIPEHWLTQLEPPA-- ----------SMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL IMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVIT KPMNRN-M-TFTKAVIMNIIIWLYCTPW-VVLPLTQFWDR-------FVPEGYLTSCSFD YLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVESLRSN-------------------------------------------------- -----------VDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDK-SL-LT PGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLG------------------- ------------------------------------------------------------ ----------------------------------------VNE--KSGEA--SSA---QS TTTQE----------------------QTQQT-------------------------S-- --------------------------------------AA > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT---------------------------------------------------------- -----------------------------------NAT---------------------- ----------------------------------GP------------------QMAYYG A-----------ASMDFGY-P------EGVSIVDFV--RPEIKPYVHQHWYNYPPVNP-- ----------MWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDL IMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIIC NGFNGPKL-TTGKAVVFALISWVIAIGC-ALPPFF-GWGN-------YILEGILDSCSYD YLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVSTLRS--------------------------------------------------- -----------NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM-GDT-SG-IT 
PLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFC------------------- ------------------------------------------------------------ ----------------------------------------VHE----------------- ----------------------------------------TETKSNDDSQSNSTVAQD-- --------------------------------------KA > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA---------------------------------------------------------- -----------------------------------NVT---------------------- ----------------------------------GP------------------QMAFYG S-----------GAATFGY-P------EGMTVADFV--PDRVKHMVLDHWYNYPPVNP-- ----------MWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDL IMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIIC NGFNGPKL-TQGKATFMCGLAWVISVGW-SLPPFF-GWGS-------YTLEGILDSCSYD YFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVTNLRS--------------------------------------------------- -----------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL-GNA-EG-IT PLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFC------------------- ------------------------------------------------------------ ----------------------------------------VHE--KDPNDV--------- ---------------------------EENQS------SNTQT-------------QE-- --------------------------------------KS > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ME---------------------------------------------------------- ------------------------------------------------------------ -----------------------------------------------SFAVAAAQLG--- --------------------PHFAP-LSNGSVVDKV--TPDMAHLISPYWNQFPAMDP-- ----------IWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIV KGMAGRPM-TIPLALGKM-----------------------------YVPEGNLTSCGID YLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------- 
------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVKSLRS--------------------------------------------------- -----------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LT PLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCV------------------- ------------------------------------------------------------ ----------------------------------------FG------------------ -------------------------------KVDDGKSSDAQS------QATASEAES-- --------------------------------------KA > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ME---------------------------------------------------------- ------------------------------------------------------------ -----------------------------------------------SFAVAAAQLG--- --------------------PHFAP-LSNGSVVDKV--TPDMAHLISPYWNQFPAMDP-- ----------IWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIV KGMAGRPM-TIPLALGKIAYIWFMSSIW-CLAPAF-GWSR-------YVPEGNLTSCGID YLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVKSLRS--------------------------------------------------- -----------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LT PLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCV------------------- ------------------------------------------------------------ ----------------------------------------FG------------------ -------------------------------KVDDGKSSDAQS------QATASEAES-- --------------------------------------KA > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MD---------------------------------------------------------- ------------------------------------------------------------ -----------------------------------------------SFAAVATQLG--- 
--------------------PQFAA-PSNGSVVDKV--TPDMAHLISPYWDQFPAMDP-- ----------IWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDF GIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIV KGMAGRPM-TIPLALGKIAYIWFMSTIWCCLAPVF-GWSR-------YVPEGNLTSCGID YLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVKSLRS--------------------------------------------------- -----------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF--KF-EG-LT PLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCV------------------- ------------------------------------------------------------ ----------------------------------------FG------------------ -------------------------------KVDDGKSSEAQS------QATTSEAES-- --------------------------------------KA > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHLPET--------------------------------------------------- -------------------------PFD------LAHS---------------------- ----------------------------------GP--RFQAQ-SSG------------- ----------------------------NGSVLDNV--LPDMAHLVNPYWSRFAPMDP-- ----------MMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF CMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIV KGINGTPM-TIKTSIMKILFIWMMAVFW-TVMPLI-GWSA-------YVPEGNLTACSID YMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVKSLRS--------------------------------------------------- -----------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF--KI-DG-LT PLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCV------------------- ------------------------------------------------------------ ----------------------------------------FGN----------------- --TDEPKPDAPA---------------SDTE--------------------TTSEADS-- 
--------------------------------------KA > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLPEP--------------------------------------------------- -------------------------PLA------MALL---------------------- ----------------------------------GP--RFEAQ-TGG------------- ----------------------------NRSVLDNV--LPDMAPLVNPHWSRFAPMDP-- ----------TMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDF CMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIV KGINGTPM-TIKTSIMKIAFIWMMAVFW-TIMPLI-GWSS-------YVPEGNLTACSID YMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVKSLRS--------------------------------------------------- -----------SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF--KI-DG-LT PLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCV------------------- ------------------------------------------------------------ ----------------------------------------CGT----------------- --TDEPKPDAPP---------------SDTE--------------------TTSEAES-- --------------------------------------KD > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI---------------------------------------------------------- -----------------------------------AVS---------------------- ----------------------------------GP--SYEAF-SYGGQARFNNQ----- ------------------------------TVVDKV--PPDMLHLIDANWYQYPPLNP-- ----------MWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNF LMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIV KGLSGKPL-SINGALIRIIAIWLFSLGW-TIAPMF-GWNR-------YVPEGNMTACGTD YFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVASLRS--------------------------------------------------- 
-----------SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF--NL-VK-IS PLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLA------------------- ------------------------------------------------------------ ----------------------------------------CAA----------------- -----------EPSSDAVSTTSGTTTVTDNEK-------------------------S-- --------------------------------------NA > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] MA---------------------------------------------------------- ------------------------------------------------------------ -----------------------------------------------------NQLSY-- ------------SSLGWPYQP-------NASVVDTM--PKEMLYMIHEHWYAFPPMNP-- ----------LWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDF CMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIV RGMAAAPL-THKKATLLLLFVWIWSGGW-TILPFF-GWSR-------YVPEGNLTSCTVD YLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -MNVASLRAN-------------------------------------------------- -----------ADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-SSG-TR-LT PLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLA------------------- ------------------------------------------------------------ ----------------------------------------CGSG-ESGSDVKSEA---SA TTTMEEKPKIP------------------------------------------------- --------------------------------------EA > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MV---------------------------------------------------------- -----------------------------------ESTT--------------------- -----LVNQT-------------------------------------------------- ----------------WWYNPTV---------------------DIHPHWAKFDPIPD-- ----------AVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDL SFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIG RPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWGA-------YVPEGILTSCSFD 
YLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -LNAKELRKA-------------------------------------------------- -------------QAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF-GPA-EW-VT PYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECED------- ------------ANDAEEEVVASERGGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPP PQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQ--------------------- ------------------------------------------------------------ GAP----------------PQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGR-----------------------DLRD------------------------------ ------------------------------------------------------------ -------NET-------------------------------------------------- ----------------WWYNPSI---------------------VVHPHWREFDQVPD-- ----------AVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDF TFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIG RPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWGA-------YTLEGVLCNCSFD YISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -LNAKELRKA-------------------------------------------------- -------------QAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-GPL-EW-VT PYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETE IPAGESSDAAPSA-----------------DAAQMKEMMAMMQKMQQQQAAYPPQGYAPP PQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQ--------------------- ------------------------------------------------------------ GAP----------------P------AAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- 
------------------------------------------------------------ ------------------------------------------------------------ --------------------PHLLSGFLEVTASPAPTW--------------DAPPDNVS GCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGIT RPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWAQ-------NVNDDKVCLISQD F----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP---G------ -----FPRVQPESVISLNGV---------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ V--------------KLQKEV------------------EECAN---------------- --LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIP LWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ---------------------- ------------------------------------------------------------ -----------------------------------------CQYRNINRKL-SAAGMHEA LKLAE-RPERSE----FVLQ---NSDHCGKKG-------------------------H-- --------------------------------------DT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ --------------------PHLLSGFLEVTASPAPTW--------------DAPPDNVS GCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADL SVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGIT RPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWAQ-------NVNDDKVCLISQD F----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP---G------ -----FPRVQPESVISLNGV---------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ V--------------KLQKEV------------------EECAN---------------- --LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIP LWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ---------------------- ------------------------------------------------------------ 
-----------------------------------------CQYRNINRKL-SAAGMHEA LKLAE-RPERSE----FVLQ---NSDHCGKKG-------------------------H-- --------------------------------------DT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLSPG----------------------------------------------------- ------------------------------------------------------------ -----QGNNTT--------------------SPPAP------FETGG------------- ----------------------------NTTGISDVTV---------------------- ----------SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAIT DPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP--EDRSDPD---ACTISKD H----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-------------- -------------------------TVKKVEKTG-------------------------- -------------ADTRHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQG DDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAPASFERKNER----------------- --------------------NAEA------------------------------------ -----------KRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MP TLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK---------------------- ------------------------------------------------------------ -----------------------------------------CKF---------------- ---------------------------CR------------------------------- ---------------------------------------Q > 33=p A35181 serotonin receptor class 1A - rat MDVFSFG----------------------------------------------------- ------------------------------------------------------------ -----QGNNTT--------------------ASQEP------FGTGG------------- ----------------------------NVTSISDVTF---------------------- ----------SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL MVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAIT DPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP--EDRSDPD---ACTISKD H----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-------------- -------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDW RRCAENRAVGTP---------------------------------------CTNGAVRQG 
DDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAPACLERKNER----------------- --------------------NAEA------------------------------------ -----------KRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MP ALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK---------------------- ------------------------------------------------------------ -----------------------------------------CKF---------------- ---------------------------CR------------------------------- ---------------------------------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALDVARMGGL----------------------------------------A STPSG------------LRSTGLTTPGLSPTGLV----TSDFNDSYGLTGQFINGSHSSR SRDNASANDTSATNMTDDRYWSLTVYSHEHL----------------------------- ------------------------------------------------------------ -------------VLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL MVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVT S-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NN--DPDKTGTCIISQD K----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEE TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRP--------------- ------------------------------------------------------------ -------------------------------------ERKKNRAKKLPENANG------- ---VNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS---- ---------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDP-EG-IP PFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF---------------------- ------------------------------------------------------------ -----------------------------------------GKY---------------- ------------------------------RR-------------------------G-- --------------------------------------HR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVARMGGL----------------------------------------A STPSG------------LRSTGLTTPGLSPTGLV----TSDFNDSYGLTGQFINGSHSSR SRDNASANDTSATNMTDDRYWSLTVYSHEHL----------------------------- ------------------------------------------------------------ -------------VLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL 
MVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVT S-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NN--DPDKTGTCIISQD K----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEE TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRP--------------- ------------------------------------------------------------ -------------------------------------ERKKNRAKKLPENANG------- ---VNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS---- ---------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDP-EG-IP PFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF---------------------- ------------------------------------------------------------ -----------------------------------------GKY---------------- ------------------------------RR-------------------------G-- --------------------------------------HR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi ME---------------------------------------------------------- ------------------------------------------------------------ -------------------------------------------GAEG-----QEELDWEA LYLRLPLQNCSWNSTGWE--PNW-----NVTVVPNTTW-----------WQASAPFDTPA AL--------VRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADL LVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVT N-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQ--RVSEDLRCVVSQD V----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRR------------ ------------------------------------------------------------ ------------GATARGGVGPPPVP--------------------AGGALVAGG----G SGGIAAAVVAV--IGR-----PLPT---------------------------ISETTTTG FTNVSSNNTSPEK--------------------------QSCANGLEADPPTTGYGAVAA AYYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CD--CE-VS PVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC---------------------- ------------------------------------------------------------ -----------------------------------------GRR---------------- ---------------------------VRRRR-------------------------A-- --------------------------------------PQ 
mafft-7.505-without-extensions/test/sample.fftns20000644000175000017500000007022014224501721021436 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] ----------------------------------------MNG----------------T E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL GDDE--SGASTSKTEVSSVS-TSPVSPA-------------------------------- --------------------------------------------------------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] ----------------------------------------MNG----------------T E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL GDDE--ASATASKTE------TSQVAPA-------------------------------- --------------------------------------------------------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 ----------------------------------------MNG----------------T E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW 
N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF GDEDVSSTVSQSKTEVSSVS-SSQVSPA-------------------------------- --------------------------------------------------------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish ----------------------------------------MNG----------------T E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL GDDE-SSTVSTSKTEVSS------VSPA-------------------------------- --------------------------------------------------------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish ----------------------------------------MNG----------------T E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N 
KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL GDEE-SSTVSTSKTEVSS------VSPA-------------------------------- --------------------------------------------------------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- ----------------------------------------MKQ----------------V PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I EEDE--ASTSSQVTQVSS------VAPEK------------------------------- --------------------------------------------------------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] ----------------------------------------MRK----------------M S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M TDES--DTCSSQKTEVSTVS-STQVGPN-------------------------------- --------------------------------------------------------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------DLAETVIA-STISIVNQV 
S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESESTQK------AEKEVTRMVVVMVLAFC----------------- ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] ----------------------------------------MAQQWSLQRLAGRHPQDSYE DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- --------------------------------------------------------- > 10== Z68193 1 human Red Opsin <>[] ----------------------------------------MAQQWSLQRLAGRHPQDSYE DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V 
DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- --------------------------------------------------------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] ----------------------------------------MTEAWNVAVFAARRSRDD-D DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V DDGS--EASTTSRTEVSSVS-NSSVAPA-------------------------------- --------------------------------------------------------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] ----------------------------------------MAA-WEAAFAARRRHEE--E DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V DDGS--EVST-SRTEVSSVS-NSSVSPA-------------------------------- --------------------------------------------------------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] ----------------------------------------MS-----------------S NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI 
N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK-------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T GKAS--PGTPGPHADVTAAGLRNKVMPAHPV----------------------------- --------------------------------------------------------- > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] ----------MESGNVSS------------SLFGNVST-ALRP----------------E ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- --------------------------------------------------MN-------- --VESL------------------------------------------------------ ----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N EKAP--ESSAVASTSTTQEP--QQTTAA-------------------------------- --------------------------------------------------------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 ----------MEYHNVSS------------VL-GNVSS-VLRP----------------D ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- --------------------------------------------------MN-------- --VDSL------------------------------------------------------ ----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G 
DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S EKAP--ESRAAISTSTTQEQ--QQTTAA-------------------------------- --------------------------------------------------------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ----------ME------------------PL-CNASEPPLRP----------------E AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK------------------------- --------------------------------------------------MN-------- --VESL------------------------------------------------------ ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N EKSG--EISSAQST-TTQEQ--QQTTAA-------------------------------- --------------------------------------------------------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 ----------MD------------------AL-CNASEPPLRP----------------E ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK------------------------- --------------------------------------------------MN-------- --VESL------------------------------------------------------ ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N EKSG--EASSAQST-TTQEQ-TQQTSAA-------------------------------- --------------------------------------------------------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------------------MTNATGPQMAY----------------Y GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL 
GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- --------------------------------------------------MN-------- --VSTL------------------------------------------------------ ----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T ETKS--NDDSQSNSTVAQDK-A-------------------------------------- --------------------------------------------------------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------------------MANVTGPQMAF----------------Y GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- --------------------------------------------------MN-------- --VTNL------------------------------------------------------ ----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K DPND--VEENQSSNTQTQEK-S-------------------------------------- --------------------------------------------------------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ----------ME----SF------------AVAAAQLGPHFAP----------------L S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- --------------------------------------------------MN-------- --VKSL------------------------------------------------------ 
----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- --------------------------------------------------------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ----------ME----SF------------AVAAAQLGPHFAP----------------L S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- --------------------------------------------------MN-------- --VKSL------------------------------------------------------ ----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- --------------------------------------------------------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' ----------MD----SF------------AAVATQLGPQFAA----------------P S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- --------------------------------------------------MN-------- --VKSL------------------------------------------------------ ----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V DDGK--SSEAQSQA-TTSEA-ESKA----------------------------------- --------------------------------------------------------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] -----MERSHLP----ET------------PFDLAHSGPRFQA----------------Q 
SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK------------------------- --------------------------------------------------MN-------- --VKSL------------------------------------------------------ ----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T DEPK--PDAPASDTETTSEA-DSKA----------------------------------- --------------------------------------------------------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 -----MERSLLP----EP------------PLAMALLGPRFEA----------------Q TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK------------------------- --------------------------------------------------MN-------- --VKSL------------------------------------------------------ ----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T DEPK--PDAPPSDTETTSEA-ESKD----------------------------------- --------------------------------------------------------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------------------MIAVSGPSYEA----------------F SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK------------------------- --------------------------------------------------MN-------- 
--VASL------------------------------------------------------ ----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A EPSS--DAVSTTSGTTTVTD-NEKSNA--------------------------------- --------------------------------------------------------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] ----------------------------------MANQLSYSS----------------L GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK------------------------- --------------------------------------------------MN-------- --VASL------------------------------------------------------ ----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G ESGS--DVKSEASATTTMEE-KPKIPEA-------------------------------- --------------------------------------------------------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ---------------------------------------MVES----------------T TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR------------------------- --------------------------------------------------LN-------- --AKEL------------------------------------------------------ ----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] 
----------------------------------------MGR----------------D LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR------------------------- --------------------------------------------------LN-------- --AKEL------------------------------------------------------ ----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- ---------------------------------PGFPRV----QPESVISLNG------- --VVKL----------------------QK---------EVEECAN-------------- ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR-- ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- --------------------------------------------CGKKGHDT----- > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- 
---------------------------------PGFPRV----QPESVISLNG------- --VVKL----------------------QK---------EVEECAN-------------- ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- --------------------------------------------CGKKGHDT----- > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] ----------MDVLSP-------------------------------------------- ---------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- --------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-- -------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-- PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ---- ------------------------------------------------------------ --------------------------------------------------------- > 33=p A35181 serotonin receptor class 1A - rat ----------MDVFSF-------------------------------------------- ---------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- --------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-- -------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-- SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR---- ------------------------------------------------------------ --------------------------------------------------------- > 34== L06803 1 Lymnaea stagnalis serotonin receptor 
<>[PNAS90,11-15'93] MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- ------------------------------------------------------------ --------------------------------------------------------- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- ------------------------------------------------------------ --------------------------------------------------------- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi -MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T VVPN--TTWW------------QASAPFDTPAALVRAAAK-------------------- AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT ASSFYVPVLIILILYWRIY-------QTARKRIR-------------------------- 
-------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR-- ------A---------------PQ------------------------------------ --------------------------------------------------------- mafft-7.505-without-extensions/test/sample0000644000175000017500000004035014224501721020236 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP 
EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN > 8=opsin, greensensitive human (fragment) S07060 DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL 
QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS VSPA > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS VSPA > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS SVAPA > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS PA > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL 
LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE KAPESSAVASTSTTQEPQQTTAA > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK APESRAAISTSTTQEQQQTTAA > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE ISSAQSTTTQEQQQTTAA > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG EASSAQSTTTQEQTQQTSAA > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV 
IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET KSNDDSQSNSTVAQDKA > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP NDVEENQSSNTQTQEKS > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA QSQATASEAESKA > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE AQSQATTSEAESKA > 23== M12896 1 D.melanogaster 
Rh2 <>[Cell44,705-710'86] MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD EPKPDAPASDTETTSEADSKA > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD EPKPDAPPSDTETTSEAESKD > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV STTSGTTTVTDNEKSNA > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE ASATTTMEEKPKIPEA > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG 
FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY PPPPQGAPPQGAPPAAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA 
IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC RQ > 33=p A35181 serotonin receptor class 1A - rat MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC RR > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL NPIIYTIFSPEFRSAFQKILFGKYRRGHR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL 
NPIIYTIFSPEFRSAFQKILFGKYRRGHR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT VFSPEFRHAFQRLLCGRRVRRRRAPQ mafft-7.505-without-extensions/test/sample.ginsi0000644000175000017500000007160414224501721021354 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MN---------------------------------------------------------- ----GTEG--DNFY------------VPFSNKTG----------------------LARS PYEYPQY------YLAEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYIL LNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLA IERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQ CSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEKEVTRMVVLMVIGFLVCWV PYASVAFYIFT---HQGSD-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC C---GKNPLGDDE-SGA-STSKTEVSSVS-TSPV-------------------------- ------------------------------------------------------------ -----------SPA--- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] MN---------------------------------------------------------- ----GTEG--PNFY------------VPFSNITG----------------------VVRS PFEQPQY------YLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYIL LNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLA IERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQ CSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA--- ------------------------------------------------------------ 
------------------------------------------------------------ -----------------------------------TTQKAEKEVTRMVIIMVIFFLICWL PYASVAMYIFT---HQGSN-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC C---GKNPLGDDE-ASA-TASKTE------TSQV-------------------------- ------------------------------------------------------------ -----------APA--- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MN---------------------------------------------------------- ----GTEG--INFY------------VPMSNKTG----------------------VVRS PFEYPQY------YLAEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYIL VNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLA IERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQ CSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------TTQKAEKEVTRMVILMVLGFMLAWT PYAVVAFWIFT---NKGAD-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC C---GKNPFGDEDVSSTVSQSKTEVSSVS-SSQV-------------------------- ------------------------------------------------------------ -----------SPA--- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MN---------------------------------------------------------- ----GTEG--KNFY------------VPMSNRTG----------------------LVRS PFEYPQY------YLAEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFIL VNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLA IERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQ CSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEREVTKMVILMVFGFLIAWT PYATVAAWIFF---NKGAD-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF C---GKNPLGDDE-SSTVSTSKTEVSS------V-------------------------- ------------------------------------------------------------ -----------SPA--- > 5=p B45229 opsin, green-sensitive (clone 
GFgr-2) - goldfish MN---------------------------------------------------------- ----GTEG--NNFY------------VPLSNRTG----------------------LVRS PFEYPQY------YLAEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFIL VNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLA IERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQ CSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEREVTKMVILMVLGFLVAWT PYATVAAWIFF---NKGAA-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF C---GKNPLGDEE-SSTVSTSKTEVSS------V-------------------------- ------------------------------------------------------------ -----------SPA--- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MK---------------------------------------------------------- ----QVPEFHEDFY------IPIP--LDINNLSA------------------------YS PFLVPQD------HLGNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYIL VNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVA FERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQ CSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEREVTKMVVVMVLGFLVCWA PYASFSLWIVS---HRGEE-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMVC ----GKN-IEEDE-AST-SSQVTQVSSVA-PEK--------------------------- ------------------------------------------------------------ ----------------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MR---------------------------------------------------------- ----KMSE--EEFY------------L-FKNISS----------------------V--G PWDGPQY------HIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYIL VNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLA FERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQ 
CSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------TTQKAEREVSRMVVVMVGSFCVCYV PYAAFAMYMVN---NRNHG-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMVC ----GKA-MTDES-DTC-SSQKTEVSTVS-STQV-------------------------- ------------------------------------------------------------ -----------GPN--- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEKEVTRMVVVMVLAFC---- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MA------QQWS-LQRLAGRHPQDS--------------------------YED------ ----STQS--SIFT------------YTNSNSTR-------------------------G PFEGPNY------HIAPRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWIL VNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS WERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEKEVTRMVVVMVLAFCFCWG PYAFFACFAAA---NPGYP-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF- ----GKK-VDDGS-ELS-SASKTEVSSV---SSV-------------------------- 
------------------------------------------------------------ -----------SPA--- > 10== Z68193 1 human Red Opsin <>[] MA------QQWS-LQRLAGRHPQDS--------------------------YED------ ----STQS--SIFT------------YTNSNSTR-------------------------G PFEGPNY------HIAPRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWIL VNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLK TSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEKEVTRMVVVMIFAYCVCWG PYTFFACFAAA---NPGYA-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF- ----GKK-VDDGS-ELS-SASKTEVSSV---SSV-------------------------- ------------------------------------------------------------ -----------SPA--- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MT------EAWNVAVFAARRSRDD----------------------------DD------ ----TTRG--SVFT------------YTNTNNTR-------------------------G PFEGPNY------HIAPRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWIL VNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIIS WERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLK TSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEREVSRMVVVMIVAFCICWG PYASFVSFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF- ----GKK-VDDGS-EAS-TTSRTEVSSVS-NSSV-------------------------- ------------------------------------------------------------ -----------APA--- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-------AWE-AAFAARRRHEE----------------------------ED------ ----TTRD--SVFT------------YTNSNNTR-------------------------G PFEGPNY------HIAPRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWIL VNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIIS 
WERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLK TSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------STQKAEKEVSRMVVVMIVAYCFCWG PYTFFACFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF- ----GKK-VDDGS-EVS--TSRTEVSSVS-NSSV-------------------------- ------------------------------------------------------------ -----------SPA--- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS---------------------------------------------------------- ----SNSS------------------QAPPNGTP-------------------------G PFDGPQWP-----YQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYIL VNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILA LERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLR TSCGPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD--- ------------------------------------------------------------ ------------------------------------------------------------ -----------------------------------TTQRAEREVTRMVIVMVMAFLLCWL PYSTFALVVAT---HKGII-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLC C---GYQPQRTGKASPGTPGPHADVTAAGLRNKV-------------------------- ------------------------------------------------------------ -----------MPAHPV > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] MESGNVS----------------------------------------------------- SSLFGNVS--TALR-------------PEARLSA---E------TRLLGWNVPPEELRHI PEHWLTY------PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILV INLAFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIA YDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYL TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM NVE------------------SLRS----------------------------------- ------------------------------------------------------------ -------------------------------NVDKNKETAEIRIAKAAITICFLFFCSWT PYGVMSLIGAF---GDKTL-LTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCP 
WL--ALNEKAPES-SAV-ASTST---TQE-PQQT-------------------------- ------------------------------------------------------------ -----------TAA--- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNVS----------------------------------------------------- SVL-GNVS--SVLR-------------PDARLSA---E------SRLLGWNVPPDELRHI PEHWLIY------PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILV INLAFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIA YDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYL TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM NVD------------------SLRS----------------------------------- ------------------------------------------------------------ -------------------------------NVDKSKEAAEIRIAKAAITICFLFFASWT PYGVMSLIGAF---GDKTL-LTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCP WL--AISEKAPES-RAA-ISTST---TQE-QQQT-------------------------- ------------------------------------------------------------ -----------TAA--- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME---------------------------------------------------------- -PLCNASE--PPLR-------------PEAR-SSGNGD------LQFLGWNVPPDQIQYI PEHWLTQ------LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFV LNLAVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIG YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL TSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKM NVE------------------SLRS----------------------------------- ------------------------------------------------------------ -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT PYGVMSLIGAF---GDKSL-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCP WL--GVNEKSGEI-SSA-QSTTT---QEQ--QQT-------------------------- ------------------------------------------------------------ -----------TAA--- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD---------------------------------------------------------- -ALCNASE--PPLR-------------PEARMSSGSDE------LQFLGWNVPPDQIQYI 
PEHWLTQ------LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFV LNLAVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIG YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL TSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKM NVE------------------SLRS----------------------------------- ------------------------------------------------------------ -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT PYGVMSLIGAF---GDKSL-LTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCP WL--GVNEKSGEA-SSA-QSTTT---QEQ-TQQT-------------------------- ------------------------------------------------------------ -----------SAA--- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT---------------------------------------------------------- ----NATG--PQMAY-----------YGAASMDFGYPE------GVSIVDFVRPEIKPYV HQHWYNY------PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILV VNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMIS FDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGIL DSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKM NVS------------------TLRS----------------------------------- ------------------------------------------------------------ --------------------------------NEADAQRAEIRIAKTALVNVSLWFICWT PYALISLKGVM---GDTSG-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLP WF--CVHETETKS-NDD-SQSNS---TVA-Q----------------------------- ------------------------------------------------------------ -----------DKA--- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA---------------------------------------------------------- ----NVTG--PQMAF-----------YGSGAATFGYPE------GMTVADFVPDRVKHMV LDHWYNY------PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLI VNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMIS FDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGIL DSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKM NVT------------------NLRS----------------------------------- ------------------------------------------------------------ 
--------------------------------NEAETQRAEIRIAKTALVNVSLWFICWT PYAAITIQGLL---GNAEG-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLP WF--CVHEKDPND-VEE-NQSSN---TQT-Q----------------------------- ------------------------------------------------------------ -----------EKS--- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] MESF-------------------------------------------------------- AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS LDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNL TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM NVK------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP CC--VFGKVDDGK-SSD-AQSQA-TASEA-E----------------------------- ------------------------------------------------------------ -----------SKA--- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] MESF-------------------------------------------------------- AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNL TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM NVK------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP CC--VFGKVDDGK-SSD-AQSQA-TASEA-E----------------------------- ------------------------------------------------------------ -----------SKA--- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' 
MDSF-------------------------------------------------------- AAVATQLG--PQFA----------------APS-----------NGSVVDKVTPDMAHLI SPYWDQF------PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLV INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNL TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM NVK------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SEDADKSAEGKLAKVALVTISLWFMAWT PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP CC--VFGKVDDGK-SSE-AQSQA-TTSEA-E----------------------------- ------------------------------------------------------------ -----------SKA--- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSHLP---------------------------------------------------ET PFDLAHSG--PRFQ----------------AQSSG---------NGSVLDNVLPDMAHLV NPYWSRF------APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLV LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIA FDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNL TACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKM NVK------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SEDCDKSAEGKLAKVALTTISLWFMAWT PYLVICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP MC--VFGNTDEPK-PDA-PASDTETTSEA-D----------------------------- ------------------------------------------------------------ -----------SKA--- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSLLP---------------------------------------------------EP PLAMALLG--PRFE----------------AQTGG---------NRSVLDNVLPDMAPLV NPHWSRF------APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLV LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIA FDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNL 
TACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKM NVK------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SEDCDKSAENKLAKVALTTISLWFMAWT PYLIICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP MC--VCGTTDEPK-PDA-PPSDTETTSEA-E----------------------------- ------------------------------------------------------------ -----------SKD--- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI---------------------------------------------------------- ----AVSG--PSYE----------------AFSYGGQAR---FNNQTVVDKVPPDMLHLI DANWYQY------PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFV INLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIA FDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNM TACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKM NVA------------------SLRS----------------------------------- ------------------------------------------------------------ --------------------------------SENQNTSAECKLAKVALMTISLWFMAWT PYLVINFSGIF---NL-VK-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP SL--AC-AAEPSS-DAV-STTSG-TTTVT-DNEK-------------------------- ------------------------------------------------------------ -----------SNA--- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M----------------------------------------------------------- -----ANQ--LSYS----------------SLGWPYQP------NASVVDTMPKEMLYMI HEHWYAF------PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLV VNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMIT LDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNL TSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKM NVA------------------SLRA----------------------------------- ------------------------------------------------------------ -------------------------------NADQQKQSAECRLAKVAMMTVGLWFMAWT PYLIISWAGVF---SSGTR-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP SL--ACGSGESGS-DVK-SEASA-TTTME-EKPK-------------------------- 
------------------------------------------------------------ ----------IPEA--- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MV---------------------------------------------------------- ----ESTT--------------------LVNQTWWY--------NPTVD----------I HPHWAKF------DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFI INLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMIS IDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGIL TSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRL NAK------------------ELRK----------------------------------- ------------------------------------------------------------ --------------------------------AQ-AGASAEMKLAKISMVIITQFMLSWS PYAIIALLAQF---GPAEW-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFP WLLTCCQFDEKEC-EDA-NDAEE---EVV-ASER--GGESRDAAQMKEMMAMMQKMQAQQ AAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQ GAPPQGVDNQAYQA--- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MG---------------------------------------------------------- ----RDLR---------------------DNETWWY--------NPSIV----------V HPHWREF------DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFI INLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMIS IDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVL CNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRL NAK------------------ELRK----------------------------------- ------------------------------------------------------------ --------------------------------AQ-AGANAEMRLAKISIVIVSQFLLSWS PYAVVALLAQF---GPLEW-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFP WVLTCCQFDDKET-EDD-KDAET---EIP-AGESSDAAPSADAAQMKEMMAMMQKMQQQQ AAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------ AAPPQGVDNQAYQA--- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG----------- 
---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG--- -----------FPRVQPESVISLNG----------------------------------- ---------------------------VVKLQKE-------------------VEECAN- -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ ----CQYRNINRKLSAA-GMHEALKLAER-PERS-------------EFVL--------- ------------------------------------------------------------ ----QNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG----------- ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG--- -----------FPRVQPESVISLNG----------------------------------- ---------------------------VVKLQKE-------------------VEECAN- -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ ----CQYRNINRKLSAA-GMHEALKLAER-PERS-------------EFVL--------- ------------------------------------------------------------ ----QNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLS------------------------------------------------------- --------------------------PGQGNNTTSPPAPFETGGNTTGI----------- -------------SDVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKT GADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC------------- -----------------------------------ANGAVRQGDDGAALEVIEVHRVGNS 
KEHLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL PFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK ----CKFCR--------------------------------------------------- ------------------------------------------------------------ ----------------Q > 33=p A35181 serotonin receptor class 1A - rat MDVFS------------------------------------------------------- --------------------------FGQGNNTTASQEPFGTGGNVTSI----------- -------------SDVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKK GAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC------------- -----------------------------------TNGAVRQGDDEATLEVIEVHRVGNS KEHLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL PFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK ----CKFCR--------------------------------------------------- ------------------------------------------------------------ ----------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF ----GKYRRGH------------------------------------------------- ------------------------------------------------------------ ----------------R > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail 
MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF ----GKYRRGH------------------------------------------------- ------------------------------------------------------------ ----------------R > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi ME---------------------------------------------------------- ----GAEG--QEELDWEALYLRLPLQNCSWNSTGWEPN-----WNVTVV----------P NTTWWQASAPFD-TPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLI LSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIA LDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLR CVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARG GVG---------------------------------------------PPP--------- VPAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANG LEADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWL PFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC ----GRRVRRRRA----------------------------------------------- ----P------------------------------------------------------- ----------------Q mafft-7.505-without-extensions/test/sample.lins10000644000175000017500000007033014224501721021264 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] --------------------MNGTE--G-------------------DNFYVPFSNKTG- -------------------------------LARSPYEYPQY----------------YL AEPW---------KYSA-----LAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILL NLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAI 
ERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALACAAP-PLVG-WS-----RYIPEGMQC SCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-HQGS-- DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC-----C----GKNPLGDDE -SGA-STSK-TEVSSVS-TSPVSPA----------------------------------- ----------------------------------------------------------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] --------------------MNGTE--G-------------------PNFYVPFSNITG- -------------------------------VVRSPFEQPQY----------------YL AEPW---------QFSM-----LAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILL NLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAI ERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALACAAP-PLVG-WS-----RYIPEGMQC SCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-HQGS-- NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC-----C----GKNPLGDDE -ASA-TASK-TETSQVA-PA---------------------------------------- ----------------------------------------------------------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 --------------------MNGTE--G-------------------INFYVPMSNKTG- -------------------------------VVRSPFEYPQY----------------YL AEPW---------KYRL-----VCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILV NLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAI ERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSCAAP-PLFG-WS-----RYMPEGMQC SCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-NKGA-- DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC-----C----GKNPFGDED 
VSSTVSQSK-TEVSSVS-SSQVSPA----------------------------------- ----------------------------------------------------------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish --------------------MNGTE--G-------------------KNFYVPMSNRTG- -------------------------------LVRSPFEYPQY----------------YL AEPW---------QFKI-----LALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILV NLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAI ERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALACAAP-PLFG-WS-----RYIPEGMQC SCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-NKGA-- DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF-----C----GKNPLGDDE -SSTVSTSK-TEVSSVS-PA---------------------------------------- ----------------------------------------------------------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish --------------------MNGTE--G-------------------NNFYVPLSNRTG- -------------------------------LVRSPFEYPQY----------------YL AEPW---------QFKL-----LAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILV NLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAI ERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMACAAP-PLVG-WS-----RYIPEGIQC SCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-NKGA-- AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF-----C----GKNPLGDEE -SSTVSTSK-TEVSSVS-PA---------------------------------------- ----------------------------------------------------------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- --------------------MKQVPEFH-------------------EDFYIPIPLDIN- -----------------------------NLSAYSPFLVPQD----------------HL GNQG---------IFMA-----MSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILV 
NLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAF ERWLVICKPLGNF-TFKTPHAIAGCILPWISALAASLP-PLFG-WS-----RYIPEGLQC SCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-HRGE-- EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMV-----C----GKN-IEEDE -AST-SSQV-TQVSSVA-PEK--------------------------------------- ----------------------------------------------------------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] --------------------MRKMS--E-------------------EEFYL-----FK- -----------------------------NISSVGPWDGPQY----------------HI APVW---------AFYL-----QAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILV NVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAF ERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGVSIP-PFFG-WS-----RFIPEGLQC SCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------TTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-NRNH-- GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMV-----C----GKA-MTDES -DTC-SSQK-TEVSTVS-STQVGPN----------------------------------- ----------------------------------------------------------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISW ERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVWTAP-PIFG-WS-----RYWPHGLKT SCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEKEVTRMVVVMVLAFC---------------------- ------------------------------------------------------------ 
------------------------------------------------------------ ----------------------------------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWS-LQRLAGRHPQDSYEDSTQ--S-------------------SIFTYTNSN---- -------------------------------STRGPFEGPNY----------------HI APRW---------VYHL-----TSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILV NLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISW ERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVWTAP-PIFG-WS-----RYWPHGLKT SCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-NPGY-- PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK-VDDGS -ELS-SASK-TEVSSV---SSVSPA----------------------------------- ----------------------------------------------------------- > 10== Z68193 1 human Red Opsin <>[] MAQQWS-LQRLAGRHPQDSYEDSTQ--S-------------------SIFTYTNSN---- -------------------------------STRGPFEGPNY----------------HI APRW---------VYHL-----TSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILV NLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISW ERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVWTAP-PIFG-WS-----RYWPHGLKT SCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-NPGY-- AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK-VDDGS -ELS-SASK-TEVSSV---SSVSPA----------------------------------- ----------------------------------------------------------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD--DDTTR--G-------------------SVFTYTNTN---- -------------------------------NTRGPFEGPNY----------------HI APRW---------VYNL-----VSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILV NLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISW 
ERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGWSAP-PIFG-WS-----RYWPHGLKT SCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA-NPGY-- AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF----------GKK-VDDGS -EAS-TTSR-TEVSSVS-NSSVAPA----------------------------------- ----------------------------------------------------------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE--EDTTR--D-------------------SVFTYTNSN---- -------------------------------NTRGPFEGPNY----------------HI APRW---------VYNL-----TSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILV NLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISW ERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAWTAP-PIFG-WS-----RYWPHGLKT SCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------STQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-NPGY-- AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF----------GKK-VDDGS -EVS-T-SR-TEVSSVS-NSSVSPA----------------------------------- ----------------------------------------------------------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] ------------------------M--S-------------------SNSSQAPPN---- -------------------------------GTPGPFDGPQW---------------PYQ APQS---------TYVG-----VAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILV NLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILAL ERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLWSAP-PLLG-WS-----SYVPEGLRT SCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD---- ------------------------------------------------------------ ------------------------------------------------------------ -----------------TTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-HKGI-- IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EML-----CCGYQPQR-TGKAS 
-PGT-PGPH-ADVTAAGLRNKVMPAHPV-------------------------------- ----------------------------------------------------------- > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] -MESGNVS--SS--------LFGNV--S-------------------TALR-------P- -------------------------------EARLSA---E---TRLLGWNVPPEELRHI PEHWLTYPEPPESMNYL-----LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVI NLAFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAY DRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPWVVA-CYTETWG-----RFVPEGYLT SCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMN VE------------------SLRS------------------------------------ ------------------------------------------------------------ -------------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-GDKT-- LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ-KRCPWL--AL---NEK-APESS -AVA-STST-TQEPQQT------TAA---------------------------------- ----------------------------------------------------------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 -MEYHNVS--SV--------L-GNV--S-------------------SVLR-------P- -------------------------------DARLSA---E---SRLLGWNVPPDELRHI PEHWLIYPEPPESMNYL-----LGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVI NLAFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAY DRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPWVVA-CYTESWG-----RFVPEGYLT SCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMN VD------------------SLRS------------------------------------ ------------------------------------------------------------ -------------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF-GDKT-- LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ-KRCPWL--AI---SEK-APESR -AAI-STST-TQEQQQT------TAA---------------------------------- ----------------------------------------------------------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] -ME--------P--------LCNAS--E-------------------PPLR-------P- -------------------------------EAR-SSGNGD---LQFLGWNVPPDQIQYI PEHWLTQLEPPASMHYM-----LGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVL 
NLAVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGY DRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPWVVL-PLTQFWD-----RFVPEGYLT SCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMN VE------------------SLRS------------------------------------ ------------------------------------------------------------ -------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-- LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ-KRCPWL--GV---NEK-SGEIS -SAQ-STTT-QEQQQTT------AA----------------------------------- ----------------------------------------------------------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 -MD--------A--------LCNAS--E-------------------PPLR-------P- -------------------------------EARMSSGSDE---LQFLGWNVPPDQIQYI PEHWLTQLEPPASMHYM-----LGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVL NLAVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGY DRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPWVVL-PLTQFWD-----RFVPEGYLT SCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMN VE------------------SLRS------------------------------------ ------------------------------------------------------------ -------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-- LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ-KRCPWL--GV---NEK-SGEAS -SAQ-STTT-QEQTQQT------SAA---------------------------------- ----------------------------------------------------------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------MTNAT--G-------------------PQMAY-----YG- -------------------------------AASMDFGYPE---GVSIVDFVRPEIKPYV HQHWYNYPPVNPMWHYL-----LGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVV NLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISF DRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGCALP-PFFG-WG-----NYILEGILD SCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMN VS------------------TLRS------------------------------------ ------------------------------------------------------------ --------------NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM-GDTS-- 
GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT-QHLPWF--CV---HET-ETKSN -DDS-QSNS-TVAQDKA------------------------------------------- ----------------------------------------------------------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------MANVT--G-------------------PQMAF-----YG- -------------------------------SGAATFGYPE---GMTVADFVPDRVKHMV LDHWYNYPPVNPMWHYL-----LGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIV NLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISF DRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGWSLP-PFFG-WG-----SYTLEGILD SCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMN VT------------------NLRS------------------------------------ ------------------------------------------------------------ --------------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL-GNAE-- GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT-QHLPWF--CV---HEK-DPNDV -EEN-QSSN-TQTQEKS------------------------------------------- ----------------------------------------------------------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ------MESFAV--------AAAQL--G-------------------PHFA--------- ---------------------------------PLS--------NGSVVDKVTPDMAHLI SPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVI NLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISL DRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLT SCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMN VK------------------SLRS------------------------------------ ------------------------------------------------------------ --------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-- GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VF---GKV-DDGKS -SDA-QSQA-TASEAES------KA----------------------------------- ----------------------------------------------------------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ------MESFAV--------AAAQL--G-------------------PHFA--------- ---------------------------------PLS--------NGSVVDKVTPDMAHLI 
SPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVI NLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISL DRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLT SCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMN VK------------------SLRS------------------------------------ ------------------------------------------------------------ --------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-- GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VF---GKV-DDGKS -SDA-QSQA-TASEAES------KA----------------------------------- ----------------------------------------------------------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' ------MDSFAA--------VATQL--G-------------------PQFA--------- ---------------------------------APS--------NGSVVDKVTPDMAHLI SPYWDQFPAMDPIWAKI-----LTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVI NLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISL DRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLT SCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMN VK------------------SLRS------------------------------------ ------------------------------------------------------------ --------------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF-KF-E-- GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VF---GKV-DDGKS -SEA-QSQA-TTSEAES------KA----------------------------------- ----------------------------------------------------------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] -MERSHLPETPF--------DLAHS--G-------------------PRFQ--------- ---------------------------------AQSSG------NGSVLDNVLPDMAHLV NPYWSRFAPMDPMMSKI-----LGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVL NLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAF DRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFWTVM-PLIG-WS-----AYVPEGNLT ACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMN VK------------------SLRS------------------------------------ ------------------------------------------------------------ 
--------------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF-KI-D-- GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK-EKCPMC--VF---GNT-DEPKP -DAP-ASDTETTSEADS------KA----------------------------------- ----------------------------------------------------------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 -MERSLLPEPPL--------AMALL--G-------------------PRFE--------- ---------------------------------AQTGG------NRSVLDNVLPDMAPLV NPHWSRFAPMDPTMSKI-----LGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVL NLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAF DRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFWTIM-PLIG-WS-----SYVPEGNLT ACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMN VK------------------SLRS------------------------------------ ------------------------------------------------------------ --------------SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF-KI-D-- GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK-EKCPMC--VC---GTT-DEPKP -DAP-PSDTETTSEAES------KD----------------------------------- ----------------------------------------------------------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------MIAVS--G-------------------PSYE--------- ---------------------------------AFSYGGQARFNNQTVVDKVPPDMLHLI DANWYQYPPLNPMWHGI-----LGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVI NLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAF DRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGWTIA-PMFG-WN-----RYVPEGNMT ACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMN VA------------------SLRS------------------------------------ ------------------------------------------------------------ --------------SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF-NL-V-- KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF-AKFPSL--AC----AA-EPSSD -AVS-TTSG-TTTVTDN------EKSNA-------------------------------- ----------------------------------------------------------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] ----------------------MAN--Q-------------------LSYS--------- 
---------------------------------SLGWPYQP---NASVVDTMPKEMLYMI HEHWYAFPPMNPLWYSI-----LGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVV NLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITL DRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGWTIL-PFFG-WS-----RYVPEGNLT SCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMN VA------------------SLRA------------------------------------ ------------------------------------------------------------ -------------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-SSGT-- RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY-QRFPSL--AC---GSG-ESGSD -VKS-EASA-TTTMEEK------PKIPEA------------------------------- ----------------------------------------------------------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ----------------------MVE--S-------------------TTLV--------- ---------------------------------NQTWWY-----NPTV----------DI HPHWAKFDPIPDAVYYS-----VGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFII NLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISI DRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVG-PVFN-WG-----AYVPEGILT SCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLN AK------------------ELRK------------------------------------ ------------------------------------------------------------ --------------AQ-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF-GPAE-- WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ-TTFPWLLTCC---QFD-EKECE -DAN-DAEE-EVVASER----GGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGY PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] -----------------------MG--R-------------------DLRD--------- ---------------------------------NETWWY-----NPSI----------VV HPHWREFDQVPDAVYYS-----LGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFII NLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISI DRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIG-PIFG-WG-----AYTLEGVLC NCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLN AK------------------ELRK------------------------------------ 
------------------------------------------------------------ --------------AQ-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-GPLE-- WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS-QTFPWVLTCC---QFD-DKETE -DDK-DAET-EIPAGES--SDAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGY PPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] ------------------------------------------------------------ --------------------MPHLLSGFLEVTASPA---PTW--DA------PPDNVSGC GEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIV SLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISI DRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLP-PLFG-WA-----QNVNDDKVC LISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------ --------PGFPRVQPESVISLNG------------------------------------ ----------------VVKLQKE------------------VEECAN------------- -----LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR-SLL---------QCQYRNINRK LSAA-GMHE-ALKLAER------PERSEFVL------------QNSDHCGK--------- -------------------------------------------------KGHDT----- > 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------ --------------------MPHLLSGFLEVTASPA---PTW--DA------PPDNVSGC GEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIV SLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISI DRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLP-PLFG-WA-----QNVNDDKVC LISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF------ --------PGFPRVQPESVISLNG------------------------------------ ----------------VVKLQKE------------------VEECAN------------- -----LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR-SLL---------QCQYRNINRK LSAA-GMHE-ALKLAER------PERSEFVL------------QNSDHCGK--------- -------------------------------------------------KGHDT----- > 32== M83181 1 human serotonin receptor 
<>[JBC267(11),7553-7562'92] --------------------MDVLS-PG-------------------------------- -------------------------------QGNNT--------TSPPAPFETGGNTTGI S-------DVTVSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIG SLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIAL DRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIP-PMLG-WRTPEDRSDPD---AC TISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTG ADTRHGASPAPQPKK-----SVNG--ESGSRNWRLGVESKAGGALCANGAVRQGDDGAAL --EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS------------------FERKNERNA- ------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC- HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KII---------KCKFCRQ--- ------------------------------------------------------------ ----------------------------------------------------------- > 33=p A35181 serotonin receptor class 1A - rat --------------------MDVFS-FG-------------------------------- -------------------------------QGNNT--------TASQEPFGTGGNVTSI S-------DVTFSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIG SLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIAL DRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIP-PMLG-WRTPEDRSDPD---AC TISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKG AGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATL --EVIEVHRVGNSKEHLPLPSESGSNSYAPAC------------------LERKNERNA- ------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC- HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KII---------KCKFCRR--- ------------------------------------------------------------ ----------------------------------------------------------- > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] ------MANFTFGDL----ALDVAR-MGGLASTPS---GLRS-----TGLTTPGLSPTGL VTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDDRYWSL T-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLIL SLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAM DRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFISIP-PLFG-WRDPNN--DPDKTGTC IISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKAR 
LKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKL PENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS- ------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-- GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KIL---------FGKYRRGHR- ------------------------------------------------------------ ----------------------------------------------------------- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail ------MANFTFGDL----ALDVAR-MGGLASTPS---GLRS-----TGLTTPGLSPTGL VTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDDRYWSL T-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLIL SLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAM DRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFISIP-PLFG-WRDPNN--DPDKTGTC IISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKAR LKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKL PENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS- ------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-- GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KIL---------FGKYRRGHR- ------------------------------------------------------------ ----------------------------------------------------------- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi -------MEGAEGQE----ELDWEA-LY---------------------LRLP------- ------------------------LQ---NCSWNSTGWEPNW--NVTVV---PNTTWWQA S-------APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLIL SLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIAL DRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFVCIA-QLLG-WKDPDWNQRVSEDLRC VVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRR------RRG ATARGGVGPPPVPAGGALV-AGGG----------------------------SGGIAAAV ---------VAVIGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPTTGYGA VAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CDCE-- -VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ-RLL---------CGRRVRRRRA -----------------------PQ----------------------------------- ----------------------------------------------------------- 
mafft-7.505-without-extensions/test/script0000644000175000017500000000161414224501721020261 0ustar nileshnileshmafft sample > x diff x sample.fftns2 >& /dev/null || exit 1 mafft --maxiterate 100 sample > x diff x sample.fftnsi >& /dev/null || exit 1 mafft --dpparttree sample > x diff x sample.dpparttree >& /dev/null || exit 1 mafft --globalpair sample > x diff x sample.gins1 >& /dev/null || exit 1 mafft --globalpair --maxiterate 100 sample > x diff x sample.ginsi >& /dev/null || exit 1 mafft --allowshift --globalpair --maxiterate 100 sample > x diff x sample.ginsi.allowshift >& /dev/null || exit 1 mafft --localpair sample > x diff x sample.lins1 >& /dev/null || exit 1 mafft --localpair --maxiterate 100 sample > x diff x sample.linsi >& /dev/null || exit 1 mafft --parttree sample > x diff x sample.parttree >& /dev/null || exit 1 mafft-qinsi samplerna > x diff x samplerna.qinsi >& /dev/null || exit 1 mafft-xinsi samplerna > x diff x samplerna.xinsi >& /dev/null || exit 1 echo 'OK' rm x exit 0 mafft-7.505-without-extensions/test/sample.dpparttree0000644000175000017500000007264014224501721022416 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] MN-------------------GTE------GDNFYVPF----------------SNKTGL -----------ARSPYEYPQY-----YLAEPWK--------------------Y------ ------------------SALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNL AMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIER YIVICKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSC GPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAAQQ------------------------------------ESASTQKAEKEVTRM VVLMVIGFLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMN KQFRNCMITTLC----C---GKNPLGD-DE--SGASTSKTEV------------------ ------------------------------------------------------------ ---SSVS-------TSPVSP-A---------- > 2== U22180 1 rat opsin 
[J.Mol.Neurosci.5(3),207-209'94] MN-------------------GTE------GPNFYVPF----------------SNITGV -----------VRSPFEQPQY-----YLAEPWQ--------------------F------ ------------------SMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNL AVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIER YVVVCKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSC GIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------ESATTQKAEKEVTRM VIIMVIFFLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMN KQFRNCMLTSLC----C---GKNPLGD-DE--ASATASKTE------------------- ------------------------------------------------------------ --------------TSQVAP-A---------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 MN-------------------GTE------GINFYVPM----------------SNKTGV -----------VRSPFEYPQY-----YLAEPWK--------------------Y------ ------------------RLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNL AVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIER YIVVCKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSC GPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------ESATTQKAEKEVTRM VILMVLGFMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMN KQFRNCMITTIC----C---GKNPFGD-EDVSSTVSQSKTEV------------------ ------------------------------------------------------------ ---SSVS-------SSQVSP-A---------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish MN-------------------GTE------GKNFYVPM----------------SNRTGL -----------VRSPFEYPQY-----YLAEPWQ--------------------F------ ------------------KILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNL AVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIER YIVVCKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSC 
GPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------DSASTQKAEREVTKM VILMVFGFLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLN KQFRNCMLTTIF----C---GKNPLGD-DE-SSTVSTSKTEV------------------ ------------------------------------------------------------ ---SS------------VSP-A---------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish MN-------------------GTE------GNNFYVPL----------------SNRTGL -----------VRSPFEYPQY-----YLAEPWQ--------------------F------ ------------------KLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNL AVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIER YIVVCKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSC GPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQQ------------------------------------DSASTQKAEREVTKM VILMVLGFLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLN KQFRSCMLTTLF----C---GKNPLGD-EE-SSTVSTSKTEV------------------ ------------------------------------------------------------ ---SS------------VSP-A---------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- MK-------------------QVPE----FHEDFYIPIPL------------DINNLSAY -------------SPFLVPQD-----HLGNQGI--------------------F------ ------------------MAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNL SIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFER WLVICKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSC GPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAKAQA------------------------------------DSASTQKAEREVTKM VVVMVLGFLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMN 
KQFRSCMM-KMV----C---GKN-IEE-DE--ASTSSQVTQV------------------ ------------------------------------------------------------ ---SS------------VAPEK---------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] MR-------------------KMS------EEEFYL-----------------FKNISSV -------------GPWDGPQY-----HIAPVWA--------------------F------ ------------------YLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNV SFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFER YIVICKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSC GPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAAQQQ------------------------------------ESATTQKAEREVSRM VVVMVGSFCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMN KQFQACIM-KMV----C---GKA-MTD-ES--DTCSSQKTEV------------------ ------------------------------------------------------------ ---STVS-------STQVGP-N---------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WLVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMVLAFC--------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ 
------------------HLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNL AVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WMVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMVLAFCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMN RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ ------------------------------------------------------------ ---SSV---------SSVSP-A---------- > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------HLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNL AVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER WLVVCKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAKQQK------------------------------------ESESTQKAEKEVTRM VVVMIFAYCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMN RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ ------------------------------------------------------------ ---SSV---------SSVSP-A---------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD-DDTTR------GSVFTYTN----------------TNNT-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------NLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNL AFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWER WFVVCKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ 
---VAAQQK------------------------------------ESESTQKAEREVSRM VVVMIVAFCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMN RQFRNCIMQLF---------GKK-VDD-GS--EASTTSRTEV------------------ ------------------------------------------------------------ ---SSVS-------NSSVAP-A---------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWEAAFAARRRHEE--EDTTR------DSVFTYTN----------------SNNT-- ------------RGPFEGPNY-----HIAPRWV--------------------Y------ ------------------NLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNL AVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWER WFVVCKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSC GPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---VAAQQK------------------------------------ESESTQKAEKEVSRM VVVMIVAYCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMN RQFRNCILQLF---------GKK-VDD-GS--EVST-SRTEV------------------ ------------------------------------------------------------ ---SSVS-------NSSVSP-A---------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] MS-----------------SNSSQ------AP----------------------PNGT-- ------------PGPFDGPQW----PYQAPQST--------------------Y------ ------------------VGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNL AVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALER YVVVCKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSC GPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---AAAQQK------------------------------------EADTTQRAEREVTRM VIVMVMAFLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMN KQFQSCLLEMLC----CGYQPQR-TGK-AS--PGTPGPHADV------------------ ------------------------------------------------------------ ---TAAG------LRNKVMP-AH-------PV > 14== M17718 1 D.melanogaster Rh3 
<>[J.Neurosci.7,1550-1557'87] MESGNV---------------SSSLFGNVST-ALRPEARLSA---ETRLLGW-------- ------------NVPPEELR------HIPEHWL-----------TYPEPPESMN------ ------------------YLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINL AFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDR FNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSC TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LRDQAKK----------MNVESL----------------RSNVDKNKETAEIRIAKA AITICFLFFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISH PRYRMELQKRCP----WLALNEKAPE--SS-AVASTSTTQEP------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 MEYHNV---------------SSVL-GNVSS-VLRPDARLSA---ESRLLGW-------- ------------NVPPDELR------HIPEHWL-----------IYPEPPESMN------ ------------------YLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINL AFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDR YNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSC TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LRDQAKK----------MNVDSL----------------RSNVDKSKEAAEIRIAKA AITICFLFFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISH PRYRMELQKRCP----WLAISEKAPE--SR-AAISTSTTQEQ------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ME---------------------PL-CNASEPPLRPEAR-SSGNGDLQFLGW-------- ------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ ------------------YMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNL AVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDR 
YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC SFDYLS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISH PRYRLELQKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ------------------ ------------------------------------------------------------ ---------------QQTTA-A---------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 MD---------------------AL-CNASEPPLRPEARMSSGSDELQFLGW-------- ------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ ------------------YMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNL AVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDR YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC SFDYLS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISH PRYRMELQKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ------------------ ------------------------------------------------------------ --------------TQQTSA-A---------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 MT-------------------------NATGPQMAYYGAASMD------FGYP-EGVSIV -----------DFVRPEIKP------YVHQHWY-----------NYPPVNPMWH------ ------------------YLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNL ALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDR YNIICNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSC SYDYLT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRAQAKK----------MNVSTL----------------RS-NEADAQRAEIRIAKT 
ALVNVSLWFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISH PKYRLAITQHLP----WFCVHETETKS-ND-DSQSNSTVAQ------------------- ------------------------------------------------------------ ------------------DK-A---------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 MA-------------------------NVTGPQMAFYGSGAAT------FGYP-EGMTVA -----------DFVPDRVKH------MVLDHWY-----------NYPPVNPMWH------ ------------------YLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNL ALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDR YNIICNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSC SYDYFT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRAQAKK----------MNVTNL----------------RS-NEAETQRAEIRIAKT ALVNVSLWFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISH PKFRLAITQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ------------------- ------------------------------------------------------------ ------------------EK-S---------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV -----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ ------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSC GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV 
-----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ ------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSC GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' MD-------------------SFAAVATQLGPQFAAPS-----------------NGSVV -----------DKVTPDMAH------LISPYWD-----------QFPAMDPIWA------ ------------------KILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINL AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR YQVIVKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSC GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDADKSAEGKLAKV ALVTISLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH PKYRLALKEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA------------------ ------------------------------------------------------------ ---E--------------SK-A---------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MERSH--------------LPETPFDLAHSGPRFQAQSSG---------------NGSVL -----------DNVLPDMAH------LVNPYWS-----------RFAPMDPMMS------ ------------------KILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNL AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDR YNVIVKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTAC SIDYMT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA----------- 
------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVKSL----------------RS-SEDCDKSAEGKLAKV ALTTISLWFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH PKYRIVLKEKCP----MCVFGNTDEPKPDA-PASDTETTSEA------------------ ------------------------------------------------------------ ---D--------------SK-A---------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MERSL--------------LPEPPLAMALLGPRFEAQTGG---------------NRSVL -----------DNVLPDMAP------LVNPHWS-----------RFAPMDPTMS------ ------------------KILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNL AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDR YNVIVKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTAC SIDYMT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MRDQAKK----------MNVKSL----------------RS-SEDCDKSAENKLAKV ALTTISLWFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH PNDRLVLKEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA------------------ ------------------------------------------------------------ ---E--------------SK-D---------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] MI-------------------------AVSGPSYEAFSYGGQA-----RF----NNQTVV -----------DKVPPDMLH------LIDANWY-----------QYPPLNPMWH------ ------------------GILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINL AISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDR YNVIVKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTAC GTDYFN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MREQAKK----------MNVASL----------------RS-SENQNTSAECKLAKV ALMTISLWFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISH PKYRAALFAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT------------------ 
------------------------------------------------------------ ---DNEK-----------SN-A---------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M---------------------------ANQLSYSSLGWPYQP------------NASVV -----------DTMPKEMLY------MIHEHWY-----------AFPPMNPLWY------ ------------------SILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNL AFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDR YNVIVRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSC TVDYLT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ----------- ------------------------------------------------------------ ------------------------------------------------------------ ---LREQAKK----------MNVASL----------------RANADQQKQSAECRLAKV AMMTVGLWFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISH PRYKAALYQRFP----SLACGSGESGS-DV-KSEASATTTME------------------ ------------------------------------------------------------ ---EKPK----------IPE-A---------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] MVESTTL------------VNQT--------------------------WWY---NPTVD ---------------------------IHPHWA-----------KFDPIPDAVY------ ------------------YSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINL AMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDR YNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSC SFDYLS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MAAMAKR----------LNAKEL----------------R--KAQAGASAEMKLAKI SMVIITQFMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSH PKFREAIQTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAM MQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAP PQVEAPQGAPPQGVDNQAYQ-A---------- > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] MGRDLR-------------DNET--------------------------WWY---NPSIV ---------------------------VHPHWR-----------EFDQVPDAVY------ 
------------------YSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINL AFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDR YNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNC SFDYIS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE----------- ------------------------------------------------------------ ------------------------------------------------------------ ---MAAMAKR----------LNAKEL----------------R--KAQAGANAEMRLAKI SIVIVSQFLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSH PKFREAISQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAM MQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAP PQ-GAPPAAPPQGVDNQAYQ-A---------- > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- --------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ -------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- ---------PGF---------------------------------PRVQPESVI------ ----------------SLNGVVK----------------------LQKEVEECAN----- ---LSRLLKH--------------------------------ER-KNISIFKREQKAATT LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN RDLRPTSRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ -LKLA------------------------------------------------------- ---ERPE------RSEFVLQNSDHCGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- --------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ -------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- 
---------PGF---------------------------------PRVQPESVI------ ----------------SLNGVVK----------------------LQKEVEECAN----- ---LSRLLKH--------------------------------ER-KNISIFKREQKAATT LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN RDLRTTYRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ -LKLA------------------------------------------------------- ---ERPE------RSEFVLQNSDHCGKKGHDT > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] M-DVLS-------------PGQ--------GNNTTSPPAPFETG----------GNTTGI -------------------------SDVTVSYQ--------------------------- ------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- ----------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGS RNWRLGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP- ----ASFERK-----------NERNA----------------EA-KRKMALARERKTVKT LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFN KDFQNAFKKIIK--------CKFCR----------------------------------- ------------------------------------------------------------ -------------------------------Q > 33=p A35181 serotonin receptor class 1A - rat M-DVFS-------------FGQ--------GNNTTASQEPFGTG----------GNVTSI -------------------------SDVTFSYQ--------------------------- ------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- ----------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGS GDWRRCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP- ----ACLERK-----------NERNA----------------EA-KRKMALARERKTVKT LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFN KDFQNAFKKIIK--------CKFCR----------------------------------- 
------------------------------------------------------------ -------------------------------R > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- --------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS PEFRSAFQKILF--------GKYRRG---------------------------------- ------------------------------------------------------------ ------------------------------HR > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- --------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS PEFRSAFQKILF--------GKYRRG---------------------------------- ------------------------------------------------------------ ------------------------------HR > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi M-EG-A-------------EGQEELD----WEALYLRLPLQNCS----------WNSTGW EPNW------------------NVTVVPNTTWW----------------QASAPFDTPAA 
LVRA--------------AAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSL AVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDR YWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RC VVS--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR----------- ----------------------------------RRGATARGGVGPPPVP---------- -----------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNV SSNNTSPEKQSCANGLEA---DPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKT LAIITGAFVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFS PEFRHAFQRLLC--------GRRVRRR----R---------------------------- ------------------------------------------------------------ -----------------------------APQ mafft-7.505-without-extensions/test/sample.fftnsi0000644000175000017500000007050414224501721021532 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] ----------------------------------------MNG----------------- TE--GDNFYVP-----FSNKTGLARSPYEYPQY---------------YLAEPWK----- ----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYI FTHQGS---DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------G KNPLGDDE--SGASTSKTEVSSVS-TSPVSPA---------------------------- ------------------------------------------------------------ - > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] ----------------------------------------MNG----------------- TE--GPNFYVP-----FSNITGVVRSPFEQPQY---------------YLAEPWQ----- ----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ------------------ 
------------------------------------------------------------ ------------------------------------------------------------ -----------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYI FTHQGS---NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------G KNPLGDDE--ASATASKTE------TSQVAPA---------------------------- ------------------------------------------------------------ - > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 ----------------------------------------MNG----------------- TE--GINFYVP-----MSNKTGVVRSPFEYPQY---------------YLAEPWK----- ----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----REAAAQQQ------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWI FTNKGA---DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------G KNPFGDEDVSSTVSQSKTEVSSVS-SSQVSPA---------------------------- ------------------------------------------------------------ - > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish ----------------------------------------MNG----------------- TE--GKNFYVP-----MSNRTGLVRSPFEYPQY---------------YLAEPWQ----- ----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWI FFNKGA---DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------G KNPLGDDE-SSTVSTSKTEVSS------VSPA---------------------------- ------------------------------------------------------------ - > 5=p B45229 opsin, 
green-sensitive (clone GFgr-2) - goldfish ----------------------------------------MNG----------------- TE--GNNFYVP-----LSNRTGLVRSPFEYPQY---------------YLAEPWQ----- ----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWI FFNKGA---AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------G KNPLGDEE-SSTVSTSKTEVSS------VSPA---------------------------- ------------------------------------------------------------ - > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- ----------------------------------------MKQ----------------- VPEFHEDFYIPI-PLDINNLS--AYSPFLVPQD---------------HLGNQGI----- ----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S PLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FT FKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNN ESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KLAAKAQA------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWI VSHRGE---EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------G KN-IEEDE--ASTSSQVTQVSS------VAPEK--------------------------- ------------------------------------------------------------ - > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] ----------------------------------------MRK----------------- MS--EEEFYL------FKNIS--SVGPWDGPQY---------------HIAPVWA----- ----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V FPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FR FSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRS 
ESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYM VNNRNH---GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------G KA-MTDES--DTCSSQKTEVSTVS-STQVGPN---------------------------- ------------------------------------------------------------ - > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------DLAETVIA-S TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESESTQK------AEKEVTRMVVVMVLAFC------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ - > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] ----------------------------------------MAQQWSLQRLAGRHPQDSYE DSTQSSIFTYTN--------SNSTRGPFEGPNY---------------HIAPRWV----- ----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S TISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VR FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFA AANPGY---PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----------G KK-VDDGS--ELSSASKTEVSSV---SSVSPA---------------------------- 
------------------------------------------------------------ - > 10== Z68193 1 human Red Opsin <>[] ----------------------------------------MAQQWSLQRLAGRHPQDSYE DSTQSSIFTYTN--------SNSTRGPFEGPNY---------------HIAPRWV----- ----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR FDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV QSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFA AANPGY---AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----------G KK-VDDGS--ELSSASKTEVSSV---SSVSPA---------------------------- ------------------------------------------------------------ - > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] ----------------------------------------MTEAWNVAVFAARRSRDD-D DTTRGSVFTYTN--------TNNTRGPFEGPNY---------------HIAPRWV----- ----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S TISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IK FDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGC QSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFA AANPGY---AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----------G KK-VDDGS--EASTTSRTEVSSVS-NSSVAPA---------------------------- ------------------------------------------------------------ - > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] ----------------------------------------MAA-WEAA-FAARRRHEE-E DTTRDSVFTYTN--------SNNTRGPFEGPNY---------------HIAPRWV----- ----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S TISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IK FDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGV 
QSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RAVAAQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFA AANPGY---AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----------G KK-VDDGS--EVST-SRTEVSSVS-NSSVSPA---------------------------- ------------------------------------------------------------ - > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] ----------------------------------------MSS----------------- ----NSSQAPPN-G---------TPGPFDGPQWP--------------YQAPQST----- ----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S SVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQ FQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGS--NN NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RAAAAQQK------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVV ATHKGI---IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCC--------G YQPQRTGKASPGTPGPHADVTAAGLRNKVMPAH--------------------------- ----------------------------------------PV------------------ - > 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] ----------MESGN-----VS--------SSLFGNVSTALRP----------------- EARLSAE-----------TRLLGWNVPPEELR----------------HIPEHWLTYPEP PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T PIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--K MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDT RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK----------------- ----------------------------------------------------------MN -----------VESL--------------------------------------------- --------------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIG AFGDKT---LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL------ ---NEKAP--ESSAVASTSTTQEP--QQTTAA---------------------------- 
------------------------------------------------------------ - > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 ----------MEYHN-----VS---------SVLGNVSSVLRP----------------- DARLSAE-----------SRLLGWNVPPDELR----------------HIPEHWLIYPEP PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T PIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--K MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDT RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK----------------- ----------------------------------------------------------MN -----------VDSL--------------------------------------------- --------------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIG AFGDKT---LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI------ ---SEKAP--ESRAAISTSTTQEQ--QQTTAA---------------------------- ------------------------------------------------------------ - > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] ----------ME-------------------PLCNASEPPLRP----------------- EARSSGN---------GDLQFLGWNVPPDQIQ----------------YIPEHWLTQLEP PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A PIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT RLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK----------------- ----------------------------------------------------------MN -----------VESL--------------------------------------------- --------------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIG AFGDKS---LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV------ ---NEKSG--EISSAQSTTTQEQ---QQTTAA---------------------------- ------------------------------------------------------------ - > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 ----------MD-------------------ALCNASEPPLRP----------------- EARMSSG--------SDELQFLGWNVPPDQIQ----------------YIPEHWLTQLEP PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A 
PIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT RLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK----------------- ----------------------------------------------------------MN -----------VESL--------------------------------------------- --------------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIG AFGDKS---LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV------ ---NEKSG--EASSAQSTTTQEQT--QQTSAA---------------------------- ------------------------------------------------------------ - > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 --------------------------------MTNATGPQMAY----------------- YGAASMDFGYP------EGVSIVDFVRPEIKP----------------YVHQHWYNYPPV NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V PFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PK LTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNT FSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK----------------- ----------------------------------------------------------MN -----------VSTL--------------------------------------------- --------------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKG VMGDTS---GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------ HE-TETKS--NDDSQSNSTVAQDK-A---------------------------------- ------------------------------------------------------------ - > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 --------------------------------MANVTGPQMAF----------------- YGSGAATFGYP------EGMTVADFVPDRVKH----------------MVLDHWYNYPPV NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F PPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PK LTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNT ITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK----------------- ----------------------------------------------------------MN -----------VTNL--------------------------------------------- --------------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQG 
LLGNAE---GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------ HE-KDPND--VEENQSSNTQTQEK-S---------------------------------- ------------------------------------------------------------ - > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] ----------MESF----------------AVAAAQLGPHFAP----------------- LS----------------NGSVVDKVTPDMAH----------------LISPYWNQFPAM DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------- ----------------------------------------------------------MN -----------VKSL--------------------------------------------- --------------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMG LFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------ GK-VDDGK--SSDAQSQA-TASEA-ESKA------------------------------- ------------------------------------------------------------ - > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] ----------MESF----------------AVAAAQLGPHFAP----------------- LS----------------NGSVVDKVTPDMAH----------------LISPYWNQFPAM DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------- ----------------------------------------------------------MN -----------VKSL--------------------------------------------- --------------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMG LFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------ GK-VDDGK--SSDAQSQA-TASEA-ESKA------------------------------- ------------------------------------------------------------ - > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' ----------MDSF----------------AAVATQLGPQFAA----------------- PS----------------NGSVVDKVTPDMAH----------------LISPYWDQFPAM 
DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP MTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------- ----------------------------------------------------------MN -----------VKSL--------------------------------------------- --------------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMG LFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------ GK-VDDGK--SSEAQSQA-TTSEA-ESKA------------------------------- ------------------------------------------------------------ - > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] MER-----SHLPET----------------PFDLAHSGPRFQA----------------- QSSG--------------NGSVLDNVLPDMAH----------------LVNPYWSRFAPM DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S PVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP MTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNP RSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK----------------- ----------------------------------------------------------MN -----------VKSL--------------------------------------------- --------------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFG LFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------ GN-TDEPK--PDAPASDTETTSEA-DSKA------------------------------- ------------------------------------------------------------ - > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 MER-----SLLPEP----------------PLAMALLGPRFEA----------------- QTGG--------------NRSVLDNVLPDMAP----------------LVNPHWSRFAPM DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S PVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP MTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNP RSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK----------------- ----------------------------------------------------------MN -----------VKSL--------------------------------------------- 
--------------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFG LFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------ GT-TDEPK--PDAPPSDTETTSEA-ESKD------------------------------- ------------------------------------------------------------ - > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] --------------------------------MIAVSGPSYEA----------------- FSYGGQA--------RFNNQTVVDKVPPDMLH----------------LIDANWYQYPPL NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S PPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KP LSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLS ASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK----------------- ----------------------------------------------------------MN -----------VASL--------------------------------------------- --------------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSG IFNL-V---KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC------ -A-AEPSS--DAVSTTSGTTTVTD-NEKSNA----------------------------- ------------------------------------------------------------ - > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] MAN---------------------------QLSYSSLGWPYQP----------------- ------------------NASVVDTMPKEMLY----------------MIHEHWYAFPPM NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M PTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-AP LTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSS ASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK----------------- ----------------------------------------------------------MN -----------VASL--------------------------------------------- --------------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAG VFSSGT---RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------ GS-GESGS--DVKSEASATTTMEE-KPKIPEA---------------------------- ------------------------------------------------------------ - > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ---------------------------------------MVES----------------- 
TTLVNQTWWY--------NPTVD--------------------------IHPHWAKFDPI PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF PLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKK MSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPST RSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR----------------- ----------------------------------------------------------LN -----------AKEL--------------------------------------------- --------------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLA QFGPAE---WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDE KE-CEDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY --PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQ A > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] ----------------------------------------MGR----------------- DLRDNETWWY--------NPSIV--------------------------VHPHWREFDQV PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF PLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKK MSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTT RSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR----------------- ----------------------------------------------------------LN -----------AKEL--------------------------------------------- --------------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLA QFGPLE---WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDD KE-TEDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGY APPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQ A > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- ----------PHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVE------------ ----KVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- -GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-------------- 
-------------------PGF-----------------PRVQ---------PESVISLN G----------VVKLQK--------------------------EVEECAN---------- --------LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTAR PFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR---NIN RK-LSAAGMHEALKLAERPERSEFVLQNSDHCGKK------------------------- ----------------------------------------------------GHDT---- - > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- ----------PHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVE------------ ----KVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- -GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-------------- -------------------PGF-----------------PRVQ---------PESVISLN G----------VVKLQK--------------------------EVEECAN---------- --------LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTAR PFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR---NIN RK-LSAAGMHEALKLAERPERSEFVLQNSDHCGKK------------------------- ----------------------------------------------------GHDT---- - > 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] MDVLSPGQ--------------G------------------------------------- ------------------NNTTSPPAPFETGGNTT-------------GISDVTVSY--- ----QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH--------- -GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK----------------- ----------------------TVKKVEKTGADTRHGASPAPQ---------PKKS--VN GESGSRNWRLGVESK-----AGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSE AG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVL PFCESSC--HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC------ RQ---------------------------------------------------------- ------------------------------------------------------------ - > 33=p A35181 serotonin receptor class 1A - rat 
MDVFSFGQ--------------G------------------------------------- ------------------NNTTASQEPFGTGGNVT-------------SISDVTFSY--- ----QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH--------- -GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK----------------- ----------------------TVRKVEKKGAGTSLGTSSAPP---------PKKS--LN GQPGSGDWRRCAENR-----AVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSE SG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVL PFCESSC--HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC------ RR---------------------------------------------------------- ------------------------------------------------------------ - > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] MANFTFGDLALDVAR-----MGGLASTPSGLRSTGLTTPGLSPTGL-------------V TSDFNDSYGLTG---QFINGSHSSRSRDNASANDTSATN---------MTDDRYWSLTVY SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- -GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTL VASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVN SNSSS------SERL----KQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISSNDTP YS------------RT-REKLELK------RERKAARTLAIITGAFLICWLPFFIIALIG PFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RG HR---------------------------------------------------------- ------------------------------------------------------------ - > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail MANFTFGDLALDVAR-----MGGLASTPSGLRSTGLTTPGLSPTGL-------------V TSDFNDSYGLTG---QFINGSHSSRSRDNASANDTSATN---------MTDDRYWSLTVY SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- -GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTL 
VASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVN SNSSS------SERL----KQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISSNDTP YS------------RT-REKLELK------RERKAARTLAIITGAFLICWLPFFIIALIG PFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RG HR---------------------------------------------------------- ------------------------------------------------------------ - > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi MEGAE-GQEELDWEA-----LY--LRLP--LQNCSWNSTGWEPNWN-------------V TVVPNTTWWQ-------------ASAPFDTPAALVR------------------------ ----AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV--------- -GYQIFATASSFYVPVLIILILYWRIY-------QTARKRIRR----------------- ----------------------------RRGATARGGVGPPPV---------PAGGALVA GGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSP---EKQSCANGLEADPPTTG YGAVAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILV PTCDCE----VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RR RR---------------------------------------------------------- ---------------------------------------APQ------------------ - mafft-7.505-without-extensions/test/sample.linsi0000644000175000017500000007147414224501721021366 0ustar nileshnilesh> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] M------------------NGTE-G------DNFYVPF----SNKTGLARSPYEYPQY-- --------------------------------------------------YLAEPW---- ---------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLA MANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERY IVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCSCG PDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPYAS VAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL----- 
CC-----GKNPLGDDE-SGA-STSK-TEVSSVS-TSPVSPA------------------- ------------------------------------------------------------ --------------- > 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] M------------------NGTE-G------PNFYVPF----SNITGVVRSPFEQPQY-- --------------------------------------------------YLAEPW---- ---------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA VADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERY VVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCSCG IDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPYAS VAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL----- CC-----GKNPLGDDE-ASA-TASK-TETSQVA-PA------------------------ ------------------------------------------------------------ --------------- > 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 M------------------NGTE-G------INFYVPM----SNKTGVVRSPFEYPQY-- --------------------------------------------------YLAEPW---- ---------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLA VADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERY IVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCSCG PDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAV VAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI----- CC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA------------------- ------------------------------------------------------------ --------------- > 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish M------------------NGTE-G------KNFYVPM----SNRTGLVRSPFEYPQY-- --------------------------------------------------YLAEPW---- 
---------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLA VAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY IVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCSCG PDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEREVTKMVILMVFGFLIAWTPYAT VAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI----- FC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA------------------------ ------------------------------------------------------------ --------------- > 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish M------------------NGTE-G------NNFYVPL----SNRTGLVRSPFEYPQY-- --------------------------------------------------YLAEPW---- ---------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLA VAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY IVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCSCG PDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEREVTKMVILMVLGFLVAWTPYAT VAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL----- FC-----GKNPLGDEE-SSTVSTSK-TEVSSVS-PA------------------------ ------------------------------------------------------------ --------------- > 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- M------------------KQVP-EFH----EDFYIPIPL--DINNLSAYSPFLVPQD-- --------------------------------------------------HLGNQG---- ---------------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLS IANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERW LVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCSCG PDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA------- ------------------------------------------------------------ ------------------------------------------------------------ 
-------------------------------STQKAEREVTKMVVVMVLGFLVCWAPYAS FSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV----- C------GKN-IEEDE-AST-SSQV-TQVSSVA-PEK----------------------- ------------------------------------------------------------ --------------- > 7== M13299 1 human BCP <>[Science232(4747),193-202'86] M------------------RKMS-E------EEFYL-------FKNISSVGPWDGPQY-- --------------------------------------------------HIAPVW---- ---------------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVS FGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERY IVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCSCG PDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------TTQKAEREVSRMVVVMVGSFCVCYVPYAA FAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV----- C------GKA-MTDES-DTC-SSQK-TEVSTVS-STQVGPN------------------- ------------------------------------------------------------ --------------- > 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW LVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEKEVTRMVVVMVLAFC-------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------- > 9== K03494 1 human GCP <>[Science232(4747),193-202'86] MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY-- 
--------------------------------------------------HIAPRW---- ---------------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLA VADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW MVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEKEVTRMVVVMVLAFCFCWGPYAF FACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----- -------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA------------------- ------------------------------------------------------------ --------------- > 10== Z68193 1 human Red Opsin <>[] MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY-- --------------------------------------------------HIAPRW---- ---------------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLA VADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW LVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTSCG PDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEKEVTRMVVVMIFAYCVCWGPYTF FACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----- -------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA------------------- ------------------------------------------------------------ --------------- > 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] MTEAWNVAVFAARRSRDD-DDTTRG------SVFTY-------TNTNNTRGPFEGPNY-- --------------------------------------------------HIAPRW---- ---------------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLA FVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERW FVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTSCG PDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE------- ------------------------------------------------------------ 
------------------------------------------------------------ -------------------------------STQKAEREVSRMVVVMIVAFCICWGPYAS FVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----- -------GKK-VDDGS-EAS-TTSR-TEVSSVS-NSSVAPA------------------- ------------------------------------------------------------ --------------- > 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] MA-AWE-AAFAARRRHEE-EDTTRD------SVFTY-------TNSNNTRGPFEGPNY-- --------------------------------------------------HIAPRW---- ---------------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLA VADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERW FVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTSCG PDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------STQKAEKEVSRMVVVMIVAYCFCWGPYTF FACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----- -------GKK-VDDGS-EVS-T-SR-TEVSSVS-NSSVSPA------------------- ------------------------------------------------------------ --------------- > 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] -----------------------MS------SNSSQ-------APPNGTPGPFDGPQW-- -------------------------------------------------PYQAPQS---- ---------------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLA VADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERY VVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTSCG PNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------TTQRAEREVTRMVIVMVMAFLLCWLPYST FALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML----- CCGYQ--PQR-TGKAS-PGT-PGPH-ADVTAAGLRNKVMPAHPV---------------- ------------------------------------------------------------ --------------- > 14== M17718 1 D.melanogaster Rh3 
<>[J.Neurosci.7,1550-1557'87] M------------------ESGNVS------SSLF------------GNVSTALRPEA-- -------------------------RLSA---E---TRLLGWNVPPEELRHIPEHWLTYP ----------EPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLA FCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRF NVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTSCT FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVES ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS-------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGV MSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWL-- AL-----NEK-APESS-AVA-STST-TQEPQQT--------------------------- TAA--------------------------------------------------------- --------------- > 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 M------------------EYHNVS------SVL-------------GNVSSVLRPDA-- -------------------------RLSA---E---SRLLGWNVPPDELRHIPEHWLIYP ----------EPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLA FCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRY NVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTSCT FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS-------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGV MSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWL-- AI-----SEK-APESR-AAI-STST-TQEQQQT--------------------------- TAA--------------------------------------------------------- --------------- > 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] M------------------EPLC------------------------NASEPPLRPEA-- -------------------------R-SSGNGD---LQFLGWNVPPDQIQYIPEHWLTQL ----------EPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLA VFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS 
FDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVES ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV MSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWL-- GV-----NEK-SGEIS-SAQ-STTT-QEQQQTT--------------------------- AA---------------------------------------------------------- --------------- > 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 M------------------DALC------------------------NASEPPLRPEA-- -------------------------RMSSGSDE---LQFLGWNVPPDQIQYIPEHWLTQL ----------EPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLA VFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS FDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVES ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV MSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWL-- GV-----NEK-SGEAS-SAQ-STTT-QEQTQQT--------------------------- SAA--------------------------------------------------------- --------------- > 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 M------------------TNAT-------------------------------GPQMAY -------YG--------------AASMDFGYPE---GVSIVDFVRPEIKPYVHQHWYNYP ----------PVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLA LSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRY NIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDSCS YDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVST ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------NEADAQRAEIRIAKTALVNVSLWFICWTPYAL ISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWF-- 
CV-----HET-ETKSN-DDS-QSNS-TVAQDKA--------------------------- ------------------------------------------------------------ --------------- > 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 M------------------ANVT-------------------------------GPQMAF -------YG--------------SGAATFGYPE---GMTVADFVPDRVKHMVLDHWYNYP ----------PVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLA LSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRY NIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDSCS YDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTN ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------NEAETQRAEIRIAKTALVNVSLWFICWTPYAA ITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWF-- CV-----HEK-DPNDV-EEN-QSSN-TQTQEKS--------------------------- ------------------------------------------------------------ --------------- > 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] M------------------ESFA-------------------------VAAAQLGPHF-- ------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP ----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY QVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTSCG IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA------------------- ------------------------------------------------------------ --------------- > 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] M------------------ESFA-------------------------VAAAQLGPHF-- ------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP ----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA 
ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY QVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTSCG IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA------------------- ------------------------------------------------------------ --------------- > 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' M------------------DSFA-------------------------AVATQLGPQF-- ------------------------AAPS--------NGSVVDKVTPDMAHLISPYWDQFP ----------AMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY QVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTSCG IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SEDADKSAEGKLAKVALVTISLWFMAWTPYLV INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- VF-----GKV-DDGKS-SEA-QSQA-TTSEAES------KA------------------- ------------------------------------------------------------ --------------- > 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] M------------------ERSH--------------------LPETPFDLAHSGPRF-- ------------------------QAQSSG------NGSVLDNVLPDMAHLVNPYWSRFA ----------PMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRY NVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTACS IDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLV 
ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMC-- VF-----GNT-DEPKP-DAP-ASDTETTSEADS------KA------------------- ------------------------------------------------------------ --------------- > 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 M------------------ERSL--------------------LPEPPLAMALLGPRF-- ------------------------EAQTGG------NRSVLDNVLPDMAPLVNPHWSRFA ----------PMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRY NVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTACS IDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SEDCDKSAENKLAKVALTTISLWFMAWTPYLI ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMC-- VC-----GTT-DEPKP-DAP-PSDTETTSEAES------KD------------------- ------------------------------------------------------------ --------------- > 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] M-------------------------------------------------IAVSGPSY-- ------------------------EAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYP ----------PLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLA ISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRY NVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTACG TDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVAS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRS--------SENQNTSAECKLAKVALMTISLWFMAWTPYLV INFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSL-- AC-----AA--EPSSD-AVS-TTSG-TTTVTDN------EK------------------S NA---------------------------------------------------------- --------------- > 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] M------------------ANQL---------------------------------SY-- 
------------------------SSLGWPYQP---NASVVDTMPKEMLYMIHEHWYAFP ----------PMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLA FSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRY NVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTSCT VDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVAS ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRA-------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLI ISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSL-- AC-----GSG-ESGSD-VKS-EASA-TTTMEEK------PKIPEA--------------- ------------------------------------------------------------ --------------- > 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] M------------------VEST----------------------TLVNQTWWYNPTV-- --------------------------------------------------DIHPHWAKFD ----------PIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLA MSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRY NVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTSCS FDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKE ------------------------------------------------------------ ------------------------------------------------------------ -----------------LRK---------AQAGASAEMKLAKISMVIITQFMLSWSPYAI IALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLT CCQFD------EKECE-DAN-DAEE-EVVASER----GGESRDAAQMKEMMAMMQKMQAQ QAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAP QGAPPQGVDNQAYQA > 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] M------------------GRDL-----------------------RDNETWWYNPSI-- --------------------------------------------------VVHPHWREFD ----------QVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLA FSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRY NVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCNCS FDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKE ------------------------------------------------------------ 
------------------------------------------------------------ -----------------LRK---------AQAGANAEMRLAKISIVIVSQFLLSWSPYAV VALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLT CCQFD------DKETE-DDK-DAET-EIPAGES--SDAAPSADAAQMKEMMAMMQKMQQQ QAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP----- -AAPPQGVDNQAYQA > 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] M----------------------------------------------------------- --------------------P---HLLS--------GFL-------EVTASPAPTWDAPP DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG------- -------FPRVQPESVISLNG--------------------------------------- -----------------------VVKLQKE-------------------VEECAN----- -------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL----- QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q NSDH-------------------------------------------------------- -------CGKKGHDT > 31=p A47425 serotonin receptor 5HT-7 - rat M----------------------------------------------------------- --------------------P---HLLS--------GFL-------EVTASPAPTWDAPP DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG------- -------FPRVQPESVISLNG--------------------------------------- -----------------------VVKLQKE-------------------VEECAN----- -------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL----- QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q NSDH-------------------------------------------------------- -------CGKKGHDT > 32== M83181 
1 human serotonin receptor <>[JBC267(11),7553-7562'92] M------------------DVLSPG-------------------------------QG-- ------------------------NNTTSPPAPF-E---------------TGGNTTGIS ----------DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT RHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC----------------- -------------------------------ANGAVRQGDDGAALEVIEVHRVGNSKEHL PLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI VALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII----- KCKFCRQ----------------------------------------------------- ------------------------------------------------------------ --------------- > 33=p A35181 serotonin receptor class 1A - rat M------------------DVFSFG-------------------------------QG-- ------------------------NNTTASQEPF-G---------------TGGNVTSIS ----------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT SLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC----------------- -------------------------------TNGAVRQGDDEATLEVIEVHRVGNSKEHL PLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI VALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII----- KCKFCRR----------------------------------------------------- ------------------------------------------------------------ --------------- > 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT ----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS 
QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK-- -----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL----- FGKYRRGHR--------------------------------------------------- ------------------------------------------------------------ --------------- > 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT ----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK-- -----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL----- FGKYRRGHR--------------------------------------------------- ------------------------------------------------------------ --------------- > 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi M------------------EGAE-GQEELDWEALYLRLPL-------------------- ------------------------QNCSWNSTGWEPNW--------NVTVVPNTTWWQAS -----APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLA VADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRY WAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVS QDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGVGP -------PP-----------------------------------------------VPAG GALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLEAD PPTTGYGAVAAAYYPSLVRRK------PKEAADSKRERKAAKTLAIITGAFVACWLPFFV LAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL----- CGRRVRRRRA-----------------------------PQ------------------- 
------------------------------------------------------------ --------------- mafft-7.505-without-extensions/test/samplerna.qinsi0000644000175000017500000000357414224501721022070 0ustar nileshnilesh>AJ006331.1_1230 ---------------------------------------------------ccauggcgu uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagu------------------- ------------------------------------------------------------ >Z84287.1_1250 -------------------------------uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------ >AF064490.1_2296 ------------------------------------------------------------ -----ugagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg >Z84230.1_1250 -------------------------------uucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------ >AB049100.1_1360 auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg 
cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg