config.h.in0000644001210100001440000000350011577654215012307 0ustar olifriusers/* config.h.in. Generated from configure.ac by autoheader. */ /* Define to 1 if you have the header file. */ #undef HAVE_GETOPT_H /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #undef HAVE_MALLOC /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H /* Define to 1 if your system has a GNU libc compatible `realloc' function, and to 0 otherwise. */ #undef HAVE_REALLOC /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Define to empty if `const' does not conform to ANSI C. */ #undef const /* Define to rpl_malloc if the replacement function should be used. */ #undef malloc /* Define to rpl_realloc if the replacement function should be used. */ #undef realloc configure0000755001210100001440000041120611577654214012200 0ustar olifriusers#! /bin/sh # Guess values for system-dependent variables and create Makefiles. 
# Generated by GNU Autoconf 2.59 for FULL-PACKAGE-NAME VERSION. # # Report bugs to . # # Copyright (C) 2003 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then set -o posix fi DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # Work around bugs in pre-3.0 UWIN ksh. $as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)$' \| \ . : '\(.\)' 2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } /^X\/\(\/\/\)$/{ s//\1/; q; } /^X\/\(\/\).*/{ s//\1/; q; } s/.*/./; q'` # PATH needs CR, and LINENO needs CR and PATH. # Avoid depending upon Character Ranges. 
as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" || { # Find who we are. Look in the path if we contain no path at all # relative or not. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 { (exit 1); exit 1; }; } fi case $CONFIG_SHELL in '') as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for as_base in sh bash ksh sh5; do case $as_dir in /*) if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } CONFIG_SHELL=$as_dir/$as_base export CONFIG_SHELL exec "$CONFIG_SHELL" "$0" ${1+"$@"} fi;; esac done done ;; esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line before each line; the second 'sed' does the real # work. The second script uses 'N' to pair each line-number line # with the numbered line, and appends trailing '-' during # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) sed '=' <$as_myself | sed ' N s,$,-, : loop s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop s,-$,, s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && chmod +x $as_me.lineno || { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensible to this). . ./$as_me.lineno # Exit status is that of the last command. 
exit } case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in *c*,-n*) ECHO_N= ECHO_C=' ' ECHO_T=' ' ;; *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then # We could just check for DJGPP; but this test a) works b) is more generic # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). if test -f conf$$.exe; then # Don't use ln at all; we don't have any links as_ln_s='cp -p' else as_ln_s='ln -s' fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" # IFS # We need space, tab and new line, in precisely that order. as_nl=' ' IFS=" $as_nl" # CDPATH. $as_unset CDPATH # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` exec 6>&1 # # Initializations. # ac_default_prefix=/usr/local ac_config_libobj_dir=. cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. # This variable seems obsolete. It should probably be removed, and # only ac_max_sed_lines should be used. : ${ac_max_here_lines=38} # Identity of this package. 
PACKAGE_NAME='FULL-PACKAGE-NAME' PACKAGE_TARNAME='full-package-name' PACKAGE_VERSION='VERSION' PACKAGE_STRING='FULL-PACKAGE-NAME VERSION' PACKAGE_BUGREPORT='BUG-REPORT-ADDRESS' ac_unique_file="kalign2_hirschberg.h" # Factoring default headers for most tests. ac_includes_default="\ #include #if HAVE_SYS_TYPES_H # include #endif #if HAVE_SYS_STAT_H # include #endif #if STDC_HEADERS # include # include #else # if HAVE_STDLIB_H # include # endif #endif #if HAVE_STRING_H # if !STDC_HEADERS && HAVE_MEMORY_H # include # endif # include #endif #if HAVE_STRINGS_H # include #endif #if HAVE_INTTYPES_H # include #else # if HAVE_STDINT_H # include # endif #endif #if HAVE_UNISTD_H # include #endif" ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. ac_init_help= ac_init_version=false # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. 
bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' ac_prev= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_option in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir=$ac_optarg ;; -disable-* | --disable-*) ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/-/_/g'` eval "enable_$ac_feature=no" ;; -enable-* | --enable-*) ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. 
expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/-/_/g'` case $ac_option in *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; *) ac_optarg=yes ;; esac eval "enable_$ac_feature='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | 
--localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* 
| --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* 
| --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package| sed 's/-/_/g'` case $ac_option in *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; *) ac_optarg=yes ;; esac eval "with_$ac_package='$ac_optarg'" ;; -without-* | --without-*) ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package | sed 's/-/_/g'` eval "with_$ac_package=no" ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) { echo "$as_me: error: unrecognized option: $ac_option Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` eval "$ac_envvar='$ac_optarg'" export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` { echo "$as_me: error: missing argument to $ac_option" >&2 { (exit 1); exit 1; }; } fi # Be sure to have absolute paths. for ac_var in exec_prefix prefix do eval ac_val=$`echo $ac_var` case $ac_val in [\\/$]* | ?:[\\/]* | NONE | '' ) ;; *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; };; esac done # Be sure to have absolute paths. 
for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ localstatedir libdir includedir oldincludedir infodir mandir do eval ac_val=$`echo $ac_var` case $ac_val in [\\/$]* | ?:[\\/]* ) ;; *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; };; esac done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used." >&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. ac_confdir=`(dirname "$0") 2>/dev/null || $as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$0" : 'X\(//\)[^/]' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$0" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." 
>&2 { (exit 1); exit 1; }; } else { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 { (exit 1); exit 1; }; } fi fi (cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 { (exit 1); exit 1; }; } srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` ac_env_build_alias_set=${build_alias+set} ac_env_build_alias_value=$build_alias ac_cv_env_build_alias_set=${build_alias+set} ac_cv_env_build_alias_value=$build_alias ac_env_host_alias_set=${host_alias+set} ac_env_host_alias_value=$host_alias ac_cv_env_host_alias_set=${host_alias+set} ac_cv_env_host_alias_value=$host_alias ac_env_target_alias_set=${target_alias+set} ac_env_target_alias_value=$target_alias ac_cv_env_target_alias_set=${target_alias+set} ac_cv_env_target_alias_value=$target_alias ac_env_CC_set=${CC+set} ac_env_CC_value=$CC ac_cv_env_CC_set=${CC+set} ac_cv_env_CC_value=$CC ac_env_CFLAGS_set=${CFLAGS+set} ac_env_CFLAGS_value=$CFLAGS ac_cv_env_CFLAGS_set=${CFLAGS+set} ac_cv_env_CFLAGS_value=$CFLAGS ac_env_LDFLAGS_set=${LDFLAGS+set} ac_env_LDFLAGS_value=$LDFLAGS ac_cv_env_LDFLAGS_set=${LDFLAGS+set} ac_cv_env_LDFLAGS_value=$LDFLAGS ac_env_CPPFLAGS_set=${CPPFLAGS+set} ac_env_CPPFLAGS_value=$CPPFLAGS ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} ac_cv_env_CPPFLAGS_value=$CPPFLAGS ac_env_CPP_set=${CPP+set} ac_env_CPP_value=$CPP ac_cv_env_CPP_set=${CPP+set} ac_cv_env_CPP_value=$CPP # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures FULL-PACKAGE-NAME VERSION to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. 
Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] _ACEOF cat <<_ACEOF Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data [PREFIX/share] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --infodir=DIR info documentation [PREFIX/info] --mandir=DIR man documentation [PREFIX/man] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of FULL-PACKAGE-NAME VERSION:";; esac cat <<\_ACEOF Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker 
flags, e.g. -L if you have libraries in a nonstandard directory CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . _ACEOF fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. ac_popdir=`pwd` for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d $ac_dir || continue ac_builddir=. if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A "../" for each directory in $ac_dir_suffix. ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` else ac_dir_suffix= ac_top_builddir= fi case $srcdir in .) # No --srcdir option. We are building in place. ac_srcdir=. if test -z "$ac_top_builddir"; then ac_top_srcdir=. else ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` fi ;; [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ;; *) # Relative path. ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_builddir$srcdir ;; esac # Do not use `cd foo && pwd` to compute absolute paths, because # the directories may not exist. case `pwd` in .) ac_abs_builddir="$ac_dir";; *) case "$ac_dir" in .) ac_abs_builddir=`pwd`;; [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; *) ac_abs_builddir=`pwd`/"$ac_dir";; esac;; esac case $ac_abs_builddir in .) ac_abs_top_builddir=${ac_top_builddir}.;; *) case ${ac_top_builddir}. in .) ac_abs_top_builddir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; esac;; esac case $ac_abs_builddir in .) ac_abs_srcdir=$ac_srcdir;; *) case $ac_srcdir in .) 
ac_abs_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; esac;; esac case $ac_abs_builddir in .) ac_abs_top_srcdir=$ac_top_srcdir;; *) case $ac_top_srcdir in .) ac_abs_top_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; esac;; esac cd $ac_dir # Check for guested configure; otherwise get Cygnus style configure. if test -f $ac_srcdir/configure.gnu; then echo $SHELL $ac_srcdir/configure.gnu --help=recursive elif test -f $ac_srcdir/configure; then echo $SHELL $ac_srcdir/configure --help=recursive elif test -f $ac_srcdir/configure.ac || test -f $ac_srcdir/configure.in; then echo $ac_configure --help else echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi cd $ac_popdir done fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF FULL-PACKAGE-NAME configure VERSION generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit 0 fi exec 5>config.log cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by FULL-PACKAGE-NAME $as_me VERSION, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ _ACEOF { cat <<_ASUNAME ## --------- ## ## Platform. 
## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` hostinfo = `(hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. echo "PATH: $as_dir" done } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. 
# Build ac_configure_args from the positional parameters in two passes.
# Pass 1 collects every quoted argument into ac_configure_args0.  Pass 2
# rebuilds the same list in ac_configure_args1 and appends to
# ac_configure_args, dropping an option-like argument when the identical
# quoted argument occurs again later in the pass-1 list (so the last
# occurrence is the one that is kept).
ac_configure_args=
ac_configure_args0=
ac_configure_args1=
ac_sep=
ac_must_keep_next=false
for ac_pass in 1 2
do
  # No `in' list: iterates over the script's positional parameters.
  for ac_arg
  do
    case $ac_arg in
    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
    | -silent | --silent | --silen | --sile | --sil)
      continue ;;
    # NOTE(review): the first two alternatives below are identical; this
    # looks like a space/tab pair whose tab was lost -- confirm against a
    # freshly generated configure.
    *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
      ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
    esac
    case $ac_pass in
    1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
    2)
      ac_configure_args1="$ac_configure_args1 '$ac_arg'"
      if test $ac_must_keep_next = true; then
        ac_must_keep_next=false # Got value, back to normal.
      else
        case $ac_arg in
          *=* | --config-cache | -C | -disable-* | --disable-* \
          | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
          | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
          | -with-* | --with-* | -without-* | --without-* | --x)
            # Skip this occurrence if the pass-1 list, after the prefix
            # seen so far, still contains the same quoted argument.
            case "$ac_configure_args0 " in
              "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
            esac
            ;;
          # Any other dash-argument: keep the following argument
          # unconditionally.
          -* ) ac_must_keep_next=true ;;
        esac
      fi
      ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
      # Get rid of the leading space.
      ac_sep=" "
      ;;
    esac
  done
done
# The two scratch lists are no longer needed; unset them, or empty and
# export them when $as_unset fails and the variable is still set.
$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }

# When interrupted or exit'd, cleanup temporary files, and complete
# config.log. We remove comments because anyway the quotes in there
# would cause problems or look ugly.
# WARNING: Be sure not to use single quotes in there, as some shells,
# such as our DU 5.0 friend, will then `close' the trap.
#
# The handler below is registered for condition 0 (shell exit).  It saves
# $? first, writes the cache variables, the variables named in
# $ac_subst_vars and $ac_subst_files, and a sorted confdefs.h to file
# descriptor 5, removes the temporary files, and exits with the saved
# status.
trap 'exit_status=$?
  # Save into config.log some information that might help in debugging.
  {
    echo

    cat <<\_ASBOX
## ---------------- ##
## Cache variables. ##
## ---------------- ##
_ASBOX
    echo
    # The following way of writing the cache mishandles newlines in values,
{
  (set) 2>&1 |
    case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
    *ac_space=\ *)
      sed -n \
        "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
      ;;
    *)
      sed -n \
        "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
      ;;
    esac;
}
    echo

    cat <<\_ASBOX
## ----------------- ##
## Output variables. ##
## ----------------- ##
_ASBOX
    echo
    for ac_var in $ac_subst_vars
    do
      eval ac_val=$`echo $ac_var`
      echo "$ac_var='"'"'$ac_val'"'"'"
    done | sort
    echo

    if test -n "$ac_subst_files"; then
      cat <<\_ASBOX
## ------------- ##
## Output files. ##
## ------------- ##
_ASBOX
      echo
      for ac_var in $ac_subst_files
      do
        eval ac_val=$`echo $ac_var`
        echo "$ac_var='"'"'$ac_val'"'"'"
      done | sort
      echo
    fi

    if test -s confdefs.h; then
      cat <<\_ASBOX
## ----------- ##
## confdefs.h. ##
## ----------- ##
_ASBOX
      echo
      sed "/^$/d" confdefs.h | sort
      echo
    fi
    test "$ac_signal" != 0 &&
      echo "$as_me: caught signal $ac_signal"
    echo "$as_me: exit $exit_status"
  } >&5
  rm -f core *.core &&
  rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
    exit $exit_status
     ' 0
# For signals 1, 2, 13 and 15: record the signal number in ac_signal and
# exit with status 1, which in turn runs the exit handler above.
for ac_signal in 1 2 13 15; do
  trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
done
ac_signal=0

# confdefs.h avoids OS command line length limits that DEFS can exceed.
rm -rf conftest* confdefs.h
# AIX cpp loses on an empty file, so make sure it contains at least a newline.
echo >confdefs.h

# Predefined preprocessor variables.

cat >>confdefs.h <<_ACEOF
#define PACKAGE_NAME "$PACKAGE_NAME"
_ACEOF


cat >>confdefs.h <<_ACEOF
#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
_ACEOF


cat >>confdefs.h <<_ACEOF
#define PACKAGE_VERSION "$PACKAGE_VERSION"
_ACEOF


cat >>confdefs.h <<_ACEOF
#define PACKAGE_STRING "$PACKAGE_STRING"
_ACEOF


cat >>confdefs.h <<_ACEOF
#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
_ACEOF


# Let the site file select an alternate cache file if it wants to.
# Prefer explicitly selected file to automatically selected ones.
#
# When CONFIG_SITE is empty, default it to the share/ and etc/ config.site
# files under $prefix, or under $ac_default_prefix when prefix is NONE.
if test -z "$CONFIG_SITE"; then
  if test "x$prefix" != xNONE; then
    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
  else
    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
  fi
fi
# CONFIG_SITE is a whitespace-separated list, so it is deliberately left
# unquoted here.  Each readable site file is copied to fd 5 with a "| "
# prefix and then sourced into this shell.
for ac_site_file in $CONFIG_SITE; do
  if test -r "$ac_site_file"; then
    { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
echo "$as_me: loading site script $ac_site_file" >&6;}
    sed 's/^/| /' "$ac_site_file" >&5
    . "$ac_site_file"
  fi
done

# Load the cache file if it is readable, otherwise create it empty.
# $cache_file is quoted wherever it is used as a file name so that a path
# containing whitespace or glob characters is treated as a single word.
if test -r "$cache_file"; then
  # Some versions of bash will fail to source /dev/null (special
  # files actually), so we avoid doing that.
  if test -f "$cache_file"; then
    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
echo "$as_me: loading cache $cache_file" >&6;}
    # A name without a directory part is prefixed with ./ so that `.'
    # reads the file from the current directory instead of searching PATH.
    case $cache_file in
      [\\/]* | ?:[\\/]* ) . "$cache_file";;
      *)                  . "./$cache_file";;
    esac
  fi
else
  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
echo "$as_me: creating cache $cache_file" >&6;}
  >"$cache_file"
fi

# Check that the precious variables saved in the cache have kept the same
# value.
# For every variable VAR that has an ac_env_VAR_set entry, compare the
# cached state (ac_cv_env_VAR_set / ac_cv_env_VAR_value) with the current
# one (ac_env_VAR_set / ac_env_VAR_value).  Any mismatch is reported on
# fd 5 and stderr and marks the cache as corrupted.
ac_cache_corrupted=false
for ac_var in `(set) 2>&1 |
               sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
  eval ac_old_set=\$ac_cv_env_${ac_var}_set
  eval ac_new_set=\$ac_env_${ac_var}_set
  eval ac_old_val="\$ac_cv_env_${ac_var}_value"
  eval ac_new_val="\$ac_env_${ac_var}_value"
  case $ac_old_set,$ac_new_set in
    set,)
      { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
      ac_cache_corrupted=: ;;
    ,set)
      { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
      ac_cache_corrupted=: ;;
    ,);;
    *)
      if test "x$ac_old_val" != "x$ac_new_val"; then
        { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
        { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
echo "$as_me: former value: $ac_old_val" >&2;}
        { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
echo "$as_me: current value: $ac_new_val" >&2;}
        ac_cache_corrupted=:
      fi;;
  esac
  # Pass precious variables to config.status.
  if test "$ac_new_set" = set; then
    case $ac_new_val in
    # NOTE(review): the first two alternatives below are identical; this
    # looks like a space/tab pair whose tab was lost -- confirm against a
    # freshly generated configure.
    *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
      ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
    *) ac_arg=$ac_var=$ac_new_val ;;
    esac
    case " $ac_configure_args " in
      *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
      *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
    esac
  fi
done
# A corrupted cache is fatal: report it and exit with status 1.
if $ac_cache_corrupted; then
  { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
echo "$as_me: error: changes in the environment can compromise the build" >&2;}
  { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
   { (exit 1); exit 1; }; }
fi

# Command templates used by the compile/link probes.  They are stored
# single-quoted and expanded later through eval, so they pick up whatever
# CC, CFLAGS, etc. hold at the time a probe runs.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

ac_config_headers="$ac_config_headers config.h"

# Checks for programs.
#
# C compiler search.  Candidates are tried in this order, each one only
# when CC is still empty: ${ac_tool_prefix}gcc, gcc, ${ac_tool_prefix}cc,
# cc, cc again (skipping /usr/ucb/cc), ${ac_tool_prefix}cl, cl.  Every
# probe honours a preset CC / ac_ct_CC and a cached ac_cv_prog_* value
# before walking $PATH.
# NOTE(review): $as_executable_p is defined outside this section;
# presumably it tests whether the file is executable -- confirm.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
if test -n "$ac_tool_prefix"; then
  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$CC"; then
  ac_cv_prog_CC="$CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_CC="${ac_tool_prefix}gcc"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
  echo "$as_me:$LINENO: result: $CC" >&5
echo "${ECHO_T}$CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

fi
if test -z "$ac_cv_prog_CC"; then
  ac_ct_CC=$CC
  # Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$ac_ct_CC"; then
  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_ac_ct_CC="gcc"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
  echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
echo "${ECHO_T}$ac_ct_CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

  CC=$ac_ct_CC
else
  CC="$ac_cv_prog_CC"
fi

if test -z "$CC"; then
  if test -n "$ac_tool_prefix"; then
  # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
set dummy ${ac_tool_prefix}cc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$CC"; then
  ac_cv_prog_CC="$CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_CC="${ac_tool_prefix}cc"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
  echo "$as_me:$LINENO: result: $CC" >&5
echo "${ECHO_T}$CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

fi
if test -z "$ac_cv_prog_CC"; then
  ac_ct_CC=$CC
  # Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$ac_ct_CC"; then
  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_ac_ct_CC="cc"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
  echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
echo "${ECHO_T}$ac_ct_CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

  CC=$ac_ct_CC
else
  CC="$ac_cv_prog_CC"
fi

fi
if test -z "$CC"; then
  # Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$CC"; then
  ac_cv_prog_CC="$CC" # Let the user override the test.
else
  ac_prog_rejected=no
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
       ac_prog_rejected=yes
       continue
     fi
    ac_cv_prog_CC="cc"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

if test $ac_prog_rejected = yes; then
  # We found a bogon in the path, so make sure we never use it.
  set dummy $ac_cv_prog_CC
  shift
  if test $# != 0; then
    # We chose a different compiler from the bogus one.
    # However, it has the same basename, so the bogon will be chosen
    # first if we set CC to just the basename; use the full file name.
    shift
    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
  fi
fi
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
  echo "$as_me:$LINENO: result: $CC" >&5
echo "${ECHO_T}$CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

fi
if test -z "$CC"; then
  if test -n "$ac_tool_prefix"; then
  for ac_prog in cl
  do
    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$CC"; then
  ac_cv_prog_CC="$CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
  echo "$as_me:$LINENO: result: $CC" >&5
echo "${ECHO_T}$CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

    test -n "$CC" && break
  done
fi
if test -z "$CC"; then
  ac_ct_CC=$CC
  for ac_prog in cl
do
  # Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  if test -n "$ac_ct_CC"; then
  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for ac_exec_ext in '' $ac_executable_extensions; do
  if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
    ac_cv_prog_ac_ct_CC="$ac_prog"
    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
    break 2
  fi
done
done

fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
  echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
echo "${ECHO_T}$ac_ct_CC" >&6
else
  echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi

  test -n "$ac_ct_CC" && break
done

  CC=$ac_ct_CC
fi

fi


# No candidate was found: report the failure and exit with status 1.
test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&5
echo "$as_me: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&2;}
   { (exit 1); exit 1; }; }

# Provide some information about the compiler.
# The probes below all follow the same shape: log the command to fd 5,
# run it through eval, record $? in ac_status, and re-raise that status
# with (exit $ac_status) so the enclosing `if' sees it.
#
# Ask the compiler to identify itself with --version, -v and -V.  stdin is
# taken from /dev/null so the compiler cannot wait for input, and stdout is
# sent to fd 5; without the redirections the eval'd text would end in
# `&5', which backgrounds the compiler and then runs a command named 5.
echo "$as_me:$LINENO:" \
     "checking for C compiler version" >&5
ac_compiler=`set X $ac_compile; echo $2`
{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
  (eval $ac_compiler --version </dev/null >&5) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }
{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
  (eval $ac_compiler -v </dev/null >&5) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }
{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
  (eval $ac_compiler -V </dev/null >&5) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }

cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

int
main ()
{

  ;
  return 0;
}
_ACEOF
ac_clean_files_save=$ac_clean_files
ac_clean_files="$ac_clean_files a.out a.exe b.out"
# Try to create an executable without -o first, disregard a.out.
# It will help us diagnose broken compilers, and finding out an intuition
# of exeext.
echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6
# Same as $ac_link but with the " -o conftest..." word removed.
ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
  (eval $ac_link_default) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; then
  # Find the output, starting from the most likely. This scheme is
# not robust to junk in `.', hence go to wildcards (a.*) only as a last
# resort.

# Be careful to initialize this variable, since it used to be cached.
# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
ac_cv_exeext=
# b.out is created by i960 compilers.
for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
do
  test -f "$ac_file" || continue
  case $ac_file in
    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
        ;;
    conftest.$ac_ext )
        # This is the source file.
        ;;
    [ab].out )
        # We found the default executable, but exeext='' is most
        # certainly right.
        break;;
    *.* )
        ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
        # FIXME: I believe we export ac_cv_exeext for Libtool,
        # but it would be cool to find out if it's true. Does anybody
        # maintain Libtool? --akim.
        export ac_cv_exeext
        break;;
    * )
        break;;
  esac
done
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

{ { echo "$as_me:$LINENO: error: C compiler cannot create executables See \`config.log' for more details." >&5
echo "$as_me: error: C compiler cannot create executables See \`config.log' for more details." >&2;}
   { (exit 77); exit 77; }; }
fi

ac_exeext=$ac_cv_exeext
echo "$as_me:$LINENO: result: $ac_file" >&5
echo "${ECHO_T}$ac_file" >&6

# Check the compiler produces executables we can run. If not, either
# the compiler is broken, or we cross compile.
echo "$as_me:$LINENO: checking whether the C compiler works" >&5
echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
# If not cross compiling, check that we can run a simple program.
if test "$cross_compiling" != yes; then
  if { ac_try='./$ac_file'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
    cross_compiling=no
  else
    if test "$cross_compiling" = maybe; then
        cross_compiling=yes
    else
        { { echo "$as_me:$LINENO: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&5
echo "$as_me: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&2;}
   { (exit 1); exit 1; }; }
    fi
  fi
fi
echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6

rm -f a.out a.exe conftest$ac_cv_exeext b.out
ac_clean_files=$ac_clean_files_save
# Check the compiler produces executables we can run. If not, either
# the compiler is broken, or we cross compile.
echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
echo "$as_me:$LINENO: result: $cross_compiling" >&5
echo "${ECHO_T}$cross_compiling" >&6

echo "$as_me:$LINENO: checking for suffix of executables" >&5
echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
  (eval $ac_link) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; then
  # If both `conftest.exe' and `conftest' are `present' (well, observable)
# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
# work properly (i.e., refer to `conftest.exe'), while it won't with
# `rm'.
for ac_file in conftest.exe conftest conftest.*; do
  test -f "$ac_file" || continue
  case $ac_file in
    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
          export ac_cv_exeext
          break;;
    * ) break;;
  esac
done
else
  { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&5
echo "$as_me: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&2;}
   { (exit 1); exit 1; }; }
fi

rm -f conftest$ac_cv_exeext
echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
echo "${ECHO_T}$ac_cv_exeext" >&6

rm -f conftest.$ac_ext
EXEEXT=$ac_cv_exeext
ac_exeext=$EXEEXT
# Object-file suffix: compile the empty program and take the extension of
# the first conftest.* file that is not a known by-product.
echo "$as_me:$LINENO: checking for suffix of object files" >&5
echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
if test "${ac_cv_objext+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

int
main ()
{

  ;
  return 0;
}
_ACEOF
rm -f conftest.o conftest.obj
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; then
  for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
  case $ac_file in
    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
       break;;
  esac
done
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&5
echo "$as_me: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&2;}
   { (exit 1); exit 1; }; }
fi

rm -f conftest.$ac_cv_objext conftest.$ac_ext
fi
echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
echo "${ECHO_T}$ac_cv_objext" >&6
OBJEXT=$ac_cv_objext
ac_objext=$OBJEXT
# GNU C detection: the test program only compiles when __GNUC__ is defined.
echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
if test "${ac_cv_c_compiler_gnu+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

int
main ()
{
#ifndef __GNUC__
       choke me
#endif

  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  ac_compiler_gnu=yes
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

ac_compiler_gnu=no
fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_c_compiler_gnu=$ac_compiler_gnu

fi
echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
GCC=`test $ac_compiler_gnu = yes && echo yes`
# Remember whether the user supplied CFLAGS; if so it is restored
# unchanged after the -g probe.
ac_test_CFLAGS=${CFLAGS+set}
ac_save_CFLAGS=$CFLAGS
CFLAGS="-g"
echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
if test "${ac_cv_prog_cc_g+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

int
main ()
{

  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  ac_cv_prog_cc_g=yes
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

ac_cv_prog_cc_g=no
fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
fi
echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
if test "$ac_test_CFLAGS" = set; then
  CFLAGS=$ac_save_CFLAGS
elif test $ac_cv_prog_cc_g = yes; then
  if test "$GCC" = yes; then
    CFLAGS="-g -O2"
  else
    CFLAGS="-g"
  fi
else
  if test "$GCC" = yes; then
    CFLAGS="-O2"
  else
    CFLAGS=
  fi
fi
# ANSI C probe.  The test program needs <stdarg.h> (va_list), <stdio.h>
# (FILE), and <sys/types.h> + <sys/stat.h> (struct stat); an #include line
# without a header name is a preprocessing error, which would make every
# candidate option fail.
echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
if test "${ac_cv_prog_cc_stdc+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  ac_cv_prog_cc_stdc=no
ac_save_CC=$CC
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
struct buf { int x; };
FILE * (*rcsopen) (struct buf *, struct stat *, int);
static char *e (p, i)
     char **p;
     int i;
{
  return p[i];
}
static char *f (char * (*g) (char **, int), char **p, ...)
{
  char *s;
  va_list v;
  va_start (v,p);
  s = g (p, va_arg (v,int));
  va_end (v);
  return s;
}

/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
   function prototypes and stuff, but not '\xHH' hex character constants.
   These don't provoke an error unfortunately, instead are silently treated
   as 'x'. The following induces an error, until -std1 is added to get
   proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
   array size at least. It's necessary to write '\x00'==0 to get something
   that's true only with -std1. */
int osf4_cc_array ['\x00' == 0 ? 1 : -1];

int test (int i, double x);
struct s1 {int (*f) (int a);};
struct s2 {int (*f) (double a);};
int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
int argc;
char **argv;
int
main ()
{
return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
  ;
  return 0;
}
_ACEOF
# Don't try gcc -ansi; that turns off useful extensions and
# breaks some systems' header files.
# AIX -qlanglvl=ansi
# Ultrix and OSF/1 -std1
# HP-UX 10.20 and later -Ae
# HP-UX older versions -Aa -D_HPUX_SOURCE
# SVR4 -Xc -D__EXTENSIONS__
for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
do
  CC="$ac_save_CC $ac_arg"
  rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  ac_cv_prog_cc_stdc=$ac_arg
break
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

fi
rm -f conftest.err conftest.$ac_objext
done
rm -f conftest.$ac_ext conftest.$ac_objext
CC=$ac_save_CC

fi

case "x$ac_cv_prog_cc_stdc" in
  x|xno)
    echo "$as_me:$LINENO: result: none needed" >&5
echo "${ECHO_T}none needed" >&6 ;;
  *)
    echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
    CC="$CC $ac_cv_prog_cc_stdc" ;;
esac

# Some people use a C++ compiler to compile C. Since we use `exit',
# in C++ we need to declare it. In case someone uses the same compiler
# for both compiling C and C++ we need to have the C++ compiler decide
# the declaration of exit, since it's the most demanding environment.
cat >conftest.$ac_ext <<_ACEOF
#ifndef __cplusplus
  choke me
#endif
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  # Try each candidate declaration twice: first together with <stdlib.h>
  # (it must not conflict with the header's own declaration of exit),
  # then on its own (it must be sufficient without the header).  The
  # first candidate that passes both compiles ends the loop.
  for ac_declaration in \
   '' \
   'extern "C" void std::exit (int) throw (); using std::exit;' \
   'extern "C" void std::exit (int); using std::exit;' \
   'extern "C" void exit (int) throw ();' \
   'extern "C" void exit (int);' \
   'void exit (int);'
do
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_declaration
#include <stdlib.h>
int
main ()
{
exit (42);
  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  :
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

continue
fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_declaration
int
main ()
{
exit (42);
  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
         { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
         { ac_try='test -s conftest.$ac_objext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  break
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
done
rm -f conftest*
# A non-empty winning declaration is appended to confdefs.h, guarded so
# that only C++ translation units see it.
if test -n "$ac_declaration"; then
  echo '#ifdef __cplusplus' >>confdefs.h
  echo $ac_declaration >>confdefs.h
  echo '#endif' >>confdefs.h
fi

else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


# Checks for libraries.

# Checks for header files.
ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if test "${ac_cv_prog_CPP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. 
cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi echo "$as_me:$LINENO: result: $CPP" >&5 echo "${ECHO_T}$CPP" >&6 ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&5 echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." 
>&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu echo "$as_me:$LINENO: checking for egrep" >&5 echo $ECHO_N "checking for egrep... $ECHO_C" >&6 if test "${ac_cv_prog_egrep+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if echo a | (grep -E '(a|b)') >/dev/null 2>&1 then ac_cv_prog_egrep='grep -E' else ac_cv_prog_egrep='egrep' fi fi echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 echo "${ECHO_T}$ac_cv_prog_egrep" >&6 EGREP=$ac_cv_prog_egrep echo "$as_me:$LINENO: checking for ANSI C header files" >&5 echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6 if test "${ac_cv_header_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_header_stdc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdc=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); exit (0); } _ACEOF rm -f conftest$ac_exeext if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then : else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi fi echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 echo "${ECHO_T}$ac_cv_header_stdc" >&6 if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF #define STDC_HEADERS 1 _ACEOF fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_Header=no" fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in stdlib.h string.h unistd.h getopt.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if eval "test \"\${$as_ac_Header+set}\" = set"; then echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? echo "$as_me:$LINENO: checking $ac_header usability" >&5 echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? echo "$as_me:$LINENO: checking $ac_header presence" >&5 echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## --------------------------------- ## ## Report this to BUG-REPORT-ADDRESS ## ## --------------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done # Checks for typedefs, structures, and compiler characteristics. 
echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5 echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6 if test "${ac_cv_c_const+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { /* FIXME: Include the comments suggested by Paul. */ #ifndef __cplusplus /* Ultrix mips cc rejects this. */ typedef int charset[2]; const charset x; /* SunOS 4.1.1 cc rejects this. */ char const *const *ccp; char **p; /* NEC SVR4.0.2 mips cc rejects this. */ struct point {int x, y;}; static struct point const zero = {0,0}; /* AIX XL C 1.02.0.0 rejects this. It does not let you subtract one const X* pointer from another in an arm of an if-expression whose if-part is not a constant expression */ const char *g = "string"; ccp = &g + (g ? g-g : 0); /* HPUX 7.0 cc rejects these. */ ++ccp; p = (char**) ccp; ccp = (char const *const *) p; { /* SCO 3.2v4 cc rejects this. */ char *t; char const *s = 0 ? (char *) 0 : (char const *) 0; *t++ = 0; } { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ int x[] = {25, 17}; const int *foo = &x[0]; ++foo; } { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ typedef const int *iptr; iptr p = 0; ++p; } { /* AIX XL C 1.02.0.0 rejects this saying "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ struct s { int j; const int *ap[3]; }; struct s *b; b->j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; } #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! 
-s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_c_const=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_c_const=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5 echo "${ECHO_T}$ac_cv_c_const" >&6 if test $ac_cv_c_const = no; then cat >>confdefs.h <<\_ACEOF #define const _ACEOF fi # Checks for library functions. for ac_header in stdlib.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if eval "test \"\${$as_ac_Header+set}\" = set"; then echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? echo "$as_me:$LINENO: checking $ac_header usability" >&5 echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? echo "$as_me:$LINENO: checking $ac_header presence" >&5 echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## --------------------------------- ## ## Report this to BUG-REPORT-ADDRESS ## ## --------------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done echo "$as_me:$LINENO: checking for GNU libc compatible malloc" >&5 echo $ECHO_N "checking for GNU libc compatible malloc... $ECHO_C" >&6 if test "${ac_cv_func_malloc_0_nonnull+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ac_cv_func_malloc_0_nonnull=no else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #if STDC_HEADERS || HAVE_STDLIB_H # include #else char *malloc (); #endif int main () { exit (malloc (0) ? 0 : 1); ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_func_malloc_0_nonnull=yes else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_func_malloc_0_nonnull=no fi rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi echo "$as_me:$LINENO: result: $ac_cv_func_malloc_0_nonnull" >&5 echo "${ECHO_T}$ac_cv_func_malloc_0_nonnull" >&6 if test $ac_cv_func_malloc_0_nonnull = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_MALLOC 1 _ACEOF else cat >>confdefs.h <<\_ACEOF #define HAVE_MALLOC 0 _ACEOF case $LIBOBJS in "malloc.$ac_objext" | \ *" malloc.$ac_objext" | \ "malloc.$ac_objext "* | \ *" malloc.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS malloc.$ac_objext" ;; esac cat >>confdefs.h <<\_ACEOF #define malloc rpl_malloc _ACEOF fi for ac_header in stdlib.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if eval "test \"\${$as_ac_Header+set}\" = set"; then echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 else # Is the header compilable? echo "$as_me:$LINENO: checking $ac_header usability" >&5 echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? echo "$as_me:$LINENO: checking $ac_header presence" >&5 echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&5 echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## --------------------------------- ## ## Report this to BUG-REPORT-ADDRESS ## ## --------------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 fi if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done echo "$as_me:$LINENO: checking for GNU libc compatible realloc" >&5 echo $ECHO_N "checking for GNU libc compatible realloc... $ECHO_C" >&6 if test "${ac_cv_func_realloc_0_nonnull+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ac_cv_func_realloc_0_nonnull=no else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #if STDC_HEADERS || HAVE_STDLIB_H # include #else char *realloc (); #endif int main () { exit (realloc (0, 0) ? 0 : 1); ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_func_realloc_0_nonnull=yes else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_func_realloc_0_nonnull=no fi rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi echo "$as_me:$LINENO: result: $ac_cv_func_realloc_0_nonnull" >&5 echo "${ECHO_T}$ac_cv_func_realloc_0_nonnull" >&6 if test $ac_cv_func_realloc_0_nonnull = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_REALLOC 1 _ACEOF else cat >>confdefs.h <<\_ACEOF #define HAVE_REALLOC 0 _ACEOF case $LIBOBJS in "realloc.$ac_objext" | \ *" realloc.$ac_objext" | \ "realloc.$ac_objext "* | \ *" realloc.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS realloc.$ac_objext" ;; esac cat >>confdefs.h <<\_ACEOF #define realloc rpl_realloc _ACEOF fi ac_config_files="$ac_config_files Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, don't put newlines in cache variables' values. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
{ (set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n \ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" ;; esac; } | sed ' t clear : clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ : end' >>confcache if diff $cache_file confcache >/dev/null 2>&1; then :; else if test -w $cache_file; then test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" cat confcache >$cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # VPATH may cause trouble with some makes, so we remove $(srcdir), # ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=/{ s/:*\$(srcdir):*/:/; s/:*\${srcdir}:*/:/; s/:*@srcdir@:*/:/; s/^\([^=]*=[ ]*\):*/\1/; s/:*$//; s/^[^=]*=[ ]*$//; }' fi DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_i=`echo "$ac_i" | sed 's/\$U\././;s/\.o$//;s/\.obj$//'` # 2. Add them. 
ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : ${CONFIG_STATUS=./config.status} ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 echo "$as_me: creating $CONFIG_STATUS" >&6;} cat >$CONFIG_STATUS <<_ACEOF #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then set -o posix fi DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # Work around bugs in pre-3.0 UWIN ksh. $as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. 
as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)$' \| \ . : '\(.\)' 2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } /^X\/\(\/\/\)$/{ s//\1/; q; } /^X\/\(\/\).*/{ s//\1/; q; } s/.*/./; q'` # PATH needs CR, and LINENO needs CR and PATH. # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" || { # Find who we are. Look in the path if we contain no path at all # relative or not. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} { (exit 1); exit 1; }; } fi case $CONFIG_SHELL in '') as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for as_base in sh bash ksh sh5; do case $as_dir in /*) if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } CONFIG_SHELL=$as_dir/$as_base export CONFIG_SHELL exec "$CONFIG_SHELL" "$0" ${1+"$@"} fi;; esac done done ;; esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line before each line; the second 'sed' does the real # work. The second script uses 'N' to pair each line-number line # with the numbered line, and appends trailing '-' during # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) sed '=' <$as_myself | sed ' N s,$,-, : loop s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop s,-$,, s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && chmod +x $as_me.lineno || { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensible to this). . ./$as_me.lineno # Exit status is that of the last command. 
exit } case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in *c*,-n*) ECHO_N= ECHO_C=' ' ECHO_T=' ' ;; *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then # We could just check for DJGPP; but this test a) works b) is more generic # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). if test -f conf$$.exe; then # Don't use ln at all; we don't have any links as_ln_s='cp -p' else as_ln_s='ln -s' fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" # IFS # We need space, tab and new line, in precisely that order. as_nl=' ' IFS=" $as_nl" # CDPATH. $as_unset CDPATH exec 6>&1 # Open the log real soon, to keep \$[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. Logging --version etc. is OK. exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX } >&5 cat >&5 <<_CSEOF This file was extended by FULL-PACKAGE-NAME $as_me VERSION, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ _CSEOF echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 echo >&5 _ACEOF # Files that config.status was made for. 
if test -n "$ac_config_files"; then echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS fi if test -n "$ac_config_headers"; then echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS fi if test -n "$ac_config_links"; then echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS fi if test -n "$ac_config_commands"; then echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS fi cat >>$CONFIG_STATUS <<\_ACEOF ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. Usage: $0 [OPTIONS] [FILE]... -h, --help print this help, then exit -V, --version print version number, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ FULL-PACKAGE-NAME config.status VERSION configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" Copyright (C) 2003 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." srcdir=$srcdir _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # If no file are specified by the user, then we need to provide default # value. By we need to know if files were specified by the user. ac_need_defaults=: while test $# != 0 do case $1 in --*=*) ac_option=`expr "x$1" : 'x\([^=]*\)='` ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` ac_shift=: ;; -*) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; *) # This is not an option, so the user has probably given explicit # arguments. ac_option=$1 ac_need_defaults=false;; esac case $ac_option in # Handling of the options. 
_ACEOF cat >>$CONFIG_STATUS <<\_ACEOF -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --vers* | -V ) echo "$ac_cs_version"; exit 0 ;; --he | --h) # Conflict between --help and --header { { echo "$as_me:$LINENO: error: ambiguous option: $1 Try \`$0 --help' for more information." >&5 echo "$as_me: error: ambiguous option: $1 Try \`$0 --help' for more information." >&2;} { (exit 1); exit 1; }; };; --help | --hel | -h ) echo "$ac_cs_usage"; exit 0 ;; --debug | --d* | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift CONFIG_FILES="$CONFIG_FILES $ac_optarg" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" ac_need_defaults=false;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 Try \`$0 --help' for more information." >&5 echo "$as_me: error: unrecognized option: $1 Try \`$0 --help' for more information." >&2;} { (exit 1); exit 1; }; } ;; *) ac_config_targets="$ac_config_targets $1" ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF if \$ac_cs_recheck; then echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF for ac_config_target in $ac_config_targets do case "$ac_config_target" in # Handling of arguments. 
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 echo "$as_me: error: invalid argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason to put it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Create a temporary directory, and hook for its removal unless debugging. $debug || { trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { tmp=./confstat$$-$RANDOM (umask 077 && mkdir $tmp) } || { echo "$me: cannot create a temporary directory in ." >&2 { (exit 1); exit 1; } } _ACEOF cat >>$CONFIG_STATUS <<_ACEOF # # CONFIG_FILES section. # # No need to generate the scripts if there are no CONFIG_FILES. # This happens for instance when ./config.status config.h if test -n "\$CONFIG_FILES"; then # Protect against being on the right side of a sed subst in config.status. 
sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF s,@SHELL@,$SHELL,;t t s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t s,@exec_prefix@,$exec_prefix,;t t s,@prefix@,$prefix,;t t s,@program_transform_name@,$program_transform_name,;t t s,@bindir@,$bindir,;t t s,@sbindir@,$sbindir,;t t s,@libexecdir@,$libexecdir,;t t s,@datadir@,$datadir,;t t s,@sysconfdir@,$sysconfdir,;t t s,@sharedstatedir@,$sharedstatedir,;t t s,@localstatedir@,$localstatedir,;t t s,@libdir@,$libdir,;t t s,@includedir@,$includedir,;t t s,@oldincludedir@,$oldincludedir,;t t s,@infodir@,$infodir,;t t s,@mandir@,$mandir,;t t s,@build_alias@,$build_alias,;t t s,@host_alias@,$host_alias,;t t s,@target_alias@,$target_alias,;t t s,@DEFS@,$DEFS,;t t s,@ECHO_C@,$ECHO_C,;t t s,@ECHO_N@,$ECHO_N,;t t s,@ECHO_T@,$ECHO_T,;t t s,@LIBS@,$LIBS,;t t s,@CC@,$CC,;t t s,@CFLAGS@,$CFLAGS,;t t s,@LDFLAGS@,$LDFLAGS,;t t s,@CPPFLAGS@,$CPPFLAGS,;t t s,@ac_ct_CC@,$ac_ct_CC,;t t s,@EXEEXT@,$EXEEXT,;t t s,@OBJEXT@,$OBJEXT,;t t s,@CPP@,$CPP,;t t s,@EGREP@,$EGREP,;t t s,@LIBOBJS@,$LIBOBJS,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t CEOF _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_lines=48 ac_sed_frag=1 # Number of current file. ac_beg=1 # First line for current file. ac_end=$ac_max_sed_lines # Line after last line for current file. ac_more_lines=: ac_sed_cmds= while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag else sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag fi if test ! 
-s $tmp/subs.frag; then ac_more_lines=false else # The purpose of the label and of the branching condition is to # speed up the sed processing (if there are no `@' at all, there # is no need to browse any of the substitutions). # These are the two extra sed commands mentioned above. (echo ':t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" else ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" fi ac_sed_frag=`expr $ac_sed_frag + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_lines` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi fi # test -n "$CONFIG_FILES" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case $ac_file in - | *:- | *:-:* ) # input from stdin cat >$tmp/stdin ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; * ) ac_file_in=$ac_file.in ;; esac # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. ac_dir=`(dirname "$ac_file") 2>/dev/null || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` { if $as_mkdir_p; then mkdir -p "$ac_dir" else as_dir="$ac_dir" as_dirs= while test ! -d "$as_dir"; do as_dirs="$as_dir $as_dirs" as_dir=`(dirname "$as_dir") 2>/dev/null || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| \ . 
: '\(.\)' 2>/dev/null || echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` done test ! -n "$as_dirs" || mkdir $as_dirs fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} { (exit 1); exit 1; }; }; } ac_builddir=. if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A "../" for each directory in $ac_dir_suffix. ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` else ac_dir_suffix= ac_top_builddir= fi case $srcdir in .) # No --srcdir option. We are building in place. ac_srcdir=. if test -z "$ac_top_builddir"; then ac_top_srcdir=. else ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` fi ;; [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ;; *) # Relative path. ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_builddir$srcdir ;; esac # Do not use `cd foo && pwd` to compute absolute paths, because # the directories may not exist. case `pwd` in .) ac_abs_builddir="$ac_dir";; *) case "$ac_dir" in .) ac_abs_builddir=`pwd`;; [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; *) ac_abs_builddir=`pwd`/"$ac_dir";; esac;; esac case $ac_abs_builddir in .) ac_abs_top_builddir=${ac_top_builddir}.;; *) case ${ac_top_builddir}. in .) ac_abs_top_builddir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; esac;; esac case $ac_abs_builddir in .) ac_abs_srcdir=$ac_srcdir;; *) case $ac_srcdir in .) ac_abs_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; esac;; esac case $ac_abs_builddir in .) ac_abs_top_srcdir=$ac_top_srcdir;; *) case $ac_top_srcdir in .) 
ac_abs_top_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; esac;; esac if test x"$ac_file" != x-; then { echo "$as_me:$LINENO: creating $ac_file" >&5 echo "$as_me: creating $ac_file" >&6;} rm -f "$ac_file" fi # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ if test x"$ac_file" = x-; then configure_input= else configure_input="$ac_file. " fi configure_input=$configure_input"Generated from `echo $ac_file_in | sed 's,.*/,,'` by configure." # First look for the input files in the build tree, otherwise in the # src tree. ac_file_inputs=`IFS=: for f in $ac_file_in; do case $f in -) echo $tmp/stdin ;; [\\/$]*) # Absolute (can't be DOS-style, as IFS=:) test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } echo "$f";; *) # Relative if test -f "$f"; then # Build tree echo "$f" elif test -f "$srcdir/$f"; then # Source tree echo "$srcdir/$f" else # /dev/null tree { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } fi;; esac done` || { (exit 1); exit 1; } _ACEOF cat >>$CONFIG_STATUS <<_ACEOF sed "$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s,@configure_input@,$configure_input,;t t s,@srcdir@,$ac_srcdir,;t t s,@abs_srcdir@,$ac_abs_srcdir,;t t s,@top_srcdir@,$ac_top_srcdir,;t t s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t s,@builddir@,$ac_builddir,;t t s,@abs_builddir@,$ac_abs_builddir,;t t s,@top_builddir@,$ac_top_builddir,;t t s,@abs_top_builddir@,$ac_abs_top_builddir,;t t " $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out rm -f $tmp/stdin if test x"$ac_file" != x-; then mv $tmp/out $ac_file else cat $tmp/out rm -f $tmp/out fi done 
_ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # # CONFIG_HEADER section. # # These sed commands are passed to sed as "A NAME B NAME C VALUE D", where # NAME is the cpp macro being defined and VALUE is the value it is being given. # # ac_d sets the value in "#define NAME VALUE" lines. ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' ac_dB='[ ].*$,\1#\2' ac_dC=' ' ac_dD=',;t' # ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' ac_uB='$,\1#\2define\3' ac_uC=' ' ac_uD=',;t' for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case $ac_file in - | *:- | *:-:* ) # input from stdin cat >$tmp/stdin ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; * ) ac_file_in=$ac_file.in ;; esac test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 echo "$as_me: creating $ac_file" >&6;} # First look for the input files in the build tree, otherwise in the # src tree. ac_file_inputs=`IFS=: for f in $ac_file_in; do case $f in -) echo $tmp/stdin ;; [\\/$]*) # Absolute (can't be DOS-style, as IFS=:) test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } # Do quote $f, to prevent DOS paths from being IFS'd. echo "$f";; *) # Relative if test -f "$f"; then # Build tree echo "$f" elif test -f "$srcdir/$f"; then # Source tree echo "$srcdir/$f" else # /dev/null tree { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } fi;; esac done` || { (exit 1); exit 1; } # Remove the trailing spaces. 
sed 's/[ ]*$//' $ac_file_inputs >$tmp/in _ACEOF # Transform confdefs.h into two sed scripts, `conftest.defines' and # `conftest.undefs', that substitutes the proper values into # config.h.in to produce config.h. The first handles `#define' # templates, and the second `#undef' templates. # And first: Protect against being on the right side of a sed subst in # config.status. Protect against being in an unquoted here document # in config.status. rm -f conftest.defines conftest.undefs # Using a here document instead of a string reduces the quoting nightmare. # Putting comments in sed scripts is not portable. # # `end' is used to avoid that the second main sed command (meant for # 0-ary CPP macros) applies to n-ary macro definitions. # See the Autoconf documentation for `clear'. cat >confdef2sed.sed <<\_ACEOF s/[\\&,]/\\&/g s,[\\$`],\\&,g t clear : clear s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp t end s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp : end _ACEOF # If some macros were called several times there might be several times # the same #defines, which is useless. Nevertheless, we may not want to # sort them, since we want the *last* AC-DEFINE to be honored. uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs rm -f confdef2sed.sed # This sed command replaces #undef with comments. This is necessary, for # example, in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. cat >>conftest.undefs <<\_ACEOF s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, _ACEOF # Break up conftest.defines because some shells have a limit on the size # of here documents, and old seds have small limits too (100 cmds). echo ' # Handle all the #define templates only if necessary.' 
>>$CONFIG_STATUS echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS echo ' :' >>$CONFIG_STATUS rm -f conftest.tail while grep . conftest.defines >/dev/null do # Write a limited-size here document to $tmp/defines.sed. echo ' cat >$tmp/defines.sed <>$CONFIG_STATUS # Speed up: don't consider the non `#define' lines. echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS # Work around the forget-to-reset-the-flag bug. echo 't clr' >>$CONFIG_STATUS echo ': clr' >>$CONFIG_STATUS sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS echo 'CEOF sed -f $tmp/defines.sed $tmp/in >$tmp/out rm -f $tmp/in mv $tmp/out $tmp/in ' >>$CONFIG_STATUS sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail rm -f conftest.defines mv conftest.tail conftest.defines done rm -f conftest.defines echo ' fi # grep' >>$CONFIG_STATUS echo >>$CONFIG_STATUS # Break up conftest.undefs because some shells have a limit on the size # of here documents, and old seds have small limits too (100 cmds). echo ' # Handle all the #undef templates' >>$CONFIG_STATUS rm -f conftest.tail while grep . conftest.undefs >/dev/null do # Write a limited-size here document to $tmp/undefs.sed. echo ' cat >$tmp/undefs.sed <>$CONFIG_STATUS # Speed up: don't consider the non `#undef' echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS # Work around the forget-to-reset-the-flag bug. echo 't clr' >>$CONFIG_STATUS echo ': clr' >>$CONFIG_STATUS sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS echo 'CEOF sed -f $tmp/undefs.sed $tmp/in >$tmp/out rm -f $tmp/in mv $tmp/out $tmp/in ' >>$CONFIG_STATUS sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail rm -f conftest.undefs mv conftest.tail conftest.undefs done rm -f conftest.undefs cat >>$CONFIG_STATUS <<\_ACEOF # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. 
Generated by config.status. */ if test x"$ac_file" = x-; then echo "/* Generated by configure. */" >$tmp/config.h else echo "/* $ac_file. Generated by configure. */" >$tmp/config.h fi cat $tmp/in >>$tmp/config.h rm -f $tmp/in if test x"$ac_file" != x-; then if diff $ac_file $tmp/config.h >/dev/null 2>&1; then { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 echo "$as_me: $ac_file is unchanged" >&6;} else ac_dir=`(dirname "$ac_file") 2>/dev/null || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` { if $as_mkdir_p; then mkdir -p "$ac_dir" else as_dir="$ac_dir" as_dirs= while test ! -d "$as_dir"; do as_dirs="$as_dir $as_dirs" as_dir=`(dirname "$as_dir") 2>/dev/null || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` done test ! -n "$as_dirs" || mkdir $as_dirs fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} { (exit 1); exit 1; }; }; } rm -f $ac_file mv $tmp/config.h $ac_file fi else cat $tmp/config.h rm -f $tmp/config.h fi done _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF { (exit 0); exit 0; } _ACEOF chmod +x $CONFIG_STATUS ac_clean_files=$ac_clean_files_save # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. 
# Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || { (exit 1); exit 1; } fi configure.ac0000644001210100001440000000114411577654215012554 0ustar olifriusers# -*- Autoconf -*- # Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) AC_INIT(FULL-PACKAGE-NAME, VERSION, BUG-REPORT-ADDRESS) AC_CONFIG_SRCDIR([kalign2_hirschberg.h]) AC_CONFIG_HEADER([config.h]) # Checks for programs. AC_PROG_CC # Checks for libraries. # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS([stdlib.h string.h unistd.h getopt.h]) # Checks for typedefs, structures, and compiler characteristics. AC_C_CONST # Checks for library functions. AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CONFIG_FILES([Makefile]) AC_OUTPUT COPYING0000644001210100001440000004313111577654215011323 0ustar olifriusers GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
<one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library.
If this is what you want to do, use the GNU Library General Public License instead of this License. kalign2_advanced_gaps.c0000644001210100001440000006372511577654215014635 0ustar olifriusers
/*
	kalign2_advanced_gaps.c

	Released under GPL - see the 'COPYING' file

	Copyright (C) 2006 Timo Lassmann

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

	Please send bug reports, comments etc. to:
	timolassmann@gmail.com
*/

#include "kalign2.h"
#include "kalign2_advanced_gaps.h"

/*
 * Progressive alignment driver: walks the merge list in `tree` and aligns
 * the two profiles named by each step.
 *
 * aln                 - alignment record; sl[], nsip[] and sip[] are filled
 *                       in for every merged node c.
 * tree                - flat array of triples (a, b, c): profiles a and b are
 *                       aligned and the result is stored under index c.
 * submatrix           - substitution matrix handed to advanced_make_profile().
 *                       It is FREED here (32 rows, then the row table), so
 *                       the caller must not touch it afterwards.
 * map                 - the incoming value is ignored: a new table of
 *                       `numprofiles` path pointers is allocated and returned.
 * window, strength    - forwarded to advanced_smooth_gaps().
 * internal_gap_weight - forwarded to advanced_update().
 *
 * Returns the path table; map[c] is the path produced for merge node c.
 * Progress is written to stderr.
 *
 * NOTE(review): none of the malloc() results are checked.
 */
int** advanced_hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength,float internal_gap_weight)
{
	struct hirsch_mem* hm = 0;
	int i,j,g,a,b,c;
	int len_a;
	int len_b;
	float** profile = 0;

	/* One slot per profile, all starting out empty. */
	profile = malloc(sizeof(float*)*numprofiles);
	for ( i = 0;i< numprofiles;i++){
		profile[i] = 0;
	}

	/* The caller's map pointer is replaced by a freshly allocated table. */
	map = malloc(sizeof(int*)*numprofiles);
	for ( i = 0;i < numprofiles;i++){
		map[i] = 0;
	}

	hm = hirsch_mem_alloc(hm,1024);

	fprintf(stderr,"\nAlignment:\n");

	/* numseq-1 merge steps, one tree triple per step. */
	for (i = 0; i < (numseq-1);i++){
		a = tree[i*3];
		b = tree[i*3+1];
		c = tree[i*3+2];
		fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100);
		// fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100);
		len_a = aln->sl[a];
		len_b = aln->sl[b];

		/* g = length of the longer input; it sizes the path and the DP memory. */
		g = (len_a > len_b)? len_a:len_b;
		map[c] = malloc(sizeof(int) * (g+2));
		if(g > hm->size){
			hm = hirsch_mem_realloc(hm,g);
		}

		/* Every path entry starts at -1. */
		for (j = 0; j < (g+2);j++){
			// hirsch_path[j] = -1;
			map[c][j] = -1;
			// map[c][j] = 0;
		}
		// map[c][0] = len_a;
		//map[c][len_a+len_b+1] = 3;

		/* Indices below numseq get their profile built from the raw sequence here. */
		if (a < numseq){
			profile[a] = advanced_make_profile(profile[a],aln->s[a],len_a,submatrix);
		}
		if (b < numseq){
			profile[b] = advanced_make_profile(profile[b],aln->s[b],len_b,submatrix);
		}

		//set_gap_penalties(profile[a],len_a,aln->nsip[b]);
		advanced_smooth_gaps(profile[a],len_a,window,strength);
		//set_gap_penalties(profile[b],len_b,aln->nsip[a]);
		advanced_smooth_gaps(profile[b],len_b,window,strength);

		/* Whole-range alignment with the initial boundary states. */
		hm->starta = 0;
		hm->startb = 0;
		hm->enda = len_a;
		hm->endb = len_b;
		hm->len_a = len_a;
		hm->len_b = len_b;

		hm->f[0].a = 0.0;
		hm->f[0].ga = -FLOATINFTY;
		hm->f[0].gb = -FLOATINFTY;
		hm->b[0].a = 0.0;
		hm->b[0].ga = -FLOATINFTY;
		hm->b[0].gb = -FLOATINFTY;
		// fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq);

		/*
		 * Profile a goes first only when it is strictly shorter; otherwise
		 * the roles are swapped and the resulting path is mirrored back.
		 */
		if(len_a < len_b){
			map[c] = advanced_hirsch_pp_dyn(profile[a],profile[b],hm,map[c]);
		}else{
			hm->enda = len_b;
			hm->endb = len_a;
			hm->len_a = len_b;
			hm->len_b = len_a;
			map[c] = advanced_hirsch_pp_dyn(profile[b],profile[a],hm,map[c]);
			map[c] = mirror_hirsch_path(map[c],len_a,len_b);
		}

		map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b);

		/* The last merge step (i == numseq-2) does not build a merged profile. */
		if(i != numseq-2){
			profile[c] = malloc(sizeof(float)*64*(map[c][0]+2));
			profile[c] = advanced_update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b],internal_gap_weight);
		}

		/* map[c][0] is used as the length of the merged alignment. */
		aln->sl[c] = map[c][0];

		/* Node c owns the members of a followed by those of b, each list copied back to front. */
		aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
		aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
		g =0;
		for (j = aln->nsip[a];j--;){
			aln->sip[c][g] = aln->sip[a][j];
			g++;
		}
		for (j = aln->nsip[b];j--;){
			aln->sip[c][g] = aln->sip[b][j];
			g++;
		}

		/* The input profiles are not needed after this step. */
		free(profile[a]);
		free(profile[b]);
	}
	fprintf(stderr,"\r%8.0f percent done\n",100.0);
	free(profile);
	hirsch_mem_free(hm);

	/* Releases the caller's substitution matrix: 32 rows, then the row table. */
	for (i = 32;i--;){
		free(submatrix[i]);
	}
	free(submatrix);
	return map;
}

float*
advanced_make_profile(float* prof, int* seq,int len,float** subm) { int i,j,c; prof = malloc(sizeof(float)*(len+2)*64); prof += (64 *(len+1)); for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[24+32] = -gpe; prof[25+32] = -tgpe; prof[26] = 1; i = len; while(i--){ prof -= 64; for (j = 0;j < 64;j++){ prof[j] = 0; } prof[26] = 1;//number of residues // both additive c = seq[i]; prof[c] += 1.0; prof += 32; for(j = 23;j--;){ prof[j] = subm[c][j]; } prof[23] = -gpo; prof[24] = -gpe; prof[25] = -tgpe; prof -= 32; } prof -= 64; for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[24+32] = -gpe; prof[25+32] = -tgpe; prof[26] = 1; return prof; } void advanced_smooth_gaps(float* prof,int len,int window,float strength) { float tmp_gpo; float tmp_gpe; float tmp_tgpe; int i,j; if(!(window &1)){ window--; } for ( i = (window/2); i < len - (window/2);i++){ tmp_gpo = 0.0; tmp_gpe = 0.0; tmp_tgpe = 0.0; for (j = -(window/2); j < (window/2);j++){ tmp_gpo += (float)prof[55+((i+j)*64)]*strength; tmp_gpe += (float) prof[56+((i+j)*64)]*strength; tmp_tgpe += (float) prof[57+((i+j)*64)]*strength; } tmp_gpo /= window; tmp_gpe /= window; tmp_tgpe /= window; prof[27+(i*64)] = prof[55+(i*64)]*(1.0-strength) + tmp_gpo; prof[28+(i*64)] = prof[56+(i*64)]*(1.0-strength) + tmp_gpe; prof[29+(i*64)] = prof[57+(i*64)]*(1.0-strength) + tmp_tgpe; } } float* advanced_update(const float* profa,const float* profb,float* newp,int* path,int sipa,int sipb,float internal_gap_weight) { int i,j,c; for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; newp += 64; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. 
// the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. -> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 64; i--;){ newp[i] = profb[i]; } profb += 64; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); i = tgpe* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); }else{ newp[24] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight));//1; i = gpe* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight));//1; i = tgpe* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); newp[23] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight));//1; i += gpo* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); }else{ newp[23] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); i = gpo* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); i = tgpe* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); newp[23] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); i += gpo* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); }else{ newp[23] += (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); i = gpo* (profa[26] + ((sipa- profa[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } } } } if (path[c] & 2){ 
//fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 64; i--;){ newp[i] = profa[i]; } profa+=64; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight)); i = tgpe*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); }else{ newp[24] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i = gpe*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i = tgpe*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); newp[23] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i += gpo*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); }else{ newp[23] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i = gpo*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] +=(profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i = tgpe*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); newp[23] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i += gpo*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); }else{ newp[23] += (profb[26] + ((sipb- profb[26]) * internal_gap_weight));//1; i = gpo*(profb[26] + ((sipb- profb[26]) * internal_gap_weight)); } for (j = 32; j < 55;j++){ newp[j] -=i; } } } } newp += 64; c++; } for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } newp -= (path[0]+1) *64; return newp; } int* advanced_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int 
old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = advanced_foward_hirsch_pp_dyn(prof1,prof2,hm); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = advanced_backward_hirsch_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = advanced_hirsch_align_two_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* advanced_hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; float middle = (hm->endb - hm->startb)/2 + hm->startb; float sub = 0.0; prof1+= (64 * (old_cor[4]+1)); prof2 += 64 * (hm->startb); i = hm->startb; c = -1; for(i = hm->startb; i < hm->endb;i++){ sub = abs(middle -i); sub /= 1000; prof2 += 64; //fprintf(stderr,"%d %d %d \n",f[i].a,b[i].a,max); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[27]*prof1[26]-sub > max){ max = f[i].a+b[i].ga+prof2[27]*prof1[26]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[27]*prof2[26] -sub> max){ max = 
f[i].a+b[i].gb+prof1[27]*prof2[26]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[27]*prof1[26]-sub > max){ max = f[i].ga+b[i].a+prof2[27]*prof1[26]-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[29]*prof2[26]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]*prof2[26]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]*prof2[26]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]*prof2[26]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[27]*prof2[26]-sub > max){ max = f[i].gb+b[i].a+prof1[27]*prof2[26]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } i = hm->endb; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[27]*prof2[26]-sub > max){ max = f[i].a+b[i].gb+prof1[27]*prof2[26]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[29]*prof2[26]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]*prof2[26]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]*prof2[26]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]*prof2[26]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (64 * (old_cor[4]+1)); prof2 -= hm->endb << 6; 
//fprintf(stderr,"Transition:%d at:%d\n",transition,c); //if(transition == -1){ // exit(0); //} j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -INFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; 
hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; 
//fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = advanced_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; } return hirsch_path; } struct states* advanced_foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct 
hirsch_mem* hm) { unsigned int freq[26]; struct states* s = hm->f; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->starta) << 6; prof2 += (hm->startb) << 6; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb == 0){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; s[j].ga = s[j-1].a+prof2[29]*prof1[26]; if (s[j-1].ga+prof2[29]*prof1[26] > s[j].ga){ s[j].ga = s[j-1].ga+prof2[29]*prof1[26]; } s[j].gb = -FLOATINFTY; } prof2+=64; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; s[j].ga = s[j-1].a+prof2[27]*prof1[26]; if (s[j-1].ga+prof2[28]*prof1[26] > s[j].ga){ s[j].ga = s[j-1].ga+prof2[28]*prof1[26]; } s[j].gb = -FLOATINFTY; // prof2+=64; } prof2+=64; } prof2 -= (hm->endb-hm->startb) << 6; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; if(hm->startb == 0){ s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = pa+prof1[29] * prof2[26]; if(pgb+prof1[29] * prof2[26] > s[hm->startb].gb){ s[hm->startb].gb = pgb+prof1[29] * prof2[26]; } }else{ s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = pa+prof1[27]*prof2[26]; if(pgb+prof1[28]*prof2[26] > s[hm->startb].gb){ s[hm->startb].gb = pgb+prof1[28]*prof2[26]; } } for (j = hm->startb+1; j <= hm->endb;j++){ prof2 += 64; ca = s[j].a; if((pga += prof2[27-64]*prof1[26-64]) > pa){ pa = pga; } if((pgb += prof1[27-64]*prof2[26-64]) > pa){ pa = pgb; } prof2 += 32; for (c = freq[0];--c;){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j-1].a+prof2[27]*prof1[26]; if 
(s[j-1].ga+prof2[28]*prof1[26] > s[j].ga){ s[j].ga = s[j-1].ga+prof2[28]*prof1[26]; } pgb = s[j].gb; s[j].gb = ca+prof1[27]*prof2[26]; if(pgb+prof1[28]*prof2[26] > s[j].gb){ s[j].gb = pgb+prof1[28]*prof2[26]; } pa = ca; } prof2 -= (hm->endb-hm->startb) << 6; } prof1 -= 64 * (hm->enda); return s; } struct states* advanced_backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { unsigned int freq[26]; struct states* s = hm->b; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->enda+1) << 6; prof2 += (hm->endb+1) << 6; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(hm->endb == hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; s[j].ga = s[j+1].a+prof2[29]*prof1[26]; if (s[j+1].ga+prof2[29]*prof1[26] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]*prof1[26]; } s[j].gb = -FLOATINFTY; } prof2 -= 64; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; s[j].ga = s[j+1].a+prof2[27]*prof1[26]; if (s[j+1].ga+prof2[28]*prof1[26] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]*prof1[26]; } s[j].gb = -FLOATINFTY; // prof2 -= 64; } prof2 -= 64; } s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = -FLOATINFTY; // prof2 -= (endb -startb) << 6; i = hm->enda-hm->starta; while(i--){ prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; if(hm->endb == hm->len_b){ s[hm->endb].gb = pa+prof1[29]*prof2[26]; if(pgb+prof1[29]*prof2[26] > s[hm->endb].gb){ s[hm->endb].gb = pgb+prof1[29]*prof2[26]; } }else{ s[hm->endb].gb = pa+prof1[27]*prof2[26]; if(pgb+prof1[28]*prof2[26] > s[hm->endb].gb){ s[hm->endb].gb = pgb+prof1[28]*prof2[26]; } } //j 
= endb-startb; prof2 += (hm->endb-hm->startb) << 6; //while(j--){ for(j = hm->endb-1;j >= hm->startb;j--){ prof2 -= 64; ca = s[j].a; if((pga += prof2[64+27]*prof1[26]) > pa){ pa = pga; } if((pgb += prof1[64+27]*prof2[26]) > pa){ pa = pgb; } prof2 += 32; for (c = freq[0];--c;){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]*prof1[26]; if (s[j+1].ga+prof2[28]*prof1[26] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]*prof1[26]; } pgb = s[j].gb; s[j].gb = ca+prof1[27]*prof2[26]; if(pgb+prof1[28]*prof2[26] > s[j].gb){ s[j].gb = pgb+prof1[28]*prof2[26]; } pa = ca; } } return s; } kalign2_advanced_gaps.h0000644001210100001440000000315611577654215014632 0ustar olifriusers/* kalign2_advanced_gaps.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ void advanced_smooth_gaps(float* prof,int len,int window,float strength); float* advanced_make_profile(float* prof, int* seq,int len,float** subm); float* advanced_update(const float* profa,const float* profb,float* newp,int* path,int sipa,int sipb,float internal_gap_weight); int* advanced_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path); int* advanced_hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); struct states* advanced_foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); struct states* advanced_backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); kalign2_alignment_types.c0000644001210100001440000005260011577654215015246 0ustar olifriusers/* kalign2_alignment_types.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" int** default_alignment(struct alignment* aln,int* tree,float**submatrix, int** map) { struct dp_matrix *dp = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; float* profa = 0; float* profb = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); fprintf(stderr,"\nAlignment:\n"); //c = numseq; for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d %d %d\n",a,b,c,numseq,i); len_a = aln->sl[a]; len_b = aln->sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ map[c][j] = 0; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); } profa = profile[a]+64; profb = profile[b]+64; set_gap_penalties(profile[a],len_a,aln->nsip[b],0,aln->nsip[a]); set_gap_penalties(profile[b],len_b,aln->nsip[a],0,aln->nsip[b]); if(aln->nsip[a] == 1){ if(aln->nsip[b] == 1){ map[c] = ss_dyn(submatrix,map[c],dp,aln->s[a],aln->s[b],len_a,len_b); }else{ map[c] = ps_dyn(map[c],dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]); map[c] = mirror_path(map[c]); } }else{ if(aln->nsip[b] == 1){ map[c] = ps_dyn(map[c],dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]); }else{ if (len_a > len_b){ map[c] = pp_dyn(map[c],dp,profa,profb,len_a,len_b); }else{ map[c] = pp_dyn(map[c],dp,profb,profa,len_b,len_a); map[c] = mirror_path(map[c]); } } } profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2)); profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = 
malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile[numprofiles-1]); free(profile); dp_matrix_free(dp); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } /* int** aa_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int mmbonus) { struct dp_matrix *dp = 0; int i,j,g,a,b,c; int len_a; int len_b; int** profile = 0; int* profa = 0; int* profb = 0; int pbonus = 0; profile = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); c = numseq; for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c); len_a = aln->sl[a]; len_b = aln->sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ map[c][j] = 0; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); } profa = profile[a]; profb = profile[b]; set_gap_penalties(profa,len_a,aln->nsip[b]); set_gap_penalties(profb,len_b,aln->nsip[a]); pbonus = mmbonus * aln->nsip[a] * aln->nsip[b]; if (len_a > len_b){ map[c] = aapp_dyn(map[c],dp,profa,profb,len_a,len_b,pbonus); }else{ map[c] = aapp_dyn(map[c],dp,profb,profa,len_b,len_a,pbonus); map[c] = mirror_path(map[c]); } profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2)); profile[c] = update(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b]); aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = 
aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profa); free(profb); } free(profile[numprofiles-1]); free(profile); dp_matrix_free(dp); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; }*/ /* int** alter_gaps_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int n,float range,int weight) { struct dp_matrix *dp = 0; int i,j,g,a,b,c; int org_gpo = gpo; int org_gpe = gpe; int org_tgpe = tgpe; float gpo_step = 0; float gpe_step = 0; float tgpe_step = 0; int len_a; int len_b; int** profile = 0; int* profa = 0; int* profb = 0; int* path = 0; int* fprofa = 0; int* fprofb = 0; if(!(n &1)){ n--; } float per = 0.0; per =(float) range*2/(n+1); gpo_step = (float)gpo * per; gpe_step = (float)gpe * per; tgpe_step = (float)tgpe * per; profile = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); c = numseq; for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c); len_a = aln->sl[a]; len_b = aln->sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ map[c][j] = 0; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); } profa = profile[a]; profb = profile[b]; fprofa = malloc(sizeof(int)*(len_a+1)*2); for (j = 0;j < (len_a+1)*2;j++){ fprofa[j] = 0; } fprofb = malloc(sizeof(int)*(len_b+1)*2); for (j = 0;j < (len_b+1)*2;j++){ fprofb[j] = 0; } gpo = org_gpo - ((int)gpo_step* (n/2)); gpe = org_gpe - ((int)gpe_step* (n/2)); tgpe = org_tgpe - ((int)tgpe_step* (n/2)); for (j = 0; j < n;j++){ set_gap_penalties(profa,len_a,aln->nsip[b]); 
set_gap_penalties(profb,len_b,aln->nsip[a]); path = malloc(sizeof(int) * (len_a+len_b+2)); for (g = len_a+len_b+2;g--;){ path[g] = 0; } if(aln->nsip[a] == 1){ if(aln->nsip[b] == 1){ path = ss_dyn(submatrix,path,dp,aln->s[a],aln->s[b],len_a,len_b); }else{ path = ps_dyn(path,dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]); path = mirror_path(path); } }else{ if(aln->nsip[b] == 1){ path = ps_dyn(path,dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]); }else{ if (len_a > len_b){ path = pp_dyn(path,dp,profa,profb,len_a,len_b); }else{ path = pp_dyn(path,dp,profb,profa,len_b,len_a); path = mirror_path(path); } } } fprintf(stderr,"Test alignment with gpo:%d gpe:%d tgpe:%d\n",gpo,gpe,tgpe); add_feature_information_from_alignment(path,fprofa,fprofb,weight/n); gpo += (int)gpo_step; gpe += (int)gpe_step; tgpe += (int)tgpe_step; } gpo = org_gpo; gpe = org_gpe; tgpe = org_tgpe; set_gap_penalties(profa,len_a,aln->nsip[b]); set_gap_penalties(profb,len_b,aln->nsip[a]); if (len_a > len_b){ // map[c] = f_only_pp_dyn(map[c],dp,fprofa,fprofb,len_a,len_b,1,2); map[c] = fpp_dyn(map[c],dp,profa,profb,fprofa,fprofb,len_a,len_b,1,2); }else{ // map[c] = f_only_pp_dyn(map[c],dp,fprofb,fprofa,len_b,len_a,1,2); map[c] = fpp_dyn(map[c],dp,profb,profa,fprofb,fprofa,len_b,len_a,1,2); map[c] = mirror_path(map[c]); } profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2)); profile[c] = update(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b]); aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profa); free(profb); free(fprofa); free(fprofb); } free(profile[numprofiles-1]); free(profile); dp_matrix_free(dp); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; }*/ /* int** test_alignment(struct alignment* aln,int* tree,float **submatrix, int** map,float 
internal_gap_weight,int window,float strength) { struct dp_matrix *dp = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; float* profa = 0; float* profb = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); c = numseq; for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c); len_a = aln->sl[a]; len_b = aln->sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ map[c][j] = 0; } if (a < numseq){ profile[a] = make_profile2(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = make_profile2(profile[b],aln->s[b],len_b,submatrix); } profa = profile[a]; profb = profile[b]; set_gap_penalties2(profa,len_a,aln->nsip[b],window,strength); set_gap_penalties2(profb,len_b,aln->nsip[a],window,strength); if(aln->nsip[a] == 1){ if(aln->nsip[b] == 1){ map[c] = ss_dyn2(submatrix,map[c],dp,aln->s[a],aln->s[b],len_a,len_b); }else{ // map[c] = ps_dyn2(map[c],dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]); map[c] = pp_dyn2(map[c],dp,profb,profa,len_b,len_a); map[c] = mirror_path(map[c]); } }else{ if(aln->nsip[b] == 1){ // map[c] = ps_dyn2(map[c],dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]); map[c] = pp_dyn2(map[c],dp,profa,profb,len_a,len_b); }else{ if (len_a > len_b){ map[c] = pp_dyn2(map[c],dp,profa,profb,len_a,len_b); }else{ map[c] = pp_dyn2(map[c],dp,profb,profa,len_b,len_a); map[c] = mirror_path(map[c]); } } } profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2)); profile[c] = update2(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b],internal_gap_weight); aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = 
aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profa); free(profb); } free(profile[numprofiles-1]); free(profile); dp_matrix_free(dp); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; }*/ /* int** feature_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,struct feature_matrix* fm) { struct dp_matrix *dp = 0; int i,j,g,a,b,c; int len_a; int len_b; int** profile = 0; int* profa = 0; int* profb = 0; int** fprofile = 0; int* fprofa = 0; int* fprofb = 0; profile = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } fprofile = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ fprofile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); c = numseq; //if(!param->dna){ for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c); len_a = aln->sl[a]; len_b = aln->sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ map[c][j] = 0; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); // fprintf(stderr,"Making feature profile for %d (%s)\n",a,aln->sn[a]); fprofile[a] = make_feature_profile(fprofile[a],aln->ft[a],len_a,fm); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); // fprintf(stderr,"Making feature profile for %d (%s)\n",b,aln->sn[b]); fprofile[b] = make_feature_profile(fprofile[b],aln->ft[b],len_b,fm); } //profa = profile[a]; //profb = profile[b]; profa = profile[a]+64; profb = profile[b]+64; fprofa = fprofile[a]; fprofb = fprofile[b]; set_gap_penalties(profile[a],len_a,aln->nsip[b]); set_gap_penalties(profile[b],len_b,aln->nsip[a]); if (len_a > len_b){ map[c] = fpp_dyn(map[c],dp,profa,profb,fprofa,fprofb,len_a,len_b,fm->mdim,fm->stride); }else{ map[c] = 
fpp_dyn(map[c],dp,profb,profa,fprofb,fprofa,len_b,len_a,fm->mdim,fm->stride); map[c] = mirror_path(map[c]); } profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2)); profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); fprofile[c] = malloc(sizeof(int)*fm->stride*(len_a+len_b+2)); fprofile[c] = feature_update(fprofa,fprofb,fprofile[c],map[c],fm->stride); aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); free(fprofa); free(fprofb); } free(profile[numprofiles-1]); free(profile); free(fprofile[numprofiles-1]); free(fprofile ); dp_matrix_free(dp); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); free_feature_matrix(fm); return map; }*/ struct ntree_data* ntree_sub_alignment(struct ntree_data* ntree_data,int* tree,int num) { struct dp_matrix *dp = 0; struct alignment* aln = 0; int i,j,g,a,b,c; int len_a; int len_b; float** local_profile = 0; float* profa = 0; float* profb = 0; int** local_map = 0; int* local_sl = 0; int* local_nsip = 0; int** local_sip = 0; int* which_to_alloc = 0; aln = ntree_data->aln; which_to_alloc = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ which_to_alloc[i] = 0; } local_profile = malloc(sizeof(float*)*numprofiles); local_sl = malloc(sizeof(int)*numprofiles); local_nsip = malloc(sizeof(int)*numprofiles); local_sip = malloc(sizeof(int*)*numprofiles); for (i = 0; i < num-1;i++){ a = tree[i*3+1]; if(!which_to_alloc[a]){ which_to_alloc[a] = 1; } b = tree[i*3+2]; if(!which_to_alloc[b]){ which_to_alloc[b] = 1; } c = tree[i*3+3]; if(!which_to_alloc[c]){ which_to_alloc[c] = 2; } } //for ( i = 0;i< numprofiles;i++){ // fprintf(stderr,"alloc?:%d %d\n",i,which_to_alloc[i]); //} // exit(0); for ( i = 0;i< numprofiles;i++){ 
if(which_to_alloc[i] == 1){ local_profile[i] = ntree_data->profile[i]; local_sl[i] = aln->sl[i]; local_nsip[i] = aln->nsip[i]; local_sip[i] = malloc(sizeof(int*)*aln->nsip[i]); for(j = 0;j < aln->nsip[i];j++){ local_sip[i][j] = aln->sip[i][j]; } }else{ local_profile[i] = 0; local_sl[i] = 0; local_nsip[i] = 0; local_sip[i] = 0; } } /* for ( i = 0;i< numprofiles;i++){ local_profile[i] = ntree_data->profile[i]; local_sl[i] = aln->sl[i]; local_nsip[i] = aln->nsip[i]; if(aln->sip[i]){ fprintf(stderr,"Allocing..:%d\n",aln->nsip[i]); local_sip[i] = malloc(sizeof(int*)*aln->nsip[i]); for(j = 0;j < aln->nsip[i];j++){ local_sip[i][j] = aln->sip[i][j]; } }else{ local_sip[i] = 0; } }*/ local_map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ local_map[i] = 0; } dp = dp_matrix_alloc(dp,511,511); c = numseq; for (i = 0; i < num-1;i++){ a = tree[i*3+1]; b = tree[i*3+2]; c = tree[i*3+3]; // fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c); len_a = local_sl[a]; len_b = local_sl[b]; dp = dp_matrix_realloc(dp,len_a,len_b); local_map[c] = malloc(sizeof(int) * (len_a+len_b+2)); for (j = len_a+len_b+2;j--;){ local_map[c][j] = 0; } if (a < numseq){ local_profile[a] = make_profile(local_profile[a],aln->s[a],len_a,ntree_data->submatrix); } if (b < numseq){ local_profile[b] = make_profile(local_profile[b],aln->s[b],len_b,ntree_data->submatrix); } profa = local_profile[a]; profb = local_profile[b]; set_gap_penalties(profa,len_a,local_nsip[b],0,local_nsip[a]); set_gap_penalties(profb,len_b,local_nsip[a],0,local_nsip[b]); if(local_nsip[a] == 1){ if(local_nsip[b] == 1){ local_map[c] = ss_dyn(ntree_data->submatrix,local_map[c],dp,aln->s[a],aln->s[b],len_a,len_b); }else{ local_map[c] = ps_dyn(local_map[c],dp,profb,aln->s[a],len_b,len_a,local_nsip[b]); local_map[c] = mirror_path(local_map[c]); } }else{ if(local_nsip[b] == 1){ local_map[c] = ps_dyn(local_map[c],dp,profa,aln->s[b],len_a,len_b,local_nsip[a]); }else{ if (len_a > len_b){ local_map[c] = 
pp_dyn(local_map[c],dp,profa,profb,len_a,len_b); }else{ local_map[c] = pp_dyn(local_map[c],dp,profb,profa,len_b,len_a); local_map[c] = mirror_path(local_map[c]); } } } local_profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2)); local_profile[c] = update(profa,profb,local_profile[c],local_map[c],local_nsip[a],local_nsip[b]); local_sl[c] = local_map[c][0]; local_nsip[c] = local_nsip[a] + local_nsip[b]; local_sip[c] = malloc(sizeof(int)*(local_nsip[a] + local_nsip[b])); g =0; for (j = local_nsip[a];j--;){ local_sip[c][g] = local_sip[a][j]; g++; } for (j = local_nsip[b];j--;){ local_sip[c][g] = local_sip[b][j]; g++; } // free(profa); // free(profb); } if(ntree_data->profile[c]){ if(ntree_data->map[c][ntree_data->map[c][0]+2] < local_map[c][local_map[c][0]+2]){ fprintf(stderr,"%d\n",local_map[c][local_map[c][0]+2]); //remove old map,profile,etc.. for (i = 0; i < num-1;i++){ c = tree[i*3+3]; free(ntree_data->map[c]); free(ntree_data->profile[c]); free(aln->sip[c]); ntree_data->map[c] = malloc(sizeof(int)*(local_map[c][0]+3)); for (j = 0; j < local_map[c][0]+3;j++){ ntree_data->map[c][j] = local_map[c][j]; } aln->sip[c] = malloc(sizeof(int)*local_nsip[c]); aln->nsip[c] = local_nsip[c]; for (j = 0; j < local_nsip[c];j++){ aln->sip[c][j] = local_sip[c][j]; } aln->sl[c] = local_sl[c]; } ntree_data->profile[c] = malloc(sizeof(int)*64*(aln->sl[c]+1)); for (i = 0; i < (64*(aln->sl[c]+1));i++){ ntree_data->profile[c][i] = local_profile[c][i]; } ntree_data->tree[0] -= (tree[0]-1); for (j = 1; j < tree[0];j++){ ntree_data->tree[ntree_data->tree[0]+j-1] = tree[j]; } ntree_data->tree[0] += (tree[0]-1); }else{ fprintf(stderr,"no improvement\n"); } }else{ fprintf(stderr,"%d\n",local_map[c][local_map[c][0]+2]); for (i = 0; i < num-1;i++){ c = tree[i*3+3]; ntree_data->map[c] = malloc(sizeof(int)*(local_map[c][0]+3)); for (j = 0; j < local_map[c][0]+3;j++){ ntree_data->map[c][j] = local_map[c][j]; } aln->sip[c] = malloc(sizeof(int)*local_nsip[c]); aln->nsip[c] = local_nsip[c]; for (j = 
0; j < local_nsip[c];j++){ aln->sip[c][j] = local_sip[c][j]; } aln->sl[c] = local_sl[c]; } ntree_data->profile[c] = malloc(sizeof(int)*64*(aln->sl[c]+1)); for (i = 0; i < (64*(aln->sl[c]+1));i++){ ntree_data->profile[c][i] = local_profile[c][i]; } for (j = 1; j < tree[0];j++){ ntree_data->tree[ntree_data->tree[0]+j-1] = tree[j]; } ntree_data->tree[0] += tree[0]-1; } for ( i = 0;i< numprofiles;i++){ if(which_to_alloc[i] == 1){ free(local_sip[i]); if(i < numseq){ free(local_profile[i]); } } if(which_to_alloc[i] == 2){ free(local_profile[i]); free(local_map[i]); free(local_sip[i]); } } free(which_to_alloc); free(local_map); free(local_sip); free(local_nsip); free(local_profile); free(local_sl); dp_matrix_free(dp); return ntree_data; } struct ntree_data* ntree_alignment(struct ntree_data* ntree_data) { int i; ntree_data->profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ ntree_data->profile[i] = 0; } ntree_data->map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ ntree_data->map[i] = 0; } ntree_data = alignntree(ntree_data,ntree_data->realtree); for ( i = 0;i< numprofiles;i++){ if(ntree_data->profile[i]){ free(ntree_data->profile[i]); } } free(ntree_data->profile); for (i = 32;i--;){ free(ntree_data->submatrix[i]); } free(ntree_data->submatrix); free_real_tree(ntree_data->realtree); return ntree_data; } kalign2_conservation.c0000644001210100001440000000311011577654215014546 0ustar olifriusers/* kalign2_conservation.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include #include "kalign2.h" void entrophy(int* prof,int len) { int i,j; float shannon = 0.0; float log_two = log(2); fprintf(stderr,"%f\n",log_two); for ( i = 0; i < len; i++){ shannon = 0.0; //prof[3] = 10; //prof[23] += 10; for ( j = 0; j < 23;j++){ if(prof[j]){ shannon += (float)prof[j]* log((float)prof[j]/(float)prof[23])/log_two; // fprintf(stderr,"%f += %d/%d * %f\n",shannon,prof[j],prof[23],log((float)prof[j]/(float)prof[23])/log_two); } } fprintf(stderr,"%f ",shannon); if (prof[23] < 23){ shannon = -shannon / (log((float)prof[23])/log_two); }else{ shannon = -shannon / (log((float)23)/log_two); } fprintf(stderr,"%f\n",shannon); prof+=64; } } kalign2_distance_calculation.c0000644001210100001440000005124411577654215016217 0ustar olifriusers/* kalign2_distance_calculation.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" float** protein_pairwise_alignment_distance(struct alignment* aln,float** dm,struct parameters* param,float**subm, int nj) { int i,j,c; int * path = 0; int len_a = 0; int len_b = 0; struct dp_matrix *dp = 0; int a,b; fprintf(stderr,"Distance Calculation:\n"); b = (numseq*(numseq-1))/2; a = 1; dp = dp_matrix_alloc(dp,511,511); if (nj){ dm = malloc (sizeof(float*)*numprofiles); for (i = numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(numprofiles)); for (j = numprofiles;j--;){ dm[i][j] = 0.0f; } } }else{ dm = malloc (sizeof(float*)*numseq); for (i = numseq;i--;){ dm[i] = malloc (sizeof (float)*(numseq)); for (j = numseq;j--;){ dm[i][j] = 0.0f; } } } /*dm = malloc (sizeof(float*)*numprofiles); for (i = numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(numprofiles)); for (j = numprofiles;j--;){ dm[i][j] = 0.0f; } }*/ for (i = 0; i < numseq-1;i++){ len_a = aln->sl[i]; for(j = i+1; j < numseq;j++){ len_b = aln->sl[j]; path = malloc(sizeof(int) * (len_a+len_b+2)); for (c = len_a+len_b+2;c--;){ path[c] = 0; } dp = dp_matrix_realloc(dp,len_a,len_b); path = ss_dyn(subm,path,dp,aln->s[i],aln->s[j],len_a,len_b); dm[i][j] = get_distance_from_pairwise_alignment(path,aln->s[i],aln->s[j]); dm[j][i] = dm[i][j]; fprintf(stderr,"\r%8.0f percent done",(float)a /(float)b * 100); a++; free(path); } } dp_matrix_free(dp); return dm; } float get_distance_from_pairwise_alignment(int* path,int* seq1,int* seq2) { float dist = 0; int i,j,c; int pairs = 0; int identical = 0; i = 0; j = 0; c = 1; while(path[c] != 3){ if (!path[c]){ if (seq1[i] == seq2[j]){ identical++; } pairs++; i++; j++; } if (path[c] & 1){ j++; } if (path[c] & 2){ i++; } c++; } dist = (float)identical/(float)pairs*100; //dist = (float)identical/(float)c;//pairs*100; return dist; } float** protein_wu_distance2(struct alignment* aln,float** dm,struct parameters* param) { struct node* hash[1024]; int i,j; unsigned int hv; int*p =0; for (i = 0;i < 1024;i++){ hash[i] = 0; } 
if(!aln->ft){ aln->ft = malloc(sizeof(struct feature* ) * (numseq)); for(i =0;i < numseq;i++){ aln->ft[i] = 0; } } dm = malloc (sizeof(float*)*numprofiles); for (i = numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(numprofiles)); for (j = numprofiles;j--;){ dm[i][j] = 0.0f; } } for (i = 0; i < numseq-1;i++){ p = aln->s[i]; for (j = aln->sl[i]-2;j--;){ hv = (p[j] << 5) + p[j+1]; hash[hv] = insert_hash(hash[hv],j+1); // hash[hv] = insert_hash(hash[hv],j+1); hv = (p[j] << 5) + p[j+2]; hash[hv] = insert_hash(hash[hv],j+1); hv = (p[j+1] << 5) + p[j+2]; hash[hv] = insert_hash(hash[hv],j+1); } for (j = i+1; j < numseq;j++){ dm[i][j] = protein_wu_distance_calculation3(hash,aln->s[j],aln->sl[j],aln->sl[j]+aln->sl[i],param->zlevel); // aln = protein_wu_sw2(hash,aln,i,j); dm[i][j] /= (aln->sl[i] > aln->sl[j]) ? aln->sl[j] : aln->sl[i]; dm[j][i] = dm[i][j]; } for (j = 1024;j--;){ if (hash[j]){ remove_nodes(hash[j]); hash[j] = 0; } } } return dm; } float protein_wu_distance_calculation2(struct node* hash[],int* seq,int seqlen,int diagonals,int mode) { struct node* node_p; int* d = 0; float out = 0.0; int i; unsigned int hv; d = malloc(sizeof(int)*diagonals); //for (i = diagonals;i--;){ for (i = 0;i < diagonals;i++){ d[i] = 0; } for (i = seqlen-2;i--;){ hv = (seq[i] << 5) + seq[i+1]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ d[node_p->pos]++; // d[node_p->pos+1]++; node_p = node_p->next; } } hv = (seq[i] << 5) + seq[i+2]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ d[node_p->pos]++; // d[node_p->pos+1]++; node_p = node_p->next; } } hv = (seq[i+1] << 5) + seq[i+2]; if (hash[hv]){ node_p = hash[hv]; while(node_p){ d[node_p->pos]++; node_p = node_p->next; } } d++; } //exit(0); d -= (seqlen-2); for (i = diagonals;i--;){ //printf("%d ",d[i]); if(d[i] > mode){ out += d[i]; } } free(d); return out; } struct alignment* protein_wu_sw(struct node* hash[],struct alignment* aln,int a,int b) { int*seq = aln->s[b]; int len_a = 
aln->sl[b]; int len_b = aln->sl[a]; struct node* node_p = 0; int i,c; unsigned int hv; //int notel = aln->lsn[a] + aln->lsn[b]; struct feature *n = 0; //float counta[1024]; //float countb[1024]; int *weight = 0; int *len = 0; int* added = 0; weight = malloc(sizeof(int*)*(len_a+len_b-1)); len = malloc(sizeof(int*)*(len_a+len_b-1)); added = malloc(sizeof(int*)*(len_a+len_b-1)); for (i = 0; i <(len_a+len_b-1);i++){ weight[i] = 0; len[i] = 0; added[i] = 0; } //for (i = 0; i <1024;i++){ // counta[i] = 0; // countb[i] = 0; // if(hash[i]){ // node_p = hash[i]; // while(node_p){ // countb[i]++; // node_p = node_p->next; // } // fprintf(stderr,"COUNT:%d %f\n",i,countb[i]); // } //} //for (i = len_a-2;i--;){ // hv = (seq[i+1] << 5) + seq[i+2]; // counta[hv]++; // hv = (seq[i] << 5) + seq[i+1]; // counta[hv]++; // hv = (seq[i] << 5) + seq[i+2]; // counta[hv]++; //} c = 1; for (i = len_a-2;i--;){ for (hv = 0; hv <(len_a+len_b-1);hv++){ added[hv] = 0; } hv = (seq[i] << 5) + seq[i+1]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } hv = (seq[i] << 5) + seq[i+2]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } hv = (seq[i+1] << 5) + seq[i+2]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } // fprintf(stderr,"pos_a:%d ",i+1); for (hv = 0; hv <(len_a+len_b-1);hv++){ len[hv] += added[hv]; if(!added[hv] && len[hv]){ if(len[hv] > 10){ n = malloc(sizeof(struct feature)); n->next = 0; n->color = 0; n->type = malloc(sizeof(char)*8); n->type[0] = 'w'; n->type[1] = 'u'; n->type[2] = 'm'; n->type[3] = 'a'; n->type[4] = 'n'; n->type[5] = 'b'; 
n->type[6] = 'e'; n->type[7] = 'r'; n->type[8] = 0; n->start = i+2; n->end = len[hv]+n->start -1; n->note = malloc(sizeof(char)*(2)); n->note[0] = 'w'; n->note[1] = 0; /*n->note = malloc(sizeof(char)*(notel+1)); for (j = 0;j < aln->lsn[a];j++){ n->note[j] = aln->sn[a][j]; } while(j < notel){ n->note[j] = aln->sn[b][j-aln->lsn[a]]; j++; } n->note[notel] = 0;*/ //n->note[0] = 'w'; //n->note[1] = 0; if(! aln->ft[b]){ aln->ft[b] = n; }else{ n->next = aln->ft[b]; aln->ft[b] = n; } //if((old_n = aln->ft[b])!= 0){ // while(old_n->next!=0){ // old_n = old_n->next; // } // old_n->next = n; //}else{ // aln->ft[b] = n; //} n = 0; n = malloc(sizeof(struct feature)); n->next = 0; n->color = 0; n->type = malloc(sizeof(char)*8); n->type[0] = 'w'; n->type[1] = 'u'; n->type[2] = 'm'; n->type[3] = 'a'; n->type[4] = 'n'; n->type[5] = 'b'; n->type[6] = 'e'; n->type[7] = 'r'; n->type[8] = 0; n->start = (hv - (len_a))+i+3; n->end = len[hv]+n->start -1; n->note = malloc(sizeof(char)*(2)); n->note[0] = 'w'; n->note[1] = 0; /*n->note = malloc(sizeof(char)*(notel+1)); for (j = 0;j < aln->lsn[a];j++){ n->note[j] = aln->sn[a][j]; } while(j < notel){ n->note[j] = aln->sn[b][j-aln->lsn[a]]; j++; } n->note[notel] = 0;*/ if(! 
aln->ft[a]){ aln->ft[a] = n; }else{ n->next = aln->ft[a]; aln->ft[a] = n; } //if((old_n = aln->ft[a])!= 0){ // while(old_n->next!=0){ // old_n = old_n->next; // } // old_n->next = n; //}else{ // aln->ft[a] = n; //} n = 0; // fprintf(stderr,"\nDiagonal found A:%d %d\n",i+2,len[hv]); // fprintf(stderr,"Diagonal found B:%d %d\n",(hv - (len_a))+i+3,len[hv]); } len[hv] = 0; weight[hv] = 0; } // fprintf(stderr,"%d,%d ",hv,(hv - (len_a))+i+3); } // fprintf(stderr,"\n"); c++; } i++; //fprintf(stderr,"pos_a:%d ",i); for (hv = 0; hv <(len_a+len_b-1);hv++){ if(len[hv]){ if(len[hv] > 10){ n = malloc(sizeof(struct feature)); n->next = 0; n->color = 0; n->type = malloc(sizeof(char)*8); n->type[0] = 'w'; n->type[1] = 'u'; n->type[2] = 'm'; n->type[3] = 'a'; n->type[4] = 'n'; n->type[5] = 'b'; n->type[6] = 'e'; n->type[7] = 'r'; n->type[8] = 0; n->start = i+1; n->end = len[hv]+n->start-1; /* n->note = malloc(sizeof(char)*(notel+1)); for (j = 0;j < aln->lsn[a];j++){ n->note[j] = aln->sn[a][j]; } while(j < notel){ n->note[j] = aln->sn[b][j-aln->lsn[a]]; j++; } n->note[notel] = 0;*/ n->note = malloc(sizeof(char)*(2)); n->note[0] = 'w'; n->note[1] = 0; if(! aln->ft[b]){ aln->ft[b] = n; }else{ n->next = aln->ft[b]; aln->ft[b] = n; } /* if((old_n = aln->ft[b])!= 0){ while(old_n->next!=0){ old_n = old_n->next; } old_n->next = n; }else{ aln->ft[b] = n; }*/ n = 0; n = malloc(sizeof(struct feature)); n->next = 0; n->color = 0; n->type = malloc(sizeof(char)*8); n->type[0] = 'w'; n->type[1] = 'u'; n->type[2] = 'm'; n->type[3] = 'a'; n->type[4] = 'n'; n->type[5] = 'b'; n->type[6] = 'e'; n->type[7] = 'r'; n->type[8] = 0; n->start = hv - len_a+i+2; n->end = len[hv]+n->start-1; n->note = malloc(sizeof(char)*(2)); n->note[0] = 'w'; n->note[1] = 0; /* n->note = malloc(sizeof(char)*(notel+1)); for (j = 0;j < aln->lsn[a];j++){ n->note[j] = aln->sn[a][j]; } while(j < notel){ n->note[j] = aln->sn[b][j-aln->lsn[a]]; j++; } n->note[notel] = 0;*/ if(! 
aln->ft[a]){ aln->ft[a] = n; }else{ n->next = aln->ft[a]; aln->ft[a] = n; } /*if((old_n = aln->ft[a])!= 0){ while(old_n->next!=0){ old_n = old_n->next; } old_n->next = n; }else{ aln->ft[a] = n; }*/ n = 0; // fprintf(stderr,"\nDiagonal found A:%d %d\n",i+1,len[hv]); // fprintf(stderr,"Diagonal found B:%d %d\n",hv - len_a+i+2,len[hv]); } len[hv] = 0; weight[hv] = 0; } // fprintf(stderr,"%d,%d ",hv,hv - len_a+i+2); } //fprintf(stderr,"\n"); free(weight); free(len); free(added); //n =aln->ft[a]; //while(n){ // fprintf(stderr,"%s %s %d-%d\n",n->type,n->note,n->start,n->end); // n = n->next; //} //exit(0); return aln; } float protein_wu_distance_calculation3(struct node* hash[],int* seq,int seqlen,int diagonals,int mode) { struct node* node_p = 0; int i,c; unsigned int hv; int dlen = 0; int *weight = 0; int *len = 0; int* added = 0; weight = malloc(sizeof(int*)*diagonals); len = malloc(sizeof(int*)*diagonals); added = malloc(sizeof(int*)*diagonals); for (i = 0; i < diagonals;i++){ weight[i] = 0; len[i] = 0; added[i] = 0; } c = 1; for (i = seqlen-2;i--;){ for (hv = 0; hv < diagonals;hv++){ added[hv] = 0; } hv = (seq[i] << 5) + seq[i+1]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } hv = (seq[i] << 5) + seq[i+2]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } hv = (seq[i+1] << 5) + seq[i+2]; //printf("3:%d\n",hv); if (hash[hv]){ node_p = hash[hv]; while(node_p){ added[node_p->pos+c] = 1; weight[node_p->pos+c]++; // len[node_p->pos+c] = 1 + len[node_p->pos+c]; node_p = node_p->next; } } // fprintf(stderr,"pos_a:%d ",i+1); for (hv = 0; hv < diagonals ;hv++){ len[hv] += added[hv]; if(!added[hv] && len[hv]){ if (len[hv] > dlen){ dlen = len[hv]; } len[hv] = 0; weight[hv] = 0; } // 
fprintf(stderr,"%d,%d ",hv,(hv - (len_a))+i+3); } // fprintf(stderr,"\n"); c++; } i++; //fprintf(stderr,"pos_a:%d ",i); for (hv = 0; hv < diagonals;hv++){ if(len[hv]){ if (len[hv] > dlen){ dlen = len[hv]; } len[hv] = 0; weight[hv] = 0; } } free(weight); free(len); free(added); return dlen; } float** protein_wu_distance(struct alignment* si,float** dm,struct parameters* param, int nj) { struct bignode* hash[1024]; int*p =0; int i,j,a,b; unsigned int hv; float min; float cutoff; for (i = 0;i < 1024;i++){ hash[i] = 0; } if (nj){ dm = malloc (sizeof(float*)*numprofiles); for (i = numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(numprofiles)); for (j = numprofiles;j--;){ dm[i][j] = 0.0f; } } }else{ dm = malloc (sizeof(float*)*numseq); for (i = numseq;i--;){ dm[i] = malloc (sizeof (float)*(numseq)); for (j = numseq;j--;){ dm[i][j] = 0.0f; } } } fprintf(stderr,"Distance Calculation:\n"); b = (numseq*(numseq-1))/2; a = 1; for (i = 0; i < numseq-1;i++){ p = si->s[i]; for (j = si->sl[i]-2;j--;){ //for(j = 0; j < si->sl[i]-2;j++){ //hv = (p[j+1] << 5) + p[j+2]; //hash[hv] = big_insert_hash(hash[hv],j); hv = (p[j] << 5) + p[j+1]; hash[hv] = big_insert_hash(hash[hv],j); hv = (p[j] << 5) + p[j+2]; hash[hv] = big_insert_hash(hash[hv],j); } for (j = i+1; j < numseq;j++){ min = (si->sl[i] > si->sl[j]) ? si->sl[j] :si->sl[i]; cutoff = param->internal_gap_weight *min + param->zlevel; //cutoff = param->zlevel; p = si->s[j]; dm[i][j] = protein_wu_distance_calculation(hash,p,si->sl[j],si->sl[j]+si->sl[i],cutoff); //fprintf(stderr,"%d-%d:%f\n",i,j,dm[i][j]); //exit(0); //dm[i][j] /= min; //dm[i][j] /= (si->sl[i] > si->sl[j]) ? 
si->sl[j] :si->sl[i]; dm[j][i] = dm[i][j]; fprintf(stderr,"\r%8.0f percent done",(float)a /(float)b * 100); a++; } for (j = 1024;j--;){ if (hash[j]){ big_remove_nodes(hash[j]); hash[j] = 0; } } } return dm; } float protein_wu_distance_calculation(struct bignode* hash[],const int* seq,const int seqlen,const int diagonals,const float mode) { struct bignode* node_p; unsigned int* d = 0; unsigned int* tmp = 0; float out = 0.0; register int i,j; register int c; register int num; register unsigned int hv; d = malloc(sizeof(unsigned int)*diagonals); //for (i = diagonals;i--;){ for (i = 0;i < diagonals;i++){ d[i] = 0; } for (i = seqlen-2;i--;){ //for(i = 0; i < seqlen-2;i++){ /*hv = (seq[i+1] << 5) + seq[i+2]; node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; }*/ hv = (seq[i] << 5) + seq[i+1]; //printf("3:%d\n",hv); node_p = hash[hv]; while(node_p){ tmp = node_p->pos; num = node_p->num; for(j = 0;j < num;j++){ c = tmp[j]; d[c]++; c++; d[c]++; } node_p = node_p->next; } hv = (seq[i] << 5) + seq[i+2]; node_p = hash[hv]; while(node_p){ tmp = node_p->pos; num = node_p->num; for(j = 0;j < num;j++){ c = tmp[j]; d[c]++; } node_p = node_p->next; } d++; } //exit(0); d -= (seqlen-2); for (i = diagonals;i--;){ //d[i] /= minlen; //fprintf(stderr,"%d ",d[i]); if(d[i] > mode){ out += d[i]; // printf("%f %d\n",d[i]/ minlen,d[i]); } } free(d); return out; } float** dna_distance(struct alignment* si,float** dm,struct parameters* param, int nj) { struct bignode* hash[1024]; int *p = 0; int i,j,a,b; unsigned int hv; fprintf(stderr,"Distance Calculation:\n"); for (i = 0;i < 1024;i++){ hash[i] = 0; } if (nj){ dm = malloc (sizeof(float*)*numprofiles); for (i = numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(numprofiles)); for (j = numprofiles;j--;){ dm[i][j] = 0.0f; } } }else{ dm = malloc (sizeof(float*)*numseq); for (i = numseq;i--;){ dm[i] = malloc (sizeof (float)*(numseq)); for (j = numseq;j--;){ dm[i][j] = 0.0f; } } 
} b = (numseq*(numseq-1))/2; a = 1; for (i = 0; i < numseq-1;i++){ p = si->s[i]; for (j = si->sl[i]-5;j--;){ hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+3]&3)<<2) + (p[j+4]&3);//ABCDE hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+3]&3)<<2) + (p[j+5]&3);//ABCDF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ABCEF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+3]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ABDEF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+2]&3)<<6) + ((p[j+3]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ACDEF hash[hv] = big_insert_hash(hash[hv],j); } for (j = i+1; j < numseq;j++){ //min = (si->sl[i] > si->sl[j]) ?si->sl[j] :si->sl[i]; dm[i][j] = dna_distance_calculation(hash,si->s[j],si->sl[j],si->sl[j]+si->sl[i],param->zlevel); dm[i][j] /= (si->sl[i] > si->sl[j]) ?si->sl[j] :si->sl[i]; dm[j][i] = dm[i][j]; fprintf(stderr,"\r%8.0f percent done",(float)a /(float)b * 100); a++; } for (j = 1024;j--;){ if (hash[j]){ big_remove_nodes(hash[j]); hash[j] = 0; } } } return dm; } float dna_distance_calculation(struct bignode* hash[],int* p,int seqlen,int diagonals,float mode) { struct bignode* node_p; float out = 0.0; unsigned int* tmp = 0; unsigned int* d = 0; int i,j; unsigned int hv; d = malloc(sizeof(int)*diagonals); for (i = 0;i < diagonals;i++){ d[i] = 0; } for (i = seqlen-5;i--;){ hv = ((p[i]&3)<<8) + ((p[i+1]&3)<<6) + ((p[i+2]&3)<<4) + ((p[i+3]&3)<<2) + (p[i+4]&3);//ABCDE if (hash[hv]){ node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; } } hv = ((p[i]&3)<<8) + ((p[i+1]&3)<<6) + ((p[i+2]&3)<<4) + ((p[i+3]&3)<<2) + (p[i+5]&3);//ABCDF if (hash[hv]){ node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; } } hv = 
((p[i]&3)<<8) + ((p[i+1]&3)<<6) + ((p[i+2]&3)<<4) + ((p[i+4]&3)<<2) + (p[i+5]&3);//ABCEF if (hash[hv]){ node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; } } hv = ((p[i]&3)<<8) + ((p[i+1]&3)<<6) + ((p[i+3]&3)<<4) + ((p[i+4]&3)<<2) + (p[i+5]&3);//ABDEF if (hash[hv]){ node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; } } hv = ((p[i]&3)<<8) + ((p[i+2]&3)<<6) + ((p[i+3]&3)<<4) + ((p[i+4]&3)<<2) + (p[i+5]&3);//ACDEF if (hash[hv]){ node_p = hash[hv]; while(node_p){ tmp = node_p->pos; for(j = 0;j < node_p->num;j++){ d[tmp[j]]++; } node_p = node_p->next; } } d++; } //exit(0); d -= (seqlen-5); for (i = diagonals;i--;){ //d[i] /= minlen; //printf("%d ",d[i]); if(d[i] > mode){ //fprintf(stderr,"%f %d\n",d[i]/ minlen,d[i]); out += d[i]; } } free(d); return out; } kalign2_dp.c0000644001210100001440000014374111577654215012456 0ustar olifriusers/* kalign2_dp.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" int* f_only_pp_dyn(int* path, struct dp_matrix *dp,const float* fprof1,const float* fprof2,const int len_a,const int len_b,int fdim,int stride) { // unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; register int f = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; fprof1 += len_a * stride; s[len_b].a = 0.0; s[len_b].ga = -FLOATINFTY; s[len_b].gb = -FLOATINFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -FLOATINFTY; //s[j].ga = 0; s[j].ga = s[j+1].a;//+prof2[29]; if (s[j+1].ga > s[j].ga){ s[j].ga = s[j+1].ga ; } s[j].gb = -FLOATINFTY; tracep[j] = 8; } s[0].a = -FLOATINFTY; s[0].ga = -FLOATINFTY; s[0].gb = -FLOATINFTY; i = len_a; while(--i){ fprof1 -= stride; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -FLOATINFTY; s[len_b].ga = -FLOATINFTY; //s[len_b].gb = 0; s[len_b].gb = pa;//+prof1[29]; if(pgb > s[len_b].gb){ s[len_b].gb = pgb; } tracep[len_b] = 16; j = len_b; fprof2 += len_b * stride; while(--j){ fprof2 -= stride; ca = s[j].a; c = 1; if((pga) > pa){ pa = pga; c = 2; } if((pgb) > pa){ pa = pgb; c = 4; } for (f = 0; f < fdim;f++){ // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); pa += fprof1[f] * fprof2[f+fdim]; } s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a; if (s[j+1].ga > s[j].ga){ s[j].ga = s[j+1].ga; c |= 8; } pgb = s[j].gb; s[j].gb = ca; if(pgb > s[j].gb){ s[j].gb = pgb; c |= 16; } tracep[j] = c; pa = ca; } fprof2 -= stride; //LAST CELL (0) ca = s[0].a; c = 1; if((pga) > pa){ pa = pga; c = 2; } if((pgb) > pa){ pa = pgb; c = 4; } for (f = 0; f < fdim;f++){ // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); pa += fprof1[f] * fprof2[f+fdim]; } s[0].a = pa; s[0].ga = -FLOATINFTY; pgb = s[0].gb; s[0].gb = ca; 
if(pgb> s[0].gb){ s[0].gb = pgb; c |= 16; } tracep[0] = c; } fprof1 -= stride; tracep = trace[0]; j = len_b; fprof2 += len_b * stride; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; //s[j].gb = -INFTY; s[len_b].gb = pa;//+prof1[29]; if(pgb > s[len_b].gb){ s[len_b].gb = pgb; } while(--j){ fprof2 -= stride; ca = s[j].a; c = 1; if((pga) > pa){ pa = pga; c = 2; } if((pgb) > pa){ pa = pgb; c = 4; } for (f = 0; f < fdim;f++){ pa += fprof1[f] * fprof2[f+fdim]; // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); } s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a; if (s[j+1].ga > s[j].ga){ s[j].ga = s[j+1].ga; c |= 8; } pgb = s[j].gb; s[j].gb = -FLOATINFTY; tracep[j] = c; pa = ca; } fprof2 -= stride; ca = s[0].a; c = 1; if((pga) > pa){ pa = pga; c = 2; } if((pgb) > pa){ pa = pgb; c = 4; } for (f = 0; f < fdim;f++){ pa += fprof1[f] * fprof2[f+fdim]; // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); } s[0].a = pa; s[0].ga = s[1].a; if (s[1].ga > s[0].ga){ s[0].ga = s[1].ga; c |= 8; } pgb = s[0].gb; s[0].gb = ca; if(pgb> s[0].gb){ s[0].gb = pgb; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); ca = c; i = 0; j = 0; f = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(f){ case 0: if (trace[i][j] & 2){ f = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ f = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ f = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ 
path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ f = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* fpp_dyn(int* path, struct dp_matrix *dp,const float* prof1,const float* prof2,const float* fprof1,const float* fprof2,const int len_a,const int len_b,int fdim,int stride) { unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; register int f = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; fprof1 += len_a * stride; s[len_b].a = 0; s[len_b].ga = -FLOATINFTY; s[len_b].gb = -FLOATINFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -FLOATINFTY; //s[j].ga = 0; s[j].ga = s[j+1].a+prof2[29];//+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; } s[j].gb = -FLOATINFTY; tracep[j] = 8; } s[0].a = -FLOATINFTY; s[0].ga = -FLOATINFTY; s[0].gb = -FLOATINFTY; i = len_a; while(--i){ prof1 -= 64; fprof1 -= stride; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -FLOATINFTY; s[len_b].ga = -FLOATINFTY; //s[len_b].gb = 0; s[len_b].gb = pa+prof1[29];//+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; prof2 += len_b << 6; fprof2 += len_b * stride; while(--j){ prof2 -= 64; fprof2 -= stride; ca = s[j].a; c = 1; if((pga += prof2[91]) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (f = 
freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2 -= 32; for (f = 0; f < fdim;f++){ // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); pa += fprof1[f] * fprof2[f+fdim]; } s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]; if (s[j+1].ga+prof2[28] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } prof2 -= 64; fprof2 -= stride; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2 -= 32; for (f = 0; f < fdim;f++){ // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); pa += fprof1[f] * fprof2[f+fdim]; } s[0].a = pa; s[0].ga = -FLOATINFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; fprof1 -= stride; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[0]; j = len_b; prof2 += len_b << 6; fprof2 += len_b * stride; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; //s[j].gb = -INFTY; s[len_b].gb = pa+prof1[29];//+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ prof2 -= 64; fprof2 -= stride; ca = s[j].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2-=32; for (f = 0; f < fdim;f++){ pa += fprof1[f] * fprof2[f+fdim]; // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); } s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -FLOATINFTY; tracep[j] = c; pa = ca; } prof2 -= 
64; fprof2 -= stride; ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2-=32; for (f = 0; f < fdim;f++){ pa += fprof1[f] * fprof2[f+fdim]; // fprintf(stderr,"%d %d: %d\n",i,j,fprof1[pga] * fprof2[pga+fdim]); } s[0].a = pa; s[0].ga = s[1].a+prof2[27]+prof2[29]; if (s[1].ga+prof2[29] > s[0].ga){ s[0].ga = s[1].ga+prof2[29]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); f = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(f){ case 0: if (trace[i][j] & 2){ f = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ f = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ f = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ f = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } /* int* dna_pp_dyn(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b) { 
struct states* s = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a * 22; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; //s[j].ga = 0; s[j].ga = s[j+1].a+prof2[10];//+prof2[29]; if (s[j+1].ga+prof2[10] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[10]; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ prof1 -= 22; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = -INFTY; //s[len_b].gb = 0; s[len_b].gb = pa+prof1[10];//+prof1[29]; if(pgb+prof1[10] > s[len_b].gb){ s[len_b].gb = pgb+prof1[10]; } tracep[len_b] = 16; j = len_b; prof2 += len_b *22; while(--j){ prof2 -= 22; ca = s[j].a; c = 1; if((pga += prof2[30]) > pa){ pa = pga; c = 2; } if((pgb += prof1[30]) > pa){ pa = pgb; c = 4; } prof2 += 11; for (pga = 8;pga--;){ pa += prof1[pga]*prof2[pga]; } prof2 -= 11; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[8]; if (s[j+1].ga+prof2[9] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[9]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[8]; if(pgb+prof1[9] > s[j].gb){ s[j].gb = pgb+prof1[9]; c |= 16; } tracep[j] = c; pa = ca; } prof2 -= 22; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[30]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[30]) > pa){ pa = pgb; c = 4; } prof2 += 11; for (pga = 8;pga--;){ pa += prof1[pga]*prof2[pga]; } prof2 -= 11; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca+prof1[8]+prof1[10]; if(pgb+prof1[10] > s[0].gb){ s[0].gb = pgb+prof1[10]; c |= 16; } tracep[0] = c; } prof1 -= 22; tracep = trace[0]; j = len_b; prof2 += len_b *22; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; //s[j].gb = -INFTY; s[len_b].gb = 
pa+prof1[10];//+prof1[29]; if(pgb+prof1[10] > s[len_b].gb){ s[len_b].gb = pgb+prof1[10]; } while(--j){ prof2 -= 22; ca = s[j].a; c = 1; if((pga+=prof2[30]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[30]) > pa){ pa = pgb; c = 4; } prof2+=11; for (pga = 8;pga--;){ pa += prof1[pga]*prof2[pga]; } prof2-=11; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[2]+prof2[10]; if (s[j+1].ga+prof2[10] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[10]; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } prof2 -= 22; ca = s[0].a; c = 1; if((pga+=prof2[30]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[30]) > pa){ pa = pgb; c = 4; } prof2+=11; for (pga = 8;pga--;){ pa += prof1[pga]*prof2[pga]; } prof2-=11; s[0].a = pa; s[0].ga = s[1].a+prof2[8]+prof2[10]; if (s[1].ga+prof2[10] > s[0].ga){ s[0].ga = s[1].ga+prof2[10]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[8]+prof1[10]; if(pgb +prof1[10]> s[0].gb){ s[0].gb = pgb+prof1[10]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); ca = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ ca = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ ca = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ 
if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* pp_dyn2(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b) { unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; s[j].ga = s[j+1].a+prof2[29];//+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ prof1 -= 64; c = 1; for (j = 23; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; prof2 += len_b << 6; while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga += prof2[91]) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]; if (s[j+1].ga+prof2[28] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } prof2 -= 64; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } 
if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; c = 1; for (j = 23; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[0]; j = len_b; prof2 += len_b << 6; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } prof2 -= 64; ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[0].a = pa; s[0].ga = s[1].a+prof2[27]+prof2[29]; if (s[1].ga+prof2[29] > s[0].ga){ s[0].ga = s[1].ga+prof2[29]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } ca = c; int ga = 1; int gb = 1; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ if(i ==0 || j == 0){ path[c] |= 128; } if(i ==len_a || j == len_b){ path[c] |= 64; } switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; }else if (trace[i][j] & 4){ ca = 2; } path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; }else{ path[c-(gb-1)] |= gb << 16; gb = 
0; ca = 0; } path[c] |= 1; j++; gb++; break; case 2: if(trace[i][j] & 16){ ca = 2; }else{ path[c-(ga-1)] |= ga << 16; ga = 0; ca = 0; } path[c] |= 2; i++; ga++; break; } c++; } if (ca == 1){ path[c-(gb-1)] |= (gb-1) << 16; } if(ca == 2){ path[c-(ga-1)] |= (ga-1) << 16; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* ps_dyn2(int* path, struct dp_matrix *dp,const int* prof1,const int* seq2,const int len_a,const int len_b,int sip) { struct states* s = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; const int open = gpo * sip; const int ext = gpe *sip; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; s[j].ga = s[j+1].a-tgpe; if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ prof1 -= 64; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; while(--j){ ca = s[j].a; c = 1; if((pga -= open) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-open; if (s[j+1].ga-ext > s[j].ga){ s[j].ga = s[j+1].ga-ext; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[0]]; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ 
s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; tracep = trace[0]; j = len_b; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ ca = s[j].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-(open+tgpe); if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[0]]; s[0].a = pa; s[0].ga = s[1].a-(open+tgpe); if (s[1].ga-tgpe > s[0].ga){ s[0].ga = s[1].ga-tgpe; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } ca = c; int ga = 1; int gb = 1; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ if(i ==0 || j == 0){ path[c] |= 128; } if(i ==len_a || j == len_b){ path[c] |= 64; } switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; }else if (trace[i][j] & 4){ ca = 2; } path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; }else{ path[c-(gb-1)] |= gb << 16; gb = 0; ca = 0; } path[c] |= 1; j++; gb++; break; case 2: if(trace[i][j] & 16){ ca = 2; }else{ path[c-(ga-1)] |= ga << 16; ga = 0; ca = 0; } path[c] |= 2; i++; ga++; break; } c++; } if (ca == 1){ path[c-(gb-1)] |= (gb-1) << 16; } if(ca == 2){ path[c-(ga-1)] |= (ga-1) << 16; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* ss_dyn2(int**subm,int* path, struct dp_matrix *dp,const int* seq1,const int* seq2,const int len_a,const int len_b) { struct states* s = 0; int *subp = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; 
register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; s[j].ga = s[j+1].a-tgpe; if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = -INFTY; s[len_b].gb = pa-tgpe; if(pgb-tgpe > s[len_b].gb){ s[len_b].gb = pgb-tgpe; } tracep[len_b] = 16; j = len_b; subp = subm[seq1[i]]; while(--j){ ca = s[j].a; c = 1; if((pga -= gpo) > pa){ pa = pga; c = 2; } if((pgb -= gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-gpo; if (s[j+1].ga-gpe > s[j].ga){ s[j].ga = s[j+1].ga-gpe; c |= 8; } pgb = s[j].gb; s[j].gb = ca-gpo; if(pgb-gpe > s[j].gb){ s[j].gb = pgb-gpe; c |= 16; } tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[0]]; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca-(gpo+tgpe); if(pgb-tgpe > s[0].gb){ s[0].gb = pgb-tgpe; c |= 16; } tracep[0] = c; } subp = subm[seq1[0]]; tracep = trace[0]; j = len_b; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; s[j].gb = pa-tgpe; if(pgb-tgpe > s[j].gb){ s[j].gb = pgb-tgpe; } while(--j){ ca = s[j].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-(gpo+tgpe); if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[0]]; s[0].a = pa; 
s[0].ga = s[1].a-(gpo+tgpe); if (s[1].ga-tgpe > s[0].ga){ s[0].ga = s[1].ga-tgpe; c |= 8; } pgb = s[0].gb; s[0].gb = ca-(gpo+tgpe); if(pgb-tgpe > s[0].gb){ s[0].gb = pgb-tgpe; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } ca = c; int ga = 1; int gb = 1; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ if(i ==0 || j == 0){ path[c] |= 128; } if(i ==len_a || j == len_b){ path[c] |= 64; } switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; }else if (trace[i][j] & 4){ ca = 2; } path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; }else{ path[c-(gb-1)] |= gb << 16; gb = 0; ca = 0; } path[c] |= 1; j++; gb++; break; case 2: if(trace[i][j] & 16){ ca = 2; }else{ path[c-(ga-1)] |= ga << 16; ga = 0; ca = 0; } path[c] |= 2; i++; ga++; break; } c++; } if (ca == 1){ path[c-(gb-1)] |= (gb-1) << 16; } if(ca == 2){ path[c-(ga-1)] |= (ga-1) << 16; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* aapp_dyn(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b,const int mmbonus) { unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; s[j].ga = s[j+1].a+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[i]; pa = s[len_b].a + mmbonus; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = 
-INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; prof2 += len_b << 6; while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga += prof2[91]) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]; if (s[j+1].ga+prof2[28] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca+ mmbonus; } prof2 -= 64; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[0]; j = len_b; prof2 += len_b << 6; pa = s[j].a+ mmbonus; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca+ mmbonus; } prof2 -= 64; ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = 
freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[0].a = pa; s[0].ga = s[1].a+prof2[27]+prof2[29]; if (s[1].ga+prof2[29] > s[0].ga){ s[0].ga = s[1].ga+prof2[29]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); ca = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ ca = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ ca = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; }*/ int* pp_dyn(int* path, struct dp_matrix *dp,const float* prof1,const float* prof2,const int len_a,const int len_b) { unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; register int f = 0; s = dp->s; 
trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0.0; s[len_b].ga = -FLOATINFTY; s[len_b].gb = -FLOATINFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -FLOATINFTY; s[j].ga = s[j+1].a+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -FLOATINFTY; s[0].ga = -FLOATINFTY; s[0].gb = -FLOATINFTY; i = len_a; while(--i){ prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -FLOATINFTY; s[len_b].ga = -FLOATINFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; prof2 += len_b << 6; while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga += prof2[91]) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]; if (s[j+1].ga+prof2[28] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } prof2 -= 64; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2 -= 32; s[0].a = pa; s[0].ga = -FLOATINFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[0]; j = len_b; prof2 += len_b << 6; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ 
s[len_b].gb = pgb+prof1[29]; } while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2-=32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -FLOATINFTY; tracep[j] = c; pa = ca; } prof2 -= 64; ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (f = freq[0];--f;){ pa += prof1[freq[f]]*prof2[freq[f]]; } prof2-=32; s[0].a = pa; s[0].ga = s[1].a+prof2[27]+prof2[29]; if (s[1].ga+prof2[29] > s[0].ga){ s[0].ga = s[1].ga+prof2[29]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); f = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(f){ case 0: if (trace[i][j] & 2){ f = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ f = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ f = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ f = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ 
f = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* ps_dyn(int* path, struct dp_matrix *dp,const float* prof1,const int* seq2,const int len_a,const int len_b,int sip) { struct states* s = 0; char** trace = 0; char* tracep = 0; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; register int f = 0; const float open = gpo * sip; const float ext = gpe *sip; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0.0; s[len_b].ga = -FLOATINFTY; s[len_b].gb = -FLOATINFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -FLOATINFTY; //s[j].ga = 0; s[j].ga = s[j+1].a-tgpe;//-topen; if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; } s[j].gb = -FLOATINFTY; tracep[j] = 8; } s[0].a = -FLOATINFTY; s[0].ga = -FLOATINFTY; s[0].gb = -FLOATINFTY; i = len_a; while(--i){ prof1 -= 64; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -FLOATINFTY; s[len_b].ga = -FLOATINFTY; //s[len_b].gb = 0; s[len_b].gb = pa+prof1[29];//+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; while(--j){ ca = s[j].a; c = 1; if((pga -= open) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-open; if (s[j+1].ga-ext > s[j].ga){ s[j].ga = s[j+1].ga-ext; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } //LAST CELL (0) ca = s[0].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[0]]; s[0].a = pa; s[0].ga = -FLOATINFTY; pgb = s[0].gb; s[0].gb = 
ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; tracep = trace[0]; j = len_b; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; //s[j].gb = -INFTY; s[len_b].gb = pa+prof1[29];//+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ ca = s[j].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-(open+tgpe); if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=open) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } pa += prof1[32+seq2[0]]; s[0].a = pa; s[0].ga = s[1].a-(open+tgpe); if (s[1].ga-tgpe > s[0].ga){ s[0].ga = s[1].ga-tgpe; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); f = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(f){ case 0: if (trace[i][j] & 2){ f = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ f = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ f = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ f = 2; if(j !=0 && 
j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } int* ss_dyn(float**subm,int* path, struct dp_matrix *dp,const int* seq1,const int* seq2,const int len_a,const int len_b) { struct states* s = 0; const float *subp = 0; char** trace = 0; char* tracep = 0; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; register int f = 0; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; s[len_b].a = 0.0; s[len_b].ga = -FLOATINFTY; s[len_b].gb = -FLOATINFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -FLOATINFTY; //s[j].ga = 0; s[j].ga = s[j+1].a-tgpe;//-gpo; if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; } s[j].gb = -FLOATINFTY; tracep[j] = 8; } s[0].a = -FLOATINFTY; s[0].ga = -FLOATINFTY; s[0].gb = -FLOATINFTY; i = len_a; while(--i){ tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -FLOATINFTY; s[len_b].ga = -FLOATINFTY; //s[len_b].gb = 0; s[len_b].gb = pa-tgpe;//-gpo; if(pgb-tgpe > s[len_b].gb){ s[len_b].gb = pgb-tgpe; } tracep[len_b] = 16; j = len_b; subp = subm[seq1[i]]; while(--j){ ca = s[j].a; c = 1; if((pga -= gpo) > pa){ pa = pga; c = 2; } if((pgb -= gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-gpo; if (s[j+1].ga-gpe > s[j].ga){ s[j].ga = s[j+1].ga-gpe; c |= 8; } pgb = s[j].gb; s[j].gb = ca-gpo; if(pgb-gpe > s[j].gb){ s[j].gb = pgb-gpe; c |= 16; } tracep[j] = c; pa = ca; } //LAST CELL (0) ca = s[0].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[0]]; s[0].a = pa; s[0].ga = -FLOATINFTY; pgb = 
s[0].gb; s[0].gb = ca-(gpo+tgpe); if(pgb-tgpe > s[0].gb){ s[0].gb = pgb-tgpe; c |= 16; } tracep[0] = c; } subp = subm[seq1[0]]; tracep = trace[0]; j = len_b; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; s[j].gb = pa-tgpe;//-gpo; if(pgb-tgpe > s[j].gb){ s[j].gb = pgb-tgpe; } //s[j].gb = -INFTY; while(--j){ ca = s[j].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a-(gpo+tgpe); if (s[j+1].ga-tgpe > s[j].ga){ s[j].ga = s[j+1].ga-tgpe; c |= 8; } pgb = s[j].gb; s[j].gb = -FLOATINFTY; tracep[j] = c; pa = ca; } ca = s[0].a; c = 1; if((pga-=gpo) > pa){ pa = pga; c = 2; } if((pgb-=gpo) > pa){ pa = pgb; c = 4; } pa += subp[seq2[0]]; s[0].a = pa; s[0].ga = s[1].a-(gpo+tgpe); if (s[1].ga-tgpe > s[0].ga){ s[0].ga = s[1].ga-tgpe; c |= 8; } pgb = s[0].gb; s[0].gb = ca-(gpo+tgpe); if(pgb-tgpe > s[0].gb){ s[0].gb = pgb-tgpe; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } f = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(f){ case 0: if (trace[i][j] & 2){ f = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ f = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ f = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ f = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ 
if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ f = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } kalign2_feature.c0000644001210100001440000010245211577654214013477 0ustar olifriusers/* kalign2_feature.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_feature.h" static int stride; static int dim; static int gpo_pos; static int gpe_pos; static int tgpe_pos; int** feature_hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,struct feature_matrix* fm) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; stride = (26+fm->mdim)*2 + 3; dim = 26+fm->mdim; gpo_pos = (dim << 1) + 0; gpe_pos = (dim << 1) + 1; tgpe_pos = (dim << 1) + 2; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = make_unified_profile(profile[a],aln,a,submatrix,fm); } set_unified_gap_penalties(profile[a],len_a,aln->nsip[b]); if (b < numseq){ profile[b] = make_unified_profile(profile[b],aln,b,submatrix,fm); } set_unified_gap_penalties(profile[b],len_b,aln->nsip[a]); hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; //dim = 26; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(len_a < len_b){ // fprintf(stderr,"normal\n"); map[c] = feature_hirsch_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ // fprintf(stderr,"goofy\n"); hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = feature_hirsch_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*stride*(map[c][0]+2)); profile[c] = feature_hirschberg_update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); free_feature_matrix(fm); return map; } float* feature_hirschberg_update(const float* profa,const float* profb,float* newp,int* path,int sipa,int sipb) { int i,j,c; for (i = stride; i--;){ 
newp[i] = profa[i] + profb[i]; } profa += stride; profb += stride; newp += stride; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. -> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = stride; i--;){ newp[i] = profa[i] + profb[i]; } profa += stride; profb += stride; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = stride; i--;){ newp[i] = profb[i]; } profb += stride; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; }else{ newp[24] += sipa;//1; i = gpe*sipa; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; newp[23] += sipa;//1; i += gpo*sipa; }else{ newp[23] += sipa;//1; i = gpo*sipa; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; newp[23] += sipa;//1; i += gpo*sipa; }else{ newp[23] += sipa;//1; i = gpo*sipa; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } } } } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = stride; i--;){ newp[i] = profa[i]; } profa+=stride; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipb;//1; i = tgpe*sipb; }else{ newp[24] += sipb;//1; i = gpe*sipb; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] 
& 32){ newp[25] += sipb;//1; i = tgpe*sipb; newp[23] += sipb;//1; i += gpo*sipb; }else{ newp[23] += sipb;//1; i = gpo*sipb; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipb;//1; i = tgpe*sipb; newp[23] += sipb;//1; i += gpo*sipb; }else{ newp[23] += sipb;//1; i = gpo*sipb; } for (j = dim; j < dim+23;j++){ newp[j] -=i; } } } } newp += stride; c++; } for (i = stride; i--;){ newp[i] = profa[i] + profb[i]; } newp -= (path[0]+1) * stride; return newp; } float* make_unified_profile(float* prof,struct alignment* aln, int num,float** subm,struct feature_matrix* fm) { struct feature* f = aln->ft[num]; int i,j,c; int* seq = aln->s[num]; //detemine minimim width of profile... //stride = (26+fm->mdim)*2 + 3; int len = aln->sl[num]; prof = malloc(sizeof(float)*(len+2)*stride); prof += (stride *(len+1)); for (i = 0;i < stride;i++){ prof[i] = 0; } prof[23+dim] = -gpo; prof[24+dim] = -gpe; prof[25+dim] = -tgpe; i = len; while(i--){ prof -= stride; for (j = 0;j < stride;j++){ prof[j] = 0; } c = seq[i]; prof[c] += 1; prof += dim; for(j = 0; j < 23;j++){ prof[j] = subm[c][j]; } prof[23] = -gpo; prof[24] = -gpe; prof[25] = -tgpe; prof -= dim; } prof -= stride; for (i = 0;i < stride;i++){ prof[i] = 0; } prof[23+dim] = -gpo; prof[24+dim] = -gpe; prof[25+dim] = -tgpe; while(f){ if(f->color != -1){ if(f->start < len && f->end < len){ for (i = f->start;i <= f->end;i++){ prof[i*stride+26 + f->color] += 1; //prof[i*stride+dim+26 + f->color] += 75; //fprintf(stderr,"FOUND on %d : %s %s\n",num,f->type,f->note); for ( j = 0 ; j < fm->mdim ;j++){ prof[i*stride+dim+26+j] += fm->m[f->color][j]; } } } } f = f->next; } //exit(0); return prof; } void set_unified_gap_penalties(float* prof,int len,int nsip) { int i; prof += (stride *(len+1)); prof[gpo_pos] = prof[dim+23]*nsip; prof[gpe_pos] = prof[dim+24]*nsip; prof[tgpe_pos] = prof[dim+25]*nsip; i = len+1; while(i--){ prof -= stride; prof[gpo_pos] = prof[dim+23]*nsip; 
prof[gpe_pos] = prof[dim+24]*nsip; prof[tgpe_pos] = prof[dim+25]*nsip; } } int* feature_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = feature_foward_hirsch_pp_dyn(prof1,prof2,hm); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = feature_backward_hirsch_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = feature_hirsch_align_two_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* feature_hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path, float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -FLOATINFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; prof1+= (stride * (old_cor[4]+1)); //prof2 += stride * (hm->startb); //i = hm->startb; prof2 += stride * (old_cor[2]); i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; prof2 += stride; 
//fprintf(stderr,"%d %d %d \n",f[i].a,b[i].a,max); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[gpo_pos]-sub > max){ max = f[i].a+b[i].ga+prof2[gpo_pos]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[gpo_pos] -sub> max){ max = f[i].a+b[i].gb+prof1[gpo_pos]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[gpo_pos]-sub > max){ max = f[i].ga+b[i].a+prof2[gpo_pos]-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[tgpe_pos]-sub > max){ max = f[i].gb+b[i].gb+prof1[tgpe_pos]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[gpe_pos]-sub > max){ max = f[i].gb+b[i].gb+prof1[gpe_pos]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[gpo_pos]-sub > max){ max = f[i].gb+b[i].a+prof1[gpo_pos]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[gpo_pos]-sub > max){ max = f[i].a+b[i].gb+prof1[gpo_pos]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[tgpe_pos]-sub > max){ max = f[i].gb+b[i].gb+prof1[tgpe_pos]-sub; // 
fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[gpe_pos]-sub > max){ max = f[i].gb+b[i].gb+prof1[gpe_pos]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (stride * (old_cor[4]+1)); //prof2 -= hm->endb << 6; prof2 -= old_cor[3] * stride; //fprintf(stderr,"Transition:%d at:%d\n",transition,c); //if(transition == -1){ // exit(0); //} j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = 
-FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; 
hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = 
old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = feature_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; } return hirsch_path; } struct states* feature_foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { unsigned int freq[dim]; struct states* s = hm->f; //const int starta = hm->starta; //const int enda = hm->enda; //const int startb = hm->startb; //const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->starta) * stride; prof2 += (hm->startb) * stride; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb == 0){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=stride; s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a+prof2[tgpe_pos]; //if (s[j-1].ga+prof2[tgpe_pos] > s[j].ga){ // s[j].ga = s[j-1].ga+prof2[tgpe_pos]; //} if(s[j-1].ga > s[j-1].a){ s[j].ga = s[j-1].ga+prof2[tgpe_pos]; }else{ s[j].ga = s[j-1].a+prof2[tgpe_pos]; } s[j].gb = -FLOATINFTY; } prof2+=stride; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=stride; s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a+prof2[gpo_pos]; //if (s[j-1].ga+prof2[gpe_pos] > s[j].ga){ // s[j].ga = s[j-1].ga+prof2[gpe_pos]; //} if(s[j-1].ga+prof2[gpe_pos] > s[j-1].a+prof2[gpo_pos]){ s[j].ga = s[j-1].ga+prof2[gpe_pos]; }else{ s[j].ga = s[j-1].a+prof2[gpo_pos]; } s[j].gb = -FLOATINFTY; // prof2+=64; } prof2+=stride; } prof2 -= (hm->endb-hm->startb) * stride; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += stride; c = 1; for (j = 0;j < dim; j++){ if(prof1[j]){ freq[c] = j; c++; } } 
freq[0] = c; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; if(hm->startb == 0){ s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; //s[hm->startb].gb = pa+prof1[tgpe_pos]; //if(pgb+prof1[tgpe_pos] > s[hm->startb].gb){ // s[hm->startb].gb = pgb+prof1[tgpe_pos]; //} if(pgb > pa){ s[hm->startb].gb = pgb+prof1[tgpe_pos]; }else{ s[hm->startb].gb = pa+prof1[tgpe_pos]; } }else{ s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; //s[hm->startb].gb = pa+prof1[gpo_pos]; //if(pgb+prof1[gpe_pos] > s[hm->startb].gb){ // s[hm->startb].gb = pgb+prof1[gpe_pos]; //} if(pgb+prof1[gpe_pos] > pa+prof1[gpo_pos]){ s[hm->startb].gb = pgb+prof1[gpe_pos]; }else{ s[hm->startb].gb = pa+prof1[gpo_pos]; } } for (j = hm->startb+1; j <= hm->endb;j++){ prof2 += stride; ca = s[j].a; /*pga += prof2[-37]; pga = pa - pga; pa = pa -((pga>>31)&pga); pgb += prof1[-37]; pa = pa -(((pa - pgb)>>31)&(pa -pgb));*/ //fprintf(stderr,"%d %d %d %p %d\n",i,j,gpo_pos-stride,prof2); if((pga += prof2[gpo_pos-stride]) > pa){ pa = pga; } if((pgb += prof1[gpo_pos-stride]) > pa){ pa = pgb; } prof2 += dim; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= dim; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j-1].a+prof2[gpo_pos]; //if (s[j-1].ga+prof2[gpe_pos] > s[j].ga){ // s[j].ga = s[j-1].ga+prof2[gpe_pos]; //} if(s[j-1].ga+prof2[gpe_pos] > s[j-1].a+prof2[gpo_pos]){ s[j].ga = s[j-1].ga+prof2[gpe_pos]; }else{ s[j].ga = s[j-1].a+prof2[gpo_pos]; } pgb = s[j].gb; //s[j].gb = ca+prof1[gpo_pos]; //if(pgb+prof1[gpe_pos] > s[j].gb){ // s[j].gb = pgb+prof1[gpe_pos]; //} if(pgb+prof1[gpe_pos] > ca+prof1[gpo_pos]){ s[j].gb = pgb+prof1[gpe_pos]; }else{ s[j].gb = ca+prof1[gpo_pos]; } pa = ca; } prof2 -= (hm->endb-hm->startb) * stride; } prof1 -= stride * (hm->enda); return s; } struct states* feature_backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { unsigned int freq[dim]; struct states* s = hm->b; //const int starta = 
hm->starta; //const int enda = hm->enda; //const int startb = hm->startb; //const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->enda+1) * stride; prof2 += (hm->endb+1) * stride; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(hm->endb == hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= stride; s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a+prof2[tgpe_pos]; //if (s[j+1].ga+prof2[tgpe_pos] > s[j].ga){ // s[j].ga = s[j+1].ga+prof2[tgpe_pos]; //} if(s[j+1].ga > s[j+1].a){ s[j].ga = s[j+1].ga+prof2[tgpe_pos]; }else{ s[j].ga = s[j+1].a+prof2[tgpe_pos]; } s[j].gb = -FLOATINFTY; } prof2 -= stride; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= stride; s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a+prof2[gpo_pos]; //if (s[j+1].ga+prof2[gpe_pos] > s[j].ga){ // s[j].ga = s[j+1].ga+prof2[gpe_pos]; //} if(s[j+1].ga+prof2[gpe_pos] > s[j+1].a+prof2[gpo_pos]){ s[j].ga = s[j+1].ga+prof2[gpe_pos]; }else{ s[j].ga = s[j+1].a+prof2[gpo_pos]; } s[j].gb = -FLOATINFTY; // prof2 -= 64; } prof2 -= stride; } s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = -FLOATINFTY; // prof2 -= (endb -startb) << 6; i = hm->enda-hm->starta; while(i--){ prof1 -= stride; c = 1; for (j = 0;j < dim; j++){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; if(hm->endb == hm->len_b){ //s[hm->endb].gb = pa+prof1[tgpe_pos]; //if(pgb+prof1[tgpe_pos] > s[hm->endb].gb){ // s[hm->endb].gb = pgb+prof1[tgpe_pos]; //} if(pgb > pa){ s[hm->endb].gb = pgb+prof1[tgpe_pos]; }else{ s[hm->endb].gb = pa+prof1[tgpe_pos]; } }else{ //s[hm->endb].gb = pa+prof1[gpo_pos]; //if(pgb+prof1[gpe_pos] > s[hm->endb].gb){ // s[hm->endb].gb = pgb+prof1[gpe_pos]; //} 
if(pgb+prof1[gpe_pos] > pa+prof1[gpo_pos]){ s[hm->endb].gb = pgb+prof1[gpe_pos]; }else{ s[hm->endb].gb = pa+prof1[gpo_pos]; } } //j = endb-startb; prof2 += (hm->endb-hm->startb) * stride; //while(j--){ for(j = hm->endb-1;j >= hm->startb;j--){ prof2 -= stride; ca = s[j].a; if((pga += prof2[stride+ gpo_pos]) > pa){ pa = pga; } if((pgb += prof1[stride+gpo_pos]) > pa){ pa = pgb; } prof2 += dim; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= dim; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j+1].a+prof2[gpo_pos]; //if (s[j+1].ga+prof2[gpe_pos] > s[j].ga){ // s[j].ga = s[j+1].ga+prof2[gpe_pos]; //} if(s[j+1].ga+prof2[gpe_pos] > s[j+1].a+prof2[gpo_pos]){ s[j].ga = s[j+1].ga+prof2[gpe_pos]; }else{ s[j].ga = s[j+1].a+prof2[gpo_pos]; } pgb = s[j].gb; //s[j].gb = ca+prof1[gpo_pos]; //if(pgb+prof1[gpe_pos] > s[j].gb){ // s[j].gb = pgb+prof1[gpe_pos]; //} if(pgb+prof1[gpe_pos] > ca+prof1[gpo_pos]){ s[j].gb = pgb+prof1[gpe_pos]; }else{ s[j].gb = ca+prof1[gpo_pos]; } pa = ca; } } return s; } struct feature_matrix* get_feature_matrix(struct feature_matrix* fm, struct alignment* aln,struct parameters* param) { struct utype_ufeat* utf = 0; struct feature* n = 0; struct feature* p = 0; int i = 0; int j = 0; char* requested_feature = param->feature_type; utf = get_unique_features(aln,utf); fm = malloc(sizeof(struct feature_matrix)); if (byg_start(requested_feature,"allALL")!= -1){ n = utf->f; i = 0; while(n){ n->color = i; i++; n = n->next; } }else if(byg_start(requested_feature,"maxplpMAXPLP")!= -1){ n = utf->f; i = 0; while(n){ if(byg_start("SIGNAL PEPTIDE",n->note)!= -1){ n->color = 0; } if(byg_start("TRANSMEMBRANE",n->note)!= -1){ n->color = 1; } if(byg_start("TRANSLOCATED LOOP",n->note)!= -1){ n->color = 2; } if(byg_start("CYTOPLASMIC LOOP",n->note)!= -1){ n->color = 3; } n = n->next; } i = 4; }else{ n = utf->f; i = 0; while(n){ if(check_identity(requested_feature,n->type)!= -1){ //fprintf(stderr,"%s %s\n",requested_feature,n->type); n->color = i; i++; 
}else{ n->color = -1; } n = n->next; } } /*if(!i){ fprintf(stderr,"WARNING: no feature of type '%s' was found in the input file.\n",requested_feature); fprintf(stderr," \nAvailable features are:\n\n"); fprintf(stderr," Type Feature\n"); fprintf(stderr," -----------------------------\n"); n = utf->f; while(n){ fprintf(stderr," %s %s\n",n->type,n->note); n = n->next; } free_utf(utf); free_aln(aln); free(fm); return 0; }*/ if(byg_start(requested_feature,"maxplp")!= -1){ fm->mdim = 4; fm->stride = fm->mdim << 1; fm->m = malloc(sizeof(float*)*fm->mdim); for (i = 0;i < fm->mdim;i++){ fm->m[i] = malloc(sizeof(float)*fm->mdim); } /*1: 0.60 0.20 0.15 0.05 2: 0.20 0.60 0.10 0.10 3: 0.15 0.10 0.50 0.25 4: 0.05 0.10 0.25 0.60*/ fm->m[0][0] = 60; fm->m[0][1] = 20; fm->m[0][2] = 15; fm->m[0][3] = 5; fm->m[1][0] = 20; fm->m[1][1] = 60; fm->m[1][2] = 10; fm->m[1][3] = 10; fm->m[2][0] = 15; fm->m[2][1] = 10; fm->m[2][2] = 50; fm->m[2][3] = 25; fm->m[3][0] = 5; fm->m[3][1] = 10; fm->m[3][2] = 25; fm->m[3][3] = 60; }else if(byg_start(requested_feature,"wumanber")!= -1){ fm->mdim = i; fm->stride = fm->mdim << 1; fm->m = malloc(sizeof(float*)*fm->mdim); for (i = 0;i < fm->mdim;i++){ fm->m[i] = malloc(sizeof(float)*fm->mdim); for (j = 0;j < fm->mdim;j++){ fm->m[i][j] = 0; } } for (i = 0;i < fm->mdim;i++){ fm->m[i][i] = 100; } // fprintf(stderr,"WU %d \n",fm->mdim); }else{ fm->mdim = i; fm->stride = fm->mdim << 1; fm->m = malloc(sizeof(float*)*fm->mdim); for (i = 0;i < fm->mdim;i++){ fm->m[i] = malloc(sizeof(float)*fm->mdim); for (j = 0;j < fm->mdim;j++){ fm->m[i][j] = param->diff_feature_score; } } for (i = 0;i < fm->mdim;i++){ fm->m[i][i] = param->same_feature_score; } /*for (i = 0;i < fm->mdim;i++){ for (j = 0;j < fm->mdim;j++){ fprintf(stderr,"%f ",fm->m[i][j]); } fprintf(stderr,"\n"); }*/ } //float fr = 0.0; for (i = numseq;i--;){ n = aln->ft[i]; // fprintf(stderr,"SEQUENCE %d\n",i); while(n){ p = utf->f; while(p){ if(check_identity(requested_feature,n->type)!= -1){ 
if(check_identity(n->note,p->note)!= -1){ n->color = p->color; // fr += n->end - n->start+1; // fprintf(stderr,"SEQ:%d FEATURE FOUND:%s %s %d-%d color:%d \n",i,n->note,p->note,n->start,n->end,n->color); break; } } p = p->next; } n = n->next; } } // fprintf(stderr,"%f\n",fr); //float res = 0.0; //for (i = 0; i < numseq;i++){ // res += aln->sl[i]; //} //fprintf(stdout,"%f %f %f\n",fr,res,fr/res); //exit(0); /* n = utf->t; fprintf(stderr,"TYPES: we use:%d\n",i); while(n){ fprintf(stderr,"%s\n",n->type); n = n->next; } n = utf->f; fprintf(stderr,"Features:\n"); i = 0; while(n){ fprintf(stderr,"%d: %s:%s col:%d\n",i,n->type,n->note,n->color); i++; n = n->next; } fprintf(stderr,"REQUESTED FEATURE:%s\n",requested_feature); for (i = 0;i < fm->mdim;i++){ for (j = 0;j < fm->mdim;j++){ fprintf(stderr,"%d ",fm->m[i][j]); } fprintf(stderr,"\n"); } fprintf(stderr,"\n");*/ free_utf(utf); return fm; } struct utype_ufeat* get_unique_features(struct alignment* aln,struct utype_ufeat* utf) { int i; utf = malloc(sizeof(struct utype_ufeat)*1); utf->t = 0; utf->f = 0; for (i =0; i < numseq;i++){ utf = traverse_ft(utf,aln->ft[i]); } return utf; } struct utype_ufeat* traverse_ft(struct utype_ufeat* utf,struct feature* n) { if (n != NULL){ utf->t = add_unique_type(utf->t,n); utf->f = add_unique_feature(utf->f,n); traverse_ft(utf,n->next); } return utf; } struct feature* add_unique_feature(struct feature *n, struct feature *toadd) { int i; if (n == NULL){ n = (struct feature*) malloc(sizeof(struct feature)); n->type = malloc(sizeof(char)* (strlen(toadd->type)+1)); for ( i= 0;i < strlen(toadd->type);i++){ n->type[i] = toadd->type[i]; } n->type[i] = 0; n->note = malloc(sizeof(char)* (strlen(toadd->note)+1)); for ( i= 0;i < strlen(toadd->note);i++){ n->note[i] = toadd->note[i]; } n->note[i] = 0; n->start = toadd->end - toadd->start; n->end = 0; n->next = 0; }else{ if((check_identity(toadd->note,n->note)== -1)){ n->next = add_unique_feature(n->next,toadd); }else{ n->start += toadd->end - 
toadd->start; } } return n; } struct feature* add_unique_type(struct feature *n, struct feature *toadd) { int i; if (n == NULL){ n = (struct feature*) malloc(sizeof(struct feature)); n->type = malloc(sizeof(char)* (strlen(toadd->type)+1)); for ( i= 0;i < strlen(toadd->type);i++){ n->type[i] = toadd->type[i]; } n->type[i] = 0; n->note = malloc(sizeof(char)* (strlen(toadd->note)+1)); for ( i= 0;i < strlen(toadd->note);i++){ n->note[i] = toadd->note[i]; } n->note[i] = 0; n->start = 0; n->end = 0; n->next = 0; }else{ if((check_identity(toadd->type,n->type)== -1)){ n->next = add_unique_type(n->next,toadd); } } return n; } kalign2_feature.h0000644001210100001440000000317411577654214013505 0ustar olifriusers/* feature.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */

/* NOTE(review): the header name that followed this #include is missing from
   this copy of the file and cannot be recovered from here; restore it from a
   pristine source before compiling. */
#include

/* Merges two aligned profiles into newp along the alignment path; sipa and
   sipb are the sequence counts used to weight gap penalties. Returns newp. */
float* feature_hirschberg_update(const float* profa,const float* profb,float* newp,int* path,int sipa,int sipb);

/* Allocates and returns the profile of sequence num, including its feature
   annotations; the incoming prof value is not used. */
float* make_unified_profile(float* prof,struct alignment* aln, int num,float** subm,struct feature_matrix* fm);

/* Writes the per-column gap penalties of a profile of length len, scaled by nsip. */
void set_unified_gap_penalties(float* prof,int len,int nsip);

/* Recursive Hirschberg step on the sub-problem described by hm; returns hirsch_path. */
int* feature_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path);

/* Joins the forward and backward vectors in hm, records the crossing point in
   hirsch_path and recurses on both halves. input_states has 6 entries,
   old_cor has 5 (starta, enda, startb, endb, mid). */
int* feature_hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path, float input_states[],int old_cor[]);

/* Forward dynamic-programming pass; works on and returns hm->f. */
struct states* feature_foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm);

/* Backward dynamic-programming pass; works on and returns hm->b. */
struct states* feature_backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm);
kalign2.h0000644001210100001440000003262411577654215011775 0ustar olifriusers/* kalign2.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include #include #include #define INFTY FLT_MAX #define FLOATINFTY FLT_MAX #define NODESIZE 16 #ifdef MEMORY #define tmalloc malloc #endif extern unsigned int numseq; extern unsigned int numprofiles; extern float gpo; extern float gpe; extern float tgpe; struct feature_matrix{ float** m; int mdim; int stride; }; struct utype_ufeat{ struct feature *t; struct feature *f; }; struct parameters{ char **infile; char *input; char *outfile; char* format; //int reformat; char* feature_type; char* alignment_type; char* feature_mode; char* distance; char* tree; char* sort; char* sub_matrix; char* print_tree; char* print_svg_tree; float gpo; float gpe; float tgpe; float secret; float zlevel; float same_feature_score; float diff_feature_score; int reformat; int id; int aa; int alter_gaps; int ntree; int help_flag; int quiet; int dna; float alter_range; int alter_weight; float internal_gap_weight; int smooth_window; float gap_inc; }; struct node{ struct node *next; int pos; }; struct names{ int* start; int* end; int* len; }; struct bignode{ struct bignode *next; unsigned int pos[NODESIZE]; unsigned int num; }; struct bignode* big_insert_hash(struct bignode *n,const unsigned int pos); void big_remove_nodes(struct bignode *n); void big_print_nodes(struct bignode *n); struct alignment{ //struct node** seq; struct feature** ft; struct sequence_info** si; unsigned int** sip; unsigned int* nsip; unsigned int* sl; unsigned int* lsn; int** s; char**seq; char** sn; }; struct sequence_info{ struct sequence_info* next; char* name; char* value; }; struct feature{ struct feature *next; char* type; char* note; int start; int end; int color; }; struct hirsch_mem{ struct states* f; struct states* b; int starta; int startb; int enda; int endb; int size; int len_a; int len_b; }; struct dp_matrix{ struct states* s; void* tb_mem; char** tb; int x; int y; }; struct states{ float a; float ga; float gb; float x; }; struct aln_tree_node{ struct aln_tree_node** links; int* 
internal_lables; int* path; int* profile; int* seq; int len; int done; int num; }; struct tree_node{ struct tree_node* left; struct tree_node*right; int label; int edge; }; struct ntree_data{ struct aln_tree_node* realtree; struct alignment* aln; float** profile; int** map; float**submatrix; int* tree; int ntree; }; struct alignment* sort_sequences(struct alignment* aln,int* tree,char* sort); struct aln_tree_node* real_upgma(float **dm,int ntree); int* readtree(struct aln_tree_node* p,int* tree); struct parameters* interface(struct parameters* param,int argc,char **argv); void parameter_message(struct parameters* param); struct dp_matrix* dp_matrix_alloc(struct dp_matrix *dp,int x,int y); struct dp_matrix* dp_matrix_realloc(struct dp_matrix *dp,int x,int y); void dp_matrix_free(struct dp_matrix *dp); struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param); void output(struct alignment* aln,struct parameters* param); int* upgma(float **dm,int* tree); int* nj(float **dm,int* tree); void print_simple_phylip_tree(struct aln_tree_node* p); struct alignment* make_dna(struct alignment* aln); float** read_matrix(float** subm,struct parameters* param); int* f_only_pp_dyn(int* path, struct dp_matrix *dp,const float* fprof1,const float* fprof2,const int len_a,const int len_b,int fdim,int stride); int* fpp_dyn(int* path, struct dp_matrix *dp,const float* prof1,const float* prof2,const float* fprof1,const float* fprof2,const int len_a,const int len_b,int fdim,int stride); int* dna_pp_dyn(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b); int* pp_dyn(int* path, struct dp_matrix *dp,const float* prof1,const float* prof2,const int len_a,const int len_b); int* ps_dyn(int* path, struct dp_matrix *dp,const float* prof1,const int* seq2,const int len_a,const int len_b,int sip); int* ss_dyn(float**subm,int* path, struct dp_matrix *dp,const int* seq1,const int* seq2,const int len_a,const int len_b); int* 
mirror_path(int* path); float* make_profile(float* prof,int* seq,int len, float** subm); float* dna_make_profile(float* prof,int* seq,int len, float** subm); float* update(const float*profa, const float* profb,float* newp,int* path,int sipa,int sipb); float* update_only_a(const float* profa, const float* profb,float* newp,int* path,int sipa,int sipb); float* dna_update(const float*profa,const float* profb,float* newp,int* path,int sipa,int sipb); float* dna_update_only_a(const float* profa, const float* profb, float* newp,int* path,int sipa,int sipb); void set_gap_penalties(float* prof,int len,int nsip,float strength,int nsip_c); void dna_set_gap_penalties(float* prof,int len,int nsip,float strength,int nsip_c); float** protein_pairwise_alignment_distance(struct alignment* aln,float** dm,struct parameters* param,float**subm, int nj); float get_distance_from_pairwise_alignment(int* path,int* seq1,int* seq2); float** protein_wu_distance2(struct alignment* si,float** dm,struct parameters* param); float protein_wu_distance_calculation2(struct node* hash[],int* seq,int seqlen,int diagonals,int mode); float** protein_wu_distance(struct alignment* si,float** dm,struct parameters* param, int nj); //float protein_wu_distance_calculation(struct node* hash[],int* seq,int seqlen,int diagonals,int mode); float protein_wu_distance_calculation(struct bignode* hash[], const int* seq, const int seqlen,const int diagonals, const float mode); float** dna_distance(struct alignment* si,float** dm,struct parameters* param,int nj); float dna_distance_calculation(struct bignode* hash[],int* p,int seqlen,int diagonals,float mode); int byg_detect(int* text,int n); int check_identity(char* n,char*m); int byg_count(char* pattern,char*text); int byg_start(char* pattern,char*text); int byg_end(char* pattern,char*text); struct node* insert(struct node *n, int pos); struct node* insert_hash(struct node *n, int pos); void remove_nodes(struct node *n); #ifndef MEMORY void* tmalloc(int size); #endif 
struct alignment* aln_alloc(struct alignment* aln); void free_aln(struct alignment* aln); void free_param(struct parameters* param); void free_ft(struct feature* n); int* pp_dyn2(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b); int* ps_dyn2(int* path, struct dp_matrix *dp,const int* prof1,const int* seq2,const int len_a,const int len_b,int sip); int* ss_dyn2(int**subm,int* path, struct dp_matrix *dp,const int* seq1,const int* seq2,const int len_a,const int len_b); float* make_profile2(float* prof, int* seq,int len, float** subm); void set_gap_penalties2(float* prof,int len,int nsip,int window,float strength); float* update2(const float* profa,const float* profb,float* newp,int* path,int sipa,int sipb,float internal_gap_weight); struct feature_matrix* get_feature_matrix(struct feature_matrix* fm, struct alignment* aln,struct parameters*param); void free_utf(struct utype_ufeat* utf); void free_feature_matrix(struct feature_matrix* fm); struct utype_ufeat* get_unique_features(struct alignment* aln,struct utype_ufeat* utf); struct utype_ufeat* traverse_ft(struct utype_ufeat* utf,struct feature* n); struct feature* add_unique_feature(struct feature *n, struct feature *toadd); struct feature* add_unique_type(struct feature *n, struct feature *toadd); int** default_alignment(struct alignment* aln,int* tree, float**submatrix, int** map); int** feature_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,struct feature_matrix* fm); int** test_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,float internal_gap_weight,int window,float strength); struct ntree_data* ntree_alignment(struct ntree_data* ntree_data); struct ntree_data* ntree_sub_alignment(struct ntree_data* ntree_data,int* tree,int num); float* make_feature_profile(float* prof,struct feature* f,int len,struct feature_matrix* fm); float* feature_update(const float* profa, const float* profb,float* newp,int* path,int stride); 
void printtree(struct aln_tree_node* p); struct ntree_data* alignntree(struct ntree_data* ntree_data,struct aln_tree_node* p); //int** alignntree(struct alignment* aln,int** submatrix, struct aln_tree_node* p,int** map,int ntree); void ntreeify(struct aln_tree_node* p,int ntree); struct tree_node* simpleinsert(struct tree_node* p,int target, int new_edge,int leaf_label); void printsimpleTree(struct tree_node* p); int* ticker(int* milometer,int elements); int* readsimpletree(struct tree_node* p,int* tree); int add_label_simpletree(struct tree_node* p,int* nodes,int i); //int** find_best_topology(struct alignment* aln,int**submatrix,int** map,int* leaves,int* nodes,int ntree); void free_real_tree(struct aln_tree_node* p); struct ntree_data* find_best_topology(struct ntree_data* ntree_data,int* leaves,int* nodes); void freesimpletree(struct tree_node* p); struct aln_tree_node* real_nj(float **dm,int ntree); //int** alter_gaps_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int n,float range,int weight); //void add_feature_information_from_alignment(int* path,int* fprof1,int* fprof2,int weight); struct alignment* protein_wu_sw(struct node* hash[],struct alignment* aln,int a,int b); float protein_wu_distance_calculation3(struct node* hash[],int* seq,int seqlen,int diagonals,int mode); float* make_wu_profile(float* prof,float* wu,int len); //int** aa_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int mmbonus); //int* aapp_dyn(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b,const int mmbonus); int** hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength); int** hirschberg_alignment_against_a(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength); //int* foward_pp_dyn(int* path, struct dp_matrix *dp,const float* prof1,const float* prof2,const int len_a,const int len_b); //int* backward_pp_dyn(int* 
path, struct dp_matrix *dp,const float* prof1,const float* prof2,const int len_a,const int len_b); struct hirsch_mem* hirsch_mem_alloc(struct hirsch_mem* hm,int x); struct hirsch_mem* hirsch_mem_realloc(struct hirsch_mem* hm,int x); void hirsch_mem_free(struct hirsch_mem* hm); int* mirror_hirsch_path(int* hirsch_path,int len_a,int len_b); int* add_gap_info_to_hirsch_path(int* hirsch_path,int len_a,int len_b); //DNA alignment via hirsch/Myer Miller int** dna_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,float strength); int** dna_alignment_against_a(struct alignment* aln,int* tree,float**submatrix, int** map,float strength); struct alignment* make_seq(struct alignment* aln,int a,int b,int* path); void update_gaps(int old_len,int*gis,int new_len,int *newgaps); //void print_alignment(struct alignment* aln); struct alignment* sort_in_relation(struct alignment* aln,char* sort); void quickSort(struct alignment* aln, int array_size); void q_sort(struct alignment* aln, int left, int right); void smooth_gaps(float* prof,int len,int window,float strength); int** advanced_hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength,float internal_gap_weight); int** simple_hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map); float* simple_make_profile(float* prof, int* seq,int len, float** subm); float* simple_update(float* profa,float* profb, float* newp,int* path); int* simple_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path); int* simple_hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); struct states* simple_foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); struct states* simple_backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); int** feature_hirschberg_alignment(struct 
alignment* aln,int* tree,float**submatrix, int** map,struct feature_matrix* fm); void profile_alignment_main(struct alignment* aln,struct parameters* param,float** submatrix); void increase_gaps(float* prof,int len,int window,float strength); struct names* names_alloc(struct names* n); void names_free(struct names* n); void print_tree(struct aln_tree_node* p,struct alignment* aln,char* outfile); void print_newick_tree(struct aln_tree_node* p,struct alignment* aln, FILE *fout); void print_phyloxml_tree(struct aln_tree_node* p,struct alignment* aln,FILE *fout); struct alignment* phylo (struct alignment* aln,char* outfile); kalign2_hirschberg.c0000644001210100001440000020111111577654215014155 0ustar olifriusers/* kalign2_hirschberg.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_hirschberg.h" #define MAX(a, b) (a > b ? 
a : b) #define MAX3(a,b,c) MAX(MAX(a,b),c) //#include int** hirschberg_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); }else{ set_gap_penalties(profile[a],len_a,aln->nsip[b],strength,aln->nsip[a]); //smooth_gaps(profile[a],len_a,window,strength); //increase_gaps(profile[a],len_a,window,strength); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); }else{ set_gap_penalties(profile[b],len_b,aln->nsip[a],strength,aln->nsip[b]); //smooth_gaps(profile[b],len_b,window,strength); //increase_gaps(profile[b],len_b,window,strength); } hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(a < numseq){ if(b < numseq){ map[c] = hirsch_ss_dyn(submatrix,aln->s[a],aln->s[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = 
hirsch_ps_dyn(profile[b],aln->s[a],hm,map[c],aln->nsip[b]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } }else{ if(b < numseq){ map[c] = hirsch_ps_dyn(profile[a],aln->s[b],hm,map[c],aln->nsip[a]); }else{ if(len_a < len_b){ map[c] = hirsch_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } } } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*64*(map[c][0]+2)); profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int** hirschberg_alignment_against_a(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix); }else{ set_gap_penalties(profile[a],len_a,aln->nsip[b],0,aln->nsip[a]); //smooth_gaps(profile[a],len_a,window,strength); //increase_gaps(profile[a],len_a,window,strength); } if (b < numseq){ profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix); }else{ set_gap_penalties(profile[b],len_b,aln->nsip[a],0,aln->nsip[b]); //smooth_gaps(profile[b],len_b,window,strength); //increase_gaps(profile[b],len_b,window,strength); } hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(a < numseq){ if(b < numseq){ map[c] = hirsch_ss_dyn(submatrix,aln->s[a],aln->s[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_ps_dyn(profile[b],aln->s[a],hm,map[c],aln->nsip[b]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } }else{ if(b < numseq){ map[c] = hirsch_ps_dyn(profile[a],aln->s[b],hm,map[c],aln->nsip[a]); }else{ if(len_a < len_b){ map[c] = hirsch_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } } } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*64*(map[c][0]+2)); profile[c] = update_only_a(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + 
aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int* hirsch_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; //fprintf(stderr,"Forward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->f = foward_hirsch_ss_dyn(subm,seq1,seq2,hm); hm->starta = mid; hm->enda = old_cor[1]; //fprintf(stderr,"Backward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->b = backward_hirsch_ss_dyn(subm,seq1,seq2,hm); hirsch_path = hirsch_align_two_ss_vector(subm,seq1,seq2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* hirsch_align_two_ss_vector(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; //i = hm->startb; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; // fprintf(stderr,"%d-%d %f\n",hm->startb,hm->endb,sub); if(f[i].a+b[i].a-sub > max){ max = 
f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga-gpo-sub > max){ max = f[i].a+b[i].ga-gpo-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb -gpo-sub > max){ max = f[i].a+b[i].gb - gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a - gpo-sub > max){ max = f[i].ga+b[i].a - gpo-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb - tgpe-sub > max){ max = f[i].gb+b[i].gb -tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - gpe -sub> max){ max = f[i].gb+b[i].gb - gpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a - gpo-sub > max){ max = f[i].gb+b[i].a - gpo-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb-gpo-sub > max){ max = f[i].a+b[i].gb - gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb -tgpe-sub > max){ max = f[i].gb+b[i].gb - tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - gpe-sub > max){ max = f[i].gb+b[i].gb - gpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d 
+%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 
-FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = 
/*
 * foward_hirsch_ss_dyn - forward dynamic-programming pass for two sequences.
 *
 * Processes rows starta .. enda-1 of seq1 against columns startb .. endb of
 * seq2, keeping only a single row of states in hm->f, which is overwritten
 * in place row after row. Each cell carries three scores: a (aligned) and
 * the two gap states ga and gb. The incoming boundary state is read from
 * s[0] and copied to s[startb].
 *
 * Gap costs come from the globals gpo, gpe and tgpe (presumably gap open,
 * gap extension and terminal gap extension - confirm against their
 * definition). tgpe replaces gpo/gpe in the first column when startb == 0
 * and in the last column when endb == hm->len_b.
 *
 * subm    substitution matrix, indexed as subm[seq1[i]][seq2[j-1]]
 * seq1    first sequence, indexed by row
 * seq2    second sequence, indexed by column - 1 (see seq2-- below)
 * hm      bounds (starta/enda/startb/endb/len_b) and the state vector f
 *
 * Returns hm->f, holding the states of the last processed row.
 */
struct states* foward_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm)
{
	struct states* s = hm->f;
	float *subp = 0;
	const int starta = hm->starta;
	const int enda = hm->enda;
	const int startb =hm->startb;
	const int endb = hm->endb;
	register float pa = 0;	/* a of the previous row, previous column (diagonal) */
	register float pga = 0;	/* ga of the previous row, previous column */
	register float pgb = 0;	/* gb of the previous row, previous column */
	register float ca = 0;	/* a of the previous row, current column, saved before overwrite */
	register float xa = 0;	/* a of the current row, previous column */
	register float xga = 0;	/* ga of the current row, previous column */
	register int i = 0;
	register int j = 0;

	/* move the incoming boundary state to the first column of the range */
	s[startb].a = s[0].a;
	s[startb].ga = s[0].ga;
	s[startb].gb = s[0].gb;

	/* initial row: only the ga state can be extended along the row */
	if(startb){
		for (j = startb+1; j < endb;j++){
			s[j].a = -FLOATINFTY;
			s[j].ga = MAX(s[j-1].ga - gpe,s[j-1].a-gpo);
			s[j].gb = -FLOATINFTY;
		}
	}else{
		/* range starts at column 0: use tgpe instead of gpo/gpe */
		for (j = startb+1; j < endb;j++){
			s[j].a = -FLOATINFTY;
			s[j].ga = MAX(s[j-1].ga,s[j-1].a)-tgpe;
			s[j].gb = -FLOATINFTY;
		}
	}
	s[endb].a = -FLOATINFTY;
	s[endb].ga = -FLOATINFTY;
	s[endb].gb = -FLOATINFTY;

	/* shift seq2 by one so that column j reads the original seq2[j-1] */
	seq2--;

	for (i = starta;i < enda;i++){
		subp = subm[seq1[i]];

		/* pick up the previous row's first column before overwriting it */
		pa = s[startb].a;
		pga = s[startb].ga;
		pgb = s[startb].gb;
		s[startb].a = -FLOATINFTY;
		s[startb].ga = -FLOATINFTY;

		/* both are -FLOATINFTY here: they were just assigned above */
		xa = s[startb].a;
		xga = s[startb].ga;

		if(startb){
			s[startb].gb = MAX(pgb - gpe,pa - gpo);
		}else{
			s[startb].gb = MAX(pgb,pa) - tgpe;
		}
		for (j = startb+1; j < endb;j++){
			/* keep the old a of this column; it is the next column's diagonal */
			ca = s[j].a;

			/* a: best of the three diagonal states plus the substitution score */
			pa = MAX3(pa,pga-gpo,pgb-gpo);
			pa += subp[seq2[j]];

			s[j].a = pa;

			pga = s[j].ga;

			/* ga: extend or open from the cell to the left in the current row */
			//s[j].ga = MAX(s[j-1].ga-gpe,s[j-1].a-gpo);
			s[j].ga = MAX(xga-gpe,xa-gpo);

			pgb = s[j].gb;

			/* gb: extend or open from the same column of the previous row */
			s[j].gb = MAX(pgb-gpe ,ca-gpo);

			pa = ca;

			xa = s[j].a;
			xga = s[j].ga;
		}

		/* last column (j == endb): ga is disabled, gb may use tgpe */
		ca = s[j].a;

		pa = MAX3(pa,pga-gpo,pgb-gpo);
		pa += subp[seq2[j]];

		s[j].a = pa;

		s[j].ga = -FLOATINFTY;//MAX(s[j-1].ga-gpe,s[j-1].a-gpo);
		if (endb != hm->len_b){
			s[j].gb = MAX(s[j].gb-gpe ,ca-gpo);
		}else{
			/* range ends at the last column of the matrix: use tgpe */
			s[j].gb = MAX(s[j].gb,ca)-tgpe;
		}
	}
	return s;
}
register float xa = 0; register float xga = 0; register int i = 0; register int j = 0; s[endb].a = s[0].a ; s[endb].ga = s[0].ga; s[endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(endb != hm->len_b){ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga-gpe,s[j+1].a-gpo); s[j].gb = -FLOATINFTY; } }else{ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)-tgpe; s[j].gb = -FLOATINFTY; } } s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; s[startb].gb = -FLOATINFTY; i = enda-starta; seq1+= starta; while(i--){ subp = subm[seq1[i]]; pa = s[endb].a; pga = s[endb].ga; pgb = s[endb].gb; s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; xa = s[endb].a; xga = s[endb].ga; if(endb != hm->len_b){ s[endb].gb = MAX(pgb-gpe,pa-gpo); }else{ s[endb].gb = MAX(pgb,pa)-tgpe; } for(j = endb-1;j > startb;j--){ ca = s[j].a; pa = MAX3(pa,pga - gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = MAX(s[j+1].ga-gpe,s[j+1].a-gpo); s[j].ga = MAX(xga-gpe,xa-gpo); pgb = s[j].gb; s[j].gb = MAX(pgb-gpe,ca-gpo); pa = ca; xa = s[j].a; xga = s[j].ga; } ca = s[j].a; pa = MAX3(pa,pga - gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j+1].ga-gpe,s[j+1].a-gpo); if(startb){ s[j].gb = MAX(s[j].gb-gpe,ca-gpo); }else{ s[j].gb = MAX(s[j].gb,ca)-tgpe; } } return s; } int* hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path,int sip) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = foward_hirsch_ps_dyn(prof1,seq2,hm,sip); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d 
%d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_hirsch_ps_dyn(prof1,seq2,hm,sip); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_align_two_ps_vector(prof1,seq2,hm,hirsch_path,input_states,old_cor,sip); return hirsch_path; } int* hirsch_align_two_ps_vector(const float* prof1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; const float open = gpo * sip; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; prof1+= ((old_cor[4]+1)<<6); //i = hm->startb; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].a-sub> max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga-open-sub > max){ max = f[i].a+b[i].ga-open-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a-open-sub > max){ max = f[i].ga+b[i].a-open-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // 
fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[-37]-sub > max){ max = f[i].gb+b[i].a+prof1[-37]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= ((old_cor[4]+1)<<6); //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; 
//fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d 
%d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 
-FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; } return hirsch_path; } struct states* foward_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip) { struct states* s = hm->f; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register float xa = 0; register float xga = 0; register int i = 0; register int j = 0; const float open = gpo * sip; const float ext = gpe *sip; const float text = tgpe * sip; prof1 += (hm->starta)<< 6; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb){ for (j = hm->startb+1; j < hm->endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); s[j].gb = -FLOATINFTY; } }else{ for (j = hm->startb+1; j < hm->endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga,s[j-1].a) - text; s[j].gb = -FLOATINFTY; } } s[hm->endb].a = 
-FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; seq2--; for (i = hm->starta;i < hm->enda;i++){ prof1 += 64; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; xa = s[hm->startb].a; xga = s[hm->startb].ga; if(hm->startb){ s[hm->startb].gb = MAX(pgb+prof1[28],pa+prof1[27]); }else{ s[hm->startb].gb = MAX(pgb,pa)+prof1[29]; } for (j = hm->startb+1; j < hm->endb;j++){ ca = s[j].a; pa = MAX3(pa,pga -open,pgb + prof1[-37]); pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); s[j].ga = MAX(xga-ext,xa-open); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[28],ca+prof1[27]); pa = ca; xa = s[j].a; xga = s[j].ga; } ca = s[j].a; pa = MAX3(pa,pga -open,pgb + prof1[-37]); pa += prof1[32 + seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j-1].ga-ext,s[j-1].a-open); if (hm->endb != hm->len_b){ s[j].gb = MAX(s[j].gb+prof1[28] ,ca+prof1[27]); }else{ s[j].gb = MAX(s[j].gb,ca)+ prof1[29]; } } prof1 -= hm->enda << 6; return s; } struct states* backward_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip) { struct states* s = hm->b; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register float xa = 0; register float xga = 0; register int i = 0; register int j = 0; const float open = gpo * sip; const float ext = gpe *sip; const float text = tgpe * sip; prof1 += (hm->enda+1) << 6; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; if(hm->endb != hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); s[j].gb = -FLOATINFTY; } }else{ for(j = hm->endb-1;j > hm->startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)-text; s[j].gb = -FLOATINFTY; } } s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = -FLOATINFTY; i = hm->enda-hm->starta; 
while(i--){ prof1 -= 64; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; xa = s[hm->endb].a; xga = s[hm->endb].ga; if(hm->endb != hm->len_b){ s[hm->endb].gb = MAX(pgb+prof1[28],pa+prof1[27]); }else{ s[hm->endb].gb = MAX(pgb,pa) +prof1[29]; } for(j = hm->endb-1;j > hm->startb;j--){ ca = s[j].a; pa = MAX3(pa,pga - open,pgb +prof1[91]); pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); s[j].ga = MAX(xga-ext,xa-open); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[28],ca+prof1[27]); pa = ca; xa = s[j].a; xga = s[j].ga; } ca = s[j].a; pa = MAX3(pa,pga - open,pgb +prof1[91]); pa += prof1[32 + seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j+1].ga-ext,s[j+1].a-open); if(hm->startb){ s[j].gb = MAX(s[j].gb+prof1[28], ca+prof1[27]); }else{ s[j].gb = MAX(s[j].gb,ca)+prof1[29]; } } return s; } int* hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = foward_hirsch_pp_dyn(prof1,prof2,hm); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_hirsch_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_align_two_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* 
hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; prof1+= ((old_cor[4]+1) << 6); //prof2 += 64 * (hm->startb); //i = hm->startb; prof2 += old_cor[2] << 6; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; prof2 += 64; //fprintf(stderr,"%d %d %d \n",f[i].a,b[i].a,max); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[27]-sub > max){ max = f[i].a+b[i].ga+prof2[27]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[27] -sub> max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[-37]-sub > max){ max = f[i].ga+b[i].a+prof2[-37]-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, 
b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[-37]-sub > max){ max = f[i].gb+b[i].a+prof1[-37]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (old_cor[4]+1)<<6; //prof2 -= hm->endb << 6; prof2 -= old_cor[3] << 6; //fprintf(stderr,"Transition:%d at:%d\n",transition,c); //if(transition == -1){ // exit(0); //} j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; //fprintf(stderr,"Using this for start:%ld %ld %ld\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; 
hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = 
-FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 
7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; } return hirsch_path; } struct states* foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { unsigned int freq[26]; struct states* s = hm->f; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register float xa = 0; register float xga = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->starta) << 6; prof2 += (hm->startb) << 6; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga+prof2[28],s[j-1].a+prof2[27]); s[j].gb = -FLOATINFTY; } prof2+=64; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga,s[j-1].a)+prof2[29]; s[j].gb = -FLOATINFTY; } prof2+=64; } prof2 -= (hm->endb-hm->startb) << 6; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += 64; c = 1; for (j = 0;j < 26; j++){ if(prof1[j]){ freq[c] = j; 
c++; } } freq[0] = c; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; xa = s[hm->startb].a; xga = s[hm->startb].ga; if(hm->startb){ s[hm->startb].gb = MAX(pgb+prof1[28],pa+prof1[27]); }else{ s[hm->startb].gb = MAX(pgb,pa)+ prof1[29]; } for (j = hm->startb+1; j < hm->endb;j++){ prof2 += 64; ca = s[j].a; pa = MAX3(pa,pga + prof2[-37],pgb + prof1[-37]); prof2 += 32; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; //s[j].ga = MAX(s[j-1].ga+prof2[28],s[j-1].a+prof2[27]); s[j].ga = MAX(xga+prof2[28],xa+prof2[27]); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[28] ,ca+prof1[27]); pa = ca; xa = s[j].a; xga = s[j].ga; } prof2 += 64; ca = s[j].a; pa = MAX3(pa,pga + prof2[-37],pgb + prof1[-37]); prof2 += 32; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; s[j].ga = -FLOATINFTY; if (hm->endb != hm->len_b){ s[j].gb = MAX(s[j].gb+prof1[28] ,ca+prof1[27]); }else{ s[j].gb = MAX(s[j].gb,ca)+ prof1[29]; } prof2 -= (hm->endb-hm->startb) << 6; } prof1 -= (hm->enda) << 6; return s; } struct states* backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { unsigned int freq[26]; struct states* s = hm->b; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register float xa = 0; register float xga = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->enda+1) << 6; prof2 += (hm->endb+1) << 6; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; if(hm->endb != hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga+prof2[28],s[j+1].a+prof2[27]); s[j].gb = -FLOATINFTY; } prof2 -= 64; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)+prof2[29]; s[j].gb = -FLOATINFTY; } prof2 
/*
 * Invert a Hirschberg path.
 *
 * hirsch_path[i], for 1 <= i <= len_b, is either -1 or an index into the
 * other sequence; the result np satisfies np[hirsch_path[i]] = i for every
 * entry that is not -1 and is -1 everywhere else.  np has len_a + 2 entries.
 *
 * hirsch_path is freed; the caller owns the returned array.  If the new
 * array cannot be allocated, NULL is returned and hirsch_path is left
 * untouched (the previous version wrote through the NULL pointer).
 */
int* mirror_hirsch_path(int* hirsch_path,int len_a,int len_b)
{
	int* np = 0;
	int i;

	np = malloc(sizeof(int)*(len_a+2));
	if(!np){
		return 0;
	}
	for(i = 0; i < len_a+2;i++){
		np[i] = -1;
	}

	for(i = 1; i <= len_b;i++){
		if(hirsch_path[i] != -1){
			np[hirsch_path[i]] = i;
		}
	}

	free(hirsch_path);
	return np;
}

/*
 * Expand a Hirschberg path into a per-column description of the alignment.
 *
 * hirsch_path[i], for 1 <= i <= len_a, is the position in B aligned to
 * position i of A, or -1 if position i has no partner.
 *
 * Layout of the returned array np:
 *   np[0]            number of alignment columns
 *   np[1..np[0]]     one code per column
 *   np[np[0]+1]      terminator, always 3
 *
 * Column codes: 0 = both positions aligned, 1 = a position of B that was
 * skipped between two aligned pairs (or after the last one), 2 = a position
 * of A whose path entry is -1.  The following bits are then OR-ed into the
 * gap columns:
 *   4   first gap column after an aligned column
 *   8   gap column following a gap column of the same kind
 *   16  last gap column before an aligned column (an existing 8 is turned
 *       into 16 by adding 8)
 *   32  gap column belonging to the run at the very start or very end
 *
 * hirsch_path is freed; the caller owns the returned array.  If the buffer
 * cannot be allocated, NULL is returned and hirsch_path is left untouched.
 *
 * Fixes over the previous version:
 *  - the two terminal-gap scans are bounded by the column count.  With a
 *    path that contains no aligned column they used to run over the
 *    terminator and off both ends of the buffer.
 *  - a failing shrink-realloc no longer loses the buffer; the larger,
 *    still valid block is kept instead.
 */
int* add_gap_info_to_hirsch_path(int* hirsch_path,int len_a,int len_b)
{
	int i,j;
	int a = 0;
	int b = 0;
	int* np = 0;
	int* shrunk = 0;

	/* worst case: every position of A and of B gets its own column,
	   plus the count in front and the terminator behind */
	np = calloc((size_t)(len_a+len_b+2),sizeof(int));
	if(!np){
		return 0;
	}

	/* first position of A: emit the leading unaligned positions of B, if any */
	j = 1;
	if(hirsch_path[1] == -1){
		np[j] = 2;
		j++;
	}else{
		for(a = 0;a < hirsch_path[1] -1;a++){
			np[j] = 1;
			j++;
		}
		np[j] = 0;
		j++;
	}
	b = hirsch_path[1];

	/* remaining positions of A; b is the previous path entry */
	for(i = 2; i <= len_a;i++){
		if(hirsch_path[i] == -1){
			np[j] = 2;
			j++;
		}else{
			/* positions of B skipped since the previous aligned pair; nothing
			   is emitted when the previous entry was -1 */
			if(hirsch_path[i]-1 != b && b != -1){
				for(a = 0;a < hirsch_path[i] - b-1;a++){
					np[j] = 1;
					j++;
				}
			}
			np[j] = 0;
			j++;
		}
		b = hirsch_path[i];
	}

	/* positions of B left over after the last aligned pair */
	if(hirsch_path[len_a] < len_b && hirsch_path[len_a] != -1){
		for(a = 0;a < len_b - hirsch_path[len_a];a++){
			np[j] = 1;
			j++;
		}
	}

	np[0] = j-1;
	np[j] = 3;

	/* give back the unused tail; on failure the original block is still valid */
	shrunk = realloc(np,sizeof(int)* (np[0]+2));
	if(shrunk){
		np = shrunk;
	}
	free(hirsch_path);

	/* mark the first / following / last column of every gap run */
	for(i = 2; np[i] != 3; i++){
		if ((np[i-1] &3) && !(np[i] & 3)){
			if(np[i-1] & 8){
				np[i-1] += 8;
			}else{
				np[i-1] |= 16;
			}
		}else if (!(np[i-1] & 3) &&(np[i] &3)){
			np[i] |= 4;
		}else if ((np[i-1] & 1) && (np[i] & 1)){
			np[i] |= 8;
		}else if ((np[i-1] & 2) && (np[i] & 2)){
			np[i] |= 8;
		}
	}

	/* flag the leading run of gap columns ... */
	for(i = 1; i <= np[0] && np[i] != 0; i++){
		np[i] |= 32;
	}
	/* ... and the trailing one */
	for(i = np[0]; i >= 1 && np[i] != 0; i--){
		np[i] |= 32;
	}
	return np;
}
c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[len_a]; pa = s[0].a; pga = s[0].ga; pgb = s[0].gb; s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = pa+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; } tracep[0] = 16; for (j = 1;j< len_b;j++){ prof2 += 64; ca = s[j].a; c = 1; if((pga+=prof2[-37]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[-37]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j-1].a+prof2[27]+prof2[29]; if (s[j-1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j-1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } prof2 += 64; ca = s[len_b].a; c = 1; if((pga+=prof2[-37]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[-37]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[len_b].a = pa; s[len_b].ga = s[len_b-1].a+prof2[27]+prof2[29]; if (s[len_b-1].ga+prof2[29] > s[len_b].ga){ s[len_b].ga = s[len_b-1].ga+prof2[29]; c |= 8; } pgb = s[len_b].gb; s[len_b].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; c |= 16; } tracep[len_b] = c; pgb = s[len_b].gb; c = 2; if(s[len_b].ga > pgb){ pgb = s[len_b].ga; c = 1; } if(s[len_b].a >= pgb){ pgb = s[len_b].a; c = 0; } ca = c; i = len_a; j = len_b; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; if(i-1!= 0){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ ca = 2; if(j-1!= 0){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i--; j--; break; case 1: if(trace[i][j] & 8){ ca = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 
32+8; } } }else{ ca = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j--; break; case 2: if(trace[i][j] & 16){ ca = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i--; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; j = path[0]; for(i =0 ;i < path[0]/2;i++){ c = path[i+1]; path[i+1] = path[j-i]; path[j -i] = c; } return path; } int* backward_pp_dyn(int* path, struct dp_matrix *dp,const int* prof1,const int* prof2,const int len_a,const int len_b) { unsigned int freq[26]; struct states* s = 0; char** trace = 0; char* tracep = 0; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1+= 64; prof2 += 64; s = dp->s; trace = dp->tb; trace[len_a][len_b] = 32; prof1 += len_a << 6; s[len_b].a = 0; s[len_b].ga = -INFTY; s[len_b].gb = -INFTY; //init of first row; tracep = trace[len_a]; j = len_b; while(--j){ s[j].a = -INFTY; s[j].ga = s[j+1].a+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; } s[j].gb = -INFTY; tracep[j] = 8; } s[0].a = -INFTY; s[0].ga = -INFTY; s[0].gb = -INFTY; i = len_a; while(--i){ prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[i]; pa = s[len_b].a; pga = s[len_b].ga; pgb = s[len_b].gb; s[len_b].a = -INFTY; s[len_b].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } tracep[len_b] = 16; j = len_b; prof2 += len_b << 6; while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga += prof2[91]) > pa){ pa = pga; c = 2; } if((pgb += prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += 
prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]; if (s[j+1].ga+prof2[28] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[28]; c |= 8; } pgb = s[j].gb; s[j].gb = ca+prof1[27]; if(pgb+prof1[28] > s[j].gb){ s[j].gb = pgb+prof1[28]; c |= 16; } tracep[j] = c; pa = ca; } prof2 -= 64; //LAST CELL (0) ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[0].a = pa; s[0].ga = -INFTY; pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb+prof1[29] > s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; } prof1 -= 64; c = 1; for (j = 26; j--;){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; tracep = trace[0]; j = len_b; prof2 += len_b << 6; pa = s[j].a; pga = s[j].ga; pgb = s[j].gb; s[j].a = -INFTY; s[j].ga = -INFTY; s[len_b].gb = pa+prof1[29]; if(pgb+prof1[29] > s[len_b].gb){ s[len_b].gb = pgb+prof1[29]; } while(--j){ prof2 -= 64; ca = s[j].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[27]+prof2[29]; if (s[j+1].ga+prof2[29] > s[j].ga){ s[j].ga = s[j+1].ga+prof2[29]; c |= 8; } pgb = s[j].gb; s[j].gb = -INFTY; tracep[j] = c; pa = ca; } prof2 -= 64; ca = s[0].a; c = 1; if((pga+=prof2[91]) > pa){ pa = pga; c = 2; } if((pgb+=prof1[91]) > pa){ pa = pgb; c = 4; } prof2+=32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2-=32; s[0].a = pa; s[0].ga = s[1].a+prof2[27]+prof2[29]; if (s[1].ga+prof2[29] > s[0].ga){ s[0].ga = s[1].ga+prof2[29]; c |= 8; } pgb = s[0].gb; s[0].gb = ca+prof1[27]+prof1[29]; if(pgb +prof1[29]> s[0].gb){ s[0].gb = pgb+prof1[29]; c |= 16; } tracep[0] = c; pgb = s[0].gb; c = 2; if(s[0].ga > pgb){ pgb = s[0].ga; c = 1; } 
if(s[0].a >= pgb){ pgb = s[0].a; c = 0; } //fprintf(stderr,"SCORE:%d\n",ca); ca = c; i = 0; j = 0; c = 1; while(trace[i][j] < 32){ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b); switch(ca){ case 0: if (trace[i][j] & 2){ ca = 1; if(i+1!= len_a){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } }else if (trace[i][j] & 4){ ca = 2; if(j+1!= len_b){ path[c+1] |= 16; // fprintf(stderr,"GAP_CLOSE\n"); }else{ path[c+1] |= 32+16; } } //path[c] = 0; i++; j++; break; case 1: if(trace[i][j] & 8){ ca = 1; if(i!=0 && i!= len_a){ // / fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(i!=0 && i!= len_a){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 1; j++; break; case 2: if(trace[i][j] & 16){ ca = 2; if(j !=0 && j != len_b){ // fprintf(stderr,"GAP_EXT\n"); if(!(path[c]&16)){ path[c] |= 8; } }else{ if(!(path[c]&16)){ path[c] |= 32+8; } } }else{ ca = 0; if(j!=0 && j != len_b){ // fprintf(stderr,"GAP_OPEN\n"); path[c] |= 4; }else{ path[c] |= 32+4; } } path[c] |= 2; i++; break; } c++; } path[0] = c-1; path[c] = 3; path[c+1] = pgb; return path; } */ kalign2_hirschberg_dna.c0000644001210100001440000014460111577654214015010 0ustar olifriusers/* kalign2_hirschberg_dna.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_hirschberg_dna.h" #define MAX(a, b) (a > b ? a : b) #define MAX3(a,b,c) MAX(MAX(a,b),c) int** dna_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,float strength) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%0.2f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = dna_make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = dna_make_profile(profile[b],aln->s[b],len_b,submatrix); } fprintf(stderr,"Saving mem...\n"); dna_set_gap_penalties(profile[a],len_a,aln->nsip[b],strength,aln->nsip[a]); dna_set_gap_penalties(profile[b],len_b,aln->nsip[a],strength,aln->nsip[b]); hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(a < numseq){ if(b < numseq){ map[c] = hirsch_dna_ss_dyn(submatrix,aln->s[a],aln->s[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_dna_ps_dyn(profile[b],aln->s[a],hm,map[c],aln->nsip[b]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } }else{ if(b < numseq){ map[c] = hirsch_dna_ps_dyn(profile[a],aln->s[b],hm,map[c],aln->nsip[a]); }else{ if(len_a < len_b){ map[c] = hirsch_dna_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_dna_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } } } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*22*(map[c][0]+2)); profile[c] = dna_update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = 
aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); //free(profile[numprofiles-1]); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int** dna_alignment_against_a(struct alignment* aln,int* tree,float**submatrix, int** map,float strength) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%0.2f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = dna_make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = dna_make_profile(profile[b],aln->s[b],len_b,submatrix); } dna_set_gap_penalties(profile[a],len_a,1,strength,1);//aln->nsip[b]); dna_set_gap_penalties(profile[b],len_b,1,strength,1);//aln->nsip[a]); hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(a < numseq){ if(b < numseq){ map[c] = hirsch_dna_ss_dyn(submatrix,aln->s[a],aln->s[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_dna_ps_dyn(profile[b],aln->s[a],hm,map[c],aln->nsip[b]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } }else{ if(b < numseq){ map[c] = hirsch_dna_ps_dyn(profile[a],aln->s[b],hm,map[c],1);//aln->nsip[a]); }else{ if(len_a < len_b){ map[c] = hirsch_dna_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_dna_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } } } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*22*(map[c][0]+2)); profile[c] = dna_update_only_a(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); 
free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); //free(profile[numprofiles-1]); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int* hirsch_dna_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; //fprintf(stderr,"Forward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->f = foward_hirsch_dna_ss_dyn(subm,seq1,seq2,hm); hm->starta = mid; hm->enda = old_cor[1]; //fprintf(stderr,"Backward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->b = backward_hirsch_dna_ss_dyn(subm,seq1,seq2,hm); hirsch_path = hirsch_align_two_dna_ss_vector(subm,seq1,seq2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* hirsch_align_two_dna_ss_vector(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; float middle = (hm->endb - hm->startb)/2 + hm->startb; float sub = 0.0; i = hm->startb; c = -1; for(i = hm->startb; i < hm->endb;i++){ sub = abs(middle -i); sub /= 1000; // fprintf(stderr,"%d-%d %f\n",hm->startb,hm->endb,sub); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga-gpo-sub > max){ max = f[i].a+b[i].ga-gpo-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = 
%d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb -gpo-sub > max){ max = f[i].a+b[i].gb - gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a - gpo-sub > max){ max = f[i].ga+b[i].a - gpo-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb - tgpe-sub > max){ max = f[i].gb+b[i].gb -tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - gpe -sub> max){ max = f[i].gb+b[i].gb - gpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a - gpo-sub > max){ max = f[i].gb+b[i].a - gpo-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } i = hm->endb; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb-gpo-sub > max){ max = f[i].a+b[i].gb - gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb -tgpe-sub > max){ max = f[i].gb+b[i].gb - tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - tgpe-sub > max){ max = f[i].gb+b[i].gb - tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; 
hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d 
%d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: 
hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; } return hirsch_path; } struct states* foward_hirsch_dna_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm) { struct states* s = hm->f; float *subp = 0; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const 
int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; s[startb].a = s[0].a; s[startb].ga = s[0].ga; s[startb].gb = s[0].gb; if(startb == 0){ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga,s[j-1].a)-tgpe; s[j].gb = -FLOATINFTY; } }else{ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga - gpe,s[j-1].a-gpo); s[j].gb = -FLOATINFTY; } } s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; s[endb].gb = -FLOATINFTY; seq2--; for (i = starta;i < enda;i++){ subp = subm[seq1[i]]; pa = s[startb].a; pga = s[startb].ga; pgb = s[startb].gb; s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; if(startb == 0){ s[startb].gb = MAX(pgb,pa) - tgpe; }else{ s[startb].gb = MAX(pgb - gpe,pa - gpo); } for (j = startb+1; j < endb;j++){ ca = s[j].a; pa = MAX3(pa,pga-gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = MAX(s[j-1].ga-gpe,s[j-1].a-gpo); pgb = s[j].gb; s[j].gb = MAX(pgb-gpe ,ca-gpo); pa = ca; } ca = s[j].a; pa = MAX3(pa,pga-gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j-1].ga-gpe,s[j-1].a-gpo); if (endb != hm->len_b){ s[j].gb = MAX(s[j].gb-gpe ,ca-gpo); }else{ s[j].gb = MAX(s[j].gb,ca)-tgpe; } } return s; } struct states* backward_hirsch_dna_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm) { struct states* s = hm->b; float *subp = 0; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; s[endb].a = s[0].a ; s[endb].ga = s[0].ga; s[endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(endb == hm->len_b){ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)-tgpe; s[j].gb = -FLOATINFTY; } }else{ for(j = 
endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga-gpe,s[j+1].a-gpo); s[j].gb = -FLOATINFTY; } } s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; s[startb].gb = -FLOATINFTY; i = enda-starta; seq1+= starta; while(i--){ subp = subm[seq1[i]]; pa = s[endb].a; pga = s[endb].ga; pgb = s[endb].gb; s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; if(endb == hm->len_b){ s[endb].gb = MAX(pgb,pa)-tgpe; }else{ s[endb].gb = MAX(pgb-gpe,pa-gpo); } for(j = endb-1;j > startb;j--){ ca = s[j].a; pa = MAX3(pa,pga - gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = MAX(s[j+1].ga-gpe,s[j+1].a-gpo); pgb = s[j].gb; s[j].gb = MAX(pgb-gpe,ca-gpo); pa = ca; } ca = s[j].a; pa = MAX3(pa,pga - gpo,pgb-gpo); pa += subp[seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j+1].ga-gpe,s[j+1].a-gpo); if(startb){ s[j].gb = MAX(s[j].gb-gpe,ca-gpo); }else{ s[j].gb = MAX(s[j].gb,ca)-tgpe; } } return s; } int* hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path,int sip) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = foward_hirsch_dna_ps_dyn(prof1,seq2,hm,sip); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_hirsch_dna_ps_dyn(prof1,seq2,hm,sip); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_align_two_dna_ps_vector(prof1,seq2,hm,hirsch_path,input_states,old_cor,sip); return hirsch_path; } int* hirsch_align_two_dna_ps_vector(const float* prof1,const int* 
seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; const int open = gpo * sip; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; float middle = (hm->endb - hm->startb)/2 + hm->startb; float sub = 0.0; prof1+= (22 * (old_cor[4]+1)); i = hm->startb; c = -1; for(i = hm->startb; i < hm->endb;i++){ sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].a-sub> max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga-open-sub > max){ max = f[i].a+b[i].ga-open-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[8]-sub > max){ max = f[i].a+b[i].gb+prof1[8]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a-open-sub > max){ max = f[i].ga+b[i].a-open-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[10]-sub > max){ max = f[i].gb+b[i].gb+prof1[10]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[9]-sub > max){ max = f[i].gb+b[i].gb+prof1[9]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[8-22]-sub > max){ max = f[i].gb+b[i].a+prof1[8-22]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } i = 
hm->endb; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[8]-sub > max){ max = f[i].a+b[i].gb+prof1[8]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[10]-sub > max){ max = f[i].gb+b[i].gb+prof1[10]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[9]-sub > max){ max = f[i].gb+b[i].gb+prof1[0]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (22 * (old_cor[4]+1)); //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; 
case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // 
fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = 
old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; } return hirsch_path; } struct states* foward_hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip) { //unsigned int freq[26]; struct states* s = hm->f; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; const float open = gpo * sip; const float ext = gpe *sip; const float text = tgpe * sip; prof1 += (starta) * 22; s[startb].a = s[0].a; s[startb].ga = s[0].ga; s[startb].gb = s[0].gb; if(startb == 0){ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga,s[j-1].a) - text; s[j].gb = -FLOATINFTY; } }else{ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); s[j].gb = -FLOATINFTY; } } s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; s[endb].gb = -FLOATINFTY; seq2--; for (i = starta;i < enda;i++){ prof1 += 22; pa = s[startb].a; pga = s[startb].ga; pgb = s[startb].gb; s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; if(startb == 0){ s[startb].gb = MAX(pgb,pa)+prof1[10]; }else{ s[startb].gb = MAX(pgb+prof1[9],pa+prof1[8]); } for (j = startb+1; j < endb;j++){ ca = s[j].a; pa = MAX3(pa,pga -open,pgb + prof1[-14]); pa += prof1[11 + seq2[j]]; s[j].a = pa; 
pga = s[j].ga; s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[9],ca+prof1[8]); pa = ca; } ca = s[j].a; pa = MAX3(pa,pga -open,pgb + prof1[-14]); pa += prof1[11 + seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j-1].ga-ext,s[j-1].a-open); if (hm->endb != hm->len_b){ s[j].gb = MAX(s[j].gb+prof1[9] ,ca+prof1[8]); }else{ s[j].gb = MAX(s[j].gb,ca)+ prof1[10]; } } prof1 -= 22 * (enda); return s; } struct states* backward_hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip) { //unsigned int freq[26]; struct states* s = hm->b; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; const float open = gpo * sip; const float ext = gpe *sip; const float text = tgpe * sip; prof1 += (enda+1) * 22; s[endb].a = s[0].a; s[endb].ga = s[0].ga; s[endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(endb == hm->len_b){ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)-text; s[j].gb = -FLOATINFTY; } }else{ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); s[j].gb = -FLOATINFTY; } } s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; s[startb].gb = -FLOATINFTY; i = enda-starta; while(i--){ prof1 -= 22; pa = s[endb].a; pga = s[endb].ga; pgb = s[endb].gb; s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; if(endb == hm->len_b){ s[endb].gb = MAX(pgb,pa) +prof1[10]; }else{ s[endb].gb = MAX(pgb+prof1[9],pa+prof1[8]); } for(j = endb-1;j > startb;j--){ ca = s[j].a; pa = MAX3(pa,pga - open,pgb +prof1[30]); pa += prof1[11 + seq2[j]]; s[j].a = pa; pga = s[j].ga; s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[9],ca+prof1[8]); pa = ca; } ca = s[j].a; pa = MAX3(pa,pga - open,pgb +prof1[30]); pa += prof1[11 
+ seq2[j]]; s[j].a = pa; s[j].ga = -FLOATINFTY;//MAX(s[j+1].ga-ext,s[j+1].a-open); if(hm->startb){ s[j].gb = MAX(s[j].gb+prof1[9], ca+prof1[8]); }else{ s[j].gb = MAX(s[j].gb,ca)+prof1[10]; } } return s; } int* hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = foward_hirsch_dna_pp_dyn(prof1,prof2,hm); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_hirsch_dna_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_align_two_dna_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* hirsch_align_two_dna_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path, float input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; float middle = (hm->endb - hm->startb)/2 + hm->startb; float sub = 0.0; prof1+= (22 * (old_cor[4]+1)); prof2 += (22 * (hm->startb)); i = hm->startb; c = -1; for(i = hm->startb; i < hm->endb;i++){ sub = abs(middle -i); sub /= 1000; prof2 += 22; if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // 
fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[8]-sub > max){ max = f[i].a+b[i].ga+prof2[8]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[8]-sub > max){ max = f[i].a+b[i].gb+prof1[8]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[-14]-sub > max){ max = f[i].ga+b[i].a+prof2[-14]-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[10]-sub > max){ max = f[i].gb+b[i].gb+prof1[10]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[9]-sub > max){ max = f[i].gb+b[i].gb+prof1[9]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[-14]-sub > max){ max = f[i].gb+b[i].a+prof1[-14]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } i = hm->endb; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[8]-sub > max){ max = f[i].a+b[i].gb+prof1[8]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[10]-sub > max){ max = f[i].gb+b[i].gb+prof1[10]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[9]-sub > max){ max = f[i].gb+b[i].gb+prof1[9]-sub; // 
fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (22 * (old_cor[4]+1)); prof2 -= (hm->endb *22); //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = 
old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0.0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0.0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = 
-FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0.0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0.0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_dna_pp_dyn(prof1,prof2,hm,hirsch_path); break; } 
return hirsch_path; } struct states* foward_hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { struct states* s = hm->f; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; prof1 += (hm->starta) * 22; prof2 += (hm->startb) * 22; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb == 0){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=22; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga,s[j-1].a)+prof2[10]; s[j].gb = -FLOATINFTY; } prof2 += 22; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=22; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j-1].ga+prof2[9],s[j-1].a+prof2[8]); s[j].gb = -FLOATINFTY; } prof2 += 22; } prof2 -= (hm->endb-hm->startb) * 22; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += 22; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; if(hm->startb == 0){ s[hm->startb].gb = MAX(pgb,pa)+ prof1[10]; }else{ s[hm->startb].gb = MAX(pgb+prof1[9],pa+prof1[8]); } for (j = hm->startb+1; j < hm->endb;j++){ prof2 += 22; ca = s[j].a; pa = MAX3(pa,pga + prof2[-14],pgb + prof1[-14]); prof2 += 11; pa += prof1[0]*prof2[0]; pa += prof1[1]*prof2[1]; pa += prof1[2]*prof2[2]; pa += prof1[3]*prof2[3]; pa += prof1[4]*prof2[4]; pa += prof1[5]*prof2[5]; pa += prof1[6]*prof2[6]; pa += prof1[7]*prof2[7]; prof2 -= 11; s[j].a = pa; pga = s[j].ga; s[j].ga = MAX(s[j-1].ga+prof2[9],s[j-1].a+prof2[8]); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[9] ,ca+prof1[8]); pa = ca; } prof2 += 22; ca = s[j].a; pa = MAX3(pa,pga + prof2[-14],pgb + prof1[-14]); prof2 += 11; pa += prof1[0]*prof2[0]; pa += prof1[1]*prof2[1]; pa += prof1[2]*prof2[2]; pa += prof1[3]*prof2[3]; pa += prof1[4]*prof2[4]; pa += prof1[5]*prof2[5]; pa += prof1[6]*prof2[6]; pa += prof1[7]*prof2[7]; prof2 -= 
11; s[j].a = pa; s[j].ga = -FLOATINFTY; if (hm->endb != hm->len_b){ s[j].gb = MAX(s[j].gb+prof1[9] ,ca+prof1[8]); }else{ s[j].gb = MAX(s[j].gb,ca)+ prof1[10]; } prof2 -= (hm->endb-hm->startb) * 22; } prof1 -= 22 * (hm->enda); return s; } struct states* backward_hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm) { struct states* s = hm->b; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; prof1 += (hm->enda+1) * 22; prof2 += (hm->endb+1) * 22; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(hm->endb == hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 22; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga,s[j+1].a)+prof2[10]; s[j].gb = -FLOATINFTY; } prof2 -= 22; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 22; s[j].a = -FLOATINFTY; s[j].ga = MAX(s[j+1].ga+prof2[9],s[j+1].a+prof2[8]); s[j].gb = -FLOATINFTY; } prof2 -= 22; } s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = -FLOATINFTY; i = hm->enda-hm->starta; while(i--){ prof1 -= 22; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; if(hm->endb == hm->len_b){ s[hm->endb].gb = MAX(pgb,pa)+prof1[10]; }else{ s[hm->endb].gb = MAX(pgb+prof1[9] ,pa+prof1[8]); } //j = endb-startb; prof2 += (hm->endb-hm->startb) *22; //while(j--){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 22; ca = s[j].a; pa = MAX3(pa,pga + prof2[30],pgb + prof1[30]); prof2 += 11; pa += prof1[0]*prof2[0]; pa += prof1[1]*prof2[1]; pa += prof1[2]*prof2[2]; pa += prof1[3]*prof2[3]; pa += prof1[4]*prof2[4]; pa += prof1[5]*prof2[5]; pa += prof1[6]*prof2[6]; pa += prof1[7]*prof2[7]; prof2 -= 11; s[j].a = pa; pga = s[j].ga; s[j].ga = MAX(s[j+1].ga+prof2[9], s[j+1].a+prof2[8]); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[9], ca+prof1[8]); pa = ca; } prof2 -= 22; 
ca = s[j].a; pa = MAX3(pa,pga + prof2[30],pgb + prof1[30]); prof2 += 11; pa += prof1[0]*prof2[0]; pa += prof1[1]*prof2[1]; pa += prof1[2]*prof2[2]; pa += prof1[3]*prof2[3]; pa += prof1[4]*prof2[4]; pa += prof1[5]*prof2[5]; pa += prof1[6]*prof2[6]; pa += prof1[7]*prof2[7]; prof2 -= 11; s[j].a = pa; //pga = s[j].ga; s[j].ga = -FLOATINFTY;//MAX(s[j+1].ga+prof2[28], s[j+1].a+prof2[27]); //pgb = s[j].gb; if(hm->startb){ s[j].gb = MAX(s[j].gb+prof1[9], ca+prof1[8]); }else{ s[j].gb = MAX(s[j].gb,ca)+prof1[10]; } } return s; } kalign2_hirschberg_dna.h0000644001210100001440000000441111577654215015010 0ustar olifriusers/* kalign2_hirschberg_dna.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ int* hirsch_dna_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path); int* hirsch_align_two_dna_ss_vector(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); struct states* foward_hirsch_dna_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm); struct states* backward_hirsch_dna_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm); int* hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path,int sip); int* hirsch_align_two_dna_ps_vector(const float* prof1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip); struct states* foward_hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip); struct states* backward_hirsch_dna_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip); int* hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path); int* hirsch_align_two_dna_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); struct states* foward_hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); struct states* backward_hirsch_dna_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); kalign2_hirschberg.h0000644001210100001440000000464211577654215014174 0ustar olifriusers/* kalign2_hirschberg.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ int* hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm, int* hirsch_path); struct states* foward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); struct states* backward_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_mem* hm); int* hirsch_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); struct states* backward_hirsch_pp_dynm(const float* prof1,const float* prof2,struct hirsch_mem* hm); struct states* backward_hirsch_pp_dyn0(const float* prof1,const float* prof2,struct hirsch_mem* hm); int* hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path,int sip); struct states* foward_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip); struct states* backward_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_mem* hm,int sip); int* hirsch_align_two_ps_vector(const float* prof1,const int* seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip); int* hirsch_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_mem* hm, int* hirsch_path); struct states* foward_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm); struct states* backward_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_mem* hm); int* hirsch_align_two_ss_vector(float**subm,const int* seq1,const int* 
seq2,struct hirsch_mem* hm,int* hirsch_path,float input_states[],int old_cor[]); kalign2_hirschberg_large.c0000644001210100001440000017245011577654215015344 0ustar olifriusers/* kalign2_hirschberg_large.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_hirschberg_large.h" #define MAX(a, b) (a > b ? 
a : b) #define MAX3(a,b,c) MAX(MAX(a,b),c) //#include float local_gpo; float local_gpe; float local_tgpe; int** hirschberg_large_alignment(struct alignment* aln,int* tree,float**submatrix, int** map,int window,float strength) { struct hirsch_large_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; float** subm = 0; subm = malloc(sizeof(float*)*32); for(i = 0; i < 32;i++){ subm[i] = malloc(sizeof(float)*32); for (j = 0; j < 32;j++){ subm[i][j] = (float)submatrix[i][j]; } } local_gpo = (float)gpo; local_gpe = (float)gpe; local_tgpe = (float)tgpe; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_large_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_large_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq){ profile[a] = make_large_profile(profile[a],aln->s[a],len_a,subm); }else{ set_large_gap_penalties(profile[a],len_a,aln->nsip[b]); } if (b < numseq){ profile[b] = make_large_profile(profile[b],aln->s[b],len_b,subm); }else{ set_large_gap_penalties(profile[b],len_b,aln->nsip[a]); } hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0.0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0.0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(a < numseq){ if(b < numseq){ map[c] = hirsch_large_ss_dyn(subm,aln->s[a],aln->s[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_large_ps_dyn(profile[b],aln->s[a],hm,map[c],aln->nsip[b]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } }else{ if(b < numseq){ map[c] = hirsch_large_ps_dyn(profile[a],aln->s[b],hm,map[c],aln->nsip[a]); }else{ if(len_a < len_b){ map[c] = hirsch_large_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_large_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } } } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(float)*64*(map[c][0]+2)); profile[c] = large_update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); 
} fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_large_mem_free(hm); for (i = 32;i--;){ free(subm[i]); free(submatrix[i]); } free(subm); free(submatrix); return map; } int* hirsch_large_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_large_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; //fprintf(stderr,"Forward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->f = foward_large_hirsch_ss_dyn(subm,seq1,seq2,hm); hm->starta = mid; hm->enda = old_cor[1]; //fprintf(stderr,"Backward:%d-%d %d-%d\n",hm->starta,hm->enda,hm->startb,hm->endb); hm->b = backward_large_hirsch_ss_dyn(subm,seq1,seq2,hm); hirsch_path = hirsch_large_align_two_ss_vector(subm,seq1,seq2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* hirsch_large_align_two_ss_vector(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct large_states* f = hm->f; struct large_states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -FLOATINFTY; float max = -FLOATINFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; //i = hm->startb; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; // fprintf(stderr,"%d-%d %f\n",hm->startb,hm->endb,sub); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 
1; c = i; } if(f[i].a+b[i].ga-local_gpo-sub > max){ max = f[i].a+b[i].ga-local_gpo-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb -local_gpo -sub > max){ max = f[i].a+b[i].gb - local_gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a - local_gpo-sub > max){ max = f[i].ga+b[i].a - local_gpo-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb - local_tgpe-sub > max){ max = f[i].gb+b[i].gb -local_tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - local_gpe -sub> max){ max = f[i].gb+b[i].gb - local_gpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a - local_gpo-sub > max){ max = f[i].gb+b[i].a - local_gpo-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb-local_gpo-sub > max){ max = f[i].a+b[i].gb - local_gpo-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb -local_tgpe-sub > max){ max = f[i].gb+b[i].gb - local_tgpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb - gpe-sub > max){ max = f[i].gb+b[i].gb - gpe-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d 
+%d(local_gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; 
hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; 
hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ss_dyn(subm,seq1,seq2,hm,hirsch_path); break; } return hirsch_path; } struct large_states* 
foward_large_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm) { struct large_states* s = hm->f; float *subp = 0; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; s[startb].a = s[0].a; s[startb].ga = s[0].ga; s[startb].gb = s[0].gb; if(startb == 0){ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a-local_tgpe; //if (s[j-1].ga-local_tgpe > s[j].ga){ // s[j].ga = s[j-1].ga-local_tgpe; //} //if(s[j-1].ga > s[j-1].a){ // s[j].ga = s[j-1].ga-local_tgpe; //}else{ // s[j].ga = s[j-1].a-local_tgpe; //} s[j].ga = MAX(s[j-1].ga,s[j-1].a)-local_tgpe; s[j].gb = -FLOATINFTY; } }else{ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a-local_gpo; //if (s[j-1].ga - gpe > s[j].ga){ // s[j].ga = s[j-1].ga-gpe; //} //if(s[j-1].ga - gpe >s[j-1].a-local_gpo){ // s[j].ga = s[j-1].ga-gpe; //}else{ // s[j].ga = s[j-1].a-local_gpo; //} s[j].ga = MAX(s[j-1].ga - local_gpe,s[j-1].a-local_gpo); s[j].gb = -FLOATINFTY; } } s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; s[endb].gb = -FLOATINFTY; seq2--; for (i = starta;i < enda;i++){ subp = subm[seq1[i]]; pa = s[startb].a; pga = s[startb].ga; pgb = s[startb].gb; if(startb == 0){ s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; //s[startb].gb = pa-local_tgpe; //if(pgb - local_tgpe > s[startb].gb){ // s[startb].gb = pgb-local_tgpe; //} //if(pgb > pa){ // s[startb].gb = pgb-local_tgpe; //}else{ // s[startb].gb = pa-local_tgpe; //} s[startb].gb = MAX(pgb,pa) - local_tgpe; }else{ s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; //s[startb].gb = pa-local_gpo; //if(pgb -gpe > s[startb].gb){ // s[startb].gb = pgb -gpe; //} //if(pgb - gpe > pa - local_gpo){ // s[startb].gb = pgb - gpe; //}else{ // s[startb].gb = pa - local_gpo; //} s[startb].gb = 
MAX(pgb - local_gpe,pa - local_gpo); } for (j = startb+1; j <= endb;j++){ ca = s[j].a; //if((pga -= local_gpo) > pa){ // pa = pga; //} //if((pgb -= local_gpo) > pa){ // pa = pgb; //} pa = MAX3(pa,pga-local_gpo,pgb-local_gpo); pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j-1].a-local_gpo; //if (s[j-1].ga-gpe > s[j].ga){ // s[j].ga = s[j-1].ga-gpe; //} //if(s[j-1].ga-gpe >s[j-1].a-local_gpo){ // s[j].ga = s[j-1].ga-gpe; //}else{ // s[j].ga = s[j-1].a-local_gpo; //} s[j].ga = MAX(s[j-1].ga-local_gpe,s[j-1].a-local_gpo); pgb = s[j].gb; //s[j].gb = ca-local_gpo; //if(pgb-gpe > s[j].gb){ // s[j].gb = pgb-gpe; //} //if(pgb-gpe > ca-local_gpo){ // s[j].gb = pgb-gpe; //}else{ // s[j].gb = ca-local_gpo; //} s[j].gb = MAX(pgb-local_gpe ,ca-local_gpo); pa = ca; } } return s; } struct large_states* backward_large_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm) { struct large_states* s = hm->b; float *subp = 0; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; s[endb].a = s[0].a ; s[endb].ga = s[0].ga; s[endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(endb == hm->len_b){ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a-local_tgpe; //if (s[j+1].ga-local_tgpe > s[j].ga){ // s[j].ga = s[j+1].ga-local_tgpe; //} //if(s[j+1].ga > s[j+1].a){ // s[j].ga = s[j+1].ga-local_tgpe; //}else{ // s[j].ga = s[j+1].a-local_tgpe; //} s[j].ga = MAX(s[j+1].ga,s[j+1].a)-local_tgpe; s[j].gb = -FLOATINFTY; } }else{ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a-local_gpo; //if (s[j+1].ga-gpe > s[j].ga){ // s[j].ga = s[j+1].ga-gpe; //} //if(s[j+1].ga-gpe > s[j+1].a-local_gpo){ // s[j].ga = s[j+1].ga-gpe; //}else{ // s[j].ga = s[j+1].a-local_gpo; //} s[j].ga = 
MAX(s[j+1].ga-local_gpe,s[j+1].a-local_gpo); s[j].gb = -FLOATINFTY; } } s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; s[startb].gb = -FLOATINFTY; i = enda-starta; seq1+= starta; while(i--){ subp = subm[seq1[i]]; pa = s[endb].a; pga = s[endb].ga; pgb = s[endb].gb; s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; if(endb == hm->len_b){ //s[endb].gb = pa-local_tgpe; //if(pgb-local_tgpe > s[endb].gb){ // s[endb].gb = pgb-local_tgpe; //} //if(pgb > pa){ // s[endb].gb = pgb-local_tgpe; //}else{ // s[endb].gb = pa-local_tgpe; //} s[endb].gb = MAX(pgb,pa)-local_tgpe; }else{ //s[endb].gb = pa-local_gpo; //if(pgb-gpe > s[endb].gb){ // s[endb].gb = pgb-gpe; //} //if(pgb-gpe > pa-local_gpo){ // s[endb].gb = pgb-gpe; //}else{ // s[endb].gb = pa-local_gpo; //} s[endb].gb = MAX(pgb-local_gpe,pa-local_gpo); } for(j = endb-1;j >= startb;j--){ ca = s[j].a; //if((pga -= local_gpo) > pa){ // pa = pga; //} //if((pgb -= local_gpo) > pa){ // pa = pgb; //} pa = MAX3(pa,pga - local_gpo,pgb-local_gpo); pa += subp[seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j+1].a-local_gpo; //if (s[j+1].ga-gpe > s[j].ga){ // s[j].ga = s[j+1].ga-gpe; //} //if(s[j+1].ga-gpe >s[j+1].a-local_gpo){ // s[j].ga = s[j+1].ga-gpe; //}else{ // s[j].ga = s[j+1].a-local_gpo; //} s[j].ga = MAX(s[j+1].ga-local_gpe,s[j+1].a-local_gpo); pgb = s[j].gb; //s[j].gb = ca-local_gpo; //if(pgb-gpe > s[j].gb){ // s[j].gb = pgb-gpe; //} //if(pgb-gpe > ca-local_gpo){ // s[j].gb = pgb-gpe; //}else{ // s[j].gb = ca-local_gpo; //} s[j].gb = MAX(pgb-local_gpe,ca-local_gpo); pa = ca; } } return s; } int* hirsch_large_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm, int* hirsch_path,int sip) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; 
} hm->enda = mid; hm->f = foward_large_hirsch_ps_dyn(prof1,seq2,hm,sip); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_large_hirsch_ps_dyn(prof1,seq2,hm,sip); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_large_align_two_ps_vector(prof1,seq2,hm,hirsch_path,input_states,old_cor,sip); return hirsch_path; } int* hirsch_large_align_two_ps_vector(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip) { struct large_states* f = hm->f; struct large_states* b = hm->b; int i,j,c; int transition = -1; const float open = local_gpo * sip; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -FLOATINFTY; float max = -FLOATINFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; prof1+= ((old_cor[4]+1)<<6); //i = hm->startb; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].a-sub> max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga-open-sub > max){ max = f[i].a+b[i].ga-open-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a-open-sub > max){ max = f[i].ga+b[i].a-open-sub; // 
fprintf(stderr,"gap_a->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[27]-sub > max){ max = f[i].gb+b[i].a+prof1[27]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= ((old_cor[4]+1)<<6); //fprintf(stderr,"Transition:%d at:%d\n",transition,c); j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; 
hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; 
hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d 
%d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_ps_dyn(prof1,seq2,hm,hirsch_path,sip); break; } return hirsch_path; } struct large_states* foward_large_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int sip) { // unsigned int freq[26]; struct large_states* s = hm->f; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; const float open = local_gpo * sip; const float ext = local_gpe *sip; const float text = local_tgpe * sip; prof1 += 
(starta)<< 6; s[startb].a = s[0].a; s[startb].ga = s[0].ga; s[startb].gb = s[0].gb; if(startb == 0){ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a-text; //if (s[j-1].ga-text > s[j].ga){ // s[j].ga = s[j-1].ga-text; //} //if(s[j-1].ga > s[j-1].a){ // s[j].ga = s[j-1].ga-text; //}else{ // s[j].ga = s[j-1].a-text; //} s[j].ga = MAX(s[j-1].ga,s[j-1].a) - text; s[j].gb = -FLOATINFTY; } }else{ for (j = startb+1; j < endb;j++){ s[j].a = -FLOATINFTY; //s[j].ga = s[j-1].a-open; //if (s[j-1].ga-ext > s[j].ga){ // s[j].ga = s[j-1].ga-ext; //} //if(s[j-1].ga-ext > s[j-1].a-open){ // s[j].ga = s[j-1].ga-ext; //}else{ // s[j].ga = s[j-1].a-open; //} s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); s[j].gb = -FLOATINFTY; } } s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; s[endb].gb = -FLOATINFTY; seq2--; for (i = starta;i < enda;i++){ prof1 += 64; //pa = 1; //for (j = 26; j--;){ // if(prof1[j]){ // freq[pa] = j; // pa++; // } //} //freq[0] = pa; pa = s[startb].a; pga = s[startb].ga; pgb = s[startb].gb; if(startb == 0){ s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; //s[startb].gb = pa+prof1[29]; //if(pgb+prof1[29] > s[startb].gb){ // s[startb].gb = pgb+prof1[29]; //} //if(pgb > pa){ // s[startb].gb = pgb+prof1[29]; //}else{ // s[startb].gb = pa+prof1[29]; //} s[startb].gb = MAX(pgb,pa)+prof1[29]; }else{ s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; //s[startb].gb = pa+prof1[27]; //if(pgb+prof1[28] > s[startb].gb){ // s[startb].gb = pgb+prof1[28]; //} //if(pgb+prof1[28] > pa+prof1[27]){ // s[startb].gb = pgb+prof1[28]; //}else{ // s[startb].gb = pa+prof1[27]; //} s[startb].gb = MAX(pgb+prof1[28],pa+prof1[27]); } for (j = startb+1; j <= endb;j++){ ca = s[j].a; //if((pga -= open) > pa){ // pa = pga; //} //if((pgb += prof1[-37]) > pa){ // pa = pgb; //} pa = MAX3(pa,pga -open,pgb + prof1[-37]); pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j-1].a-open; //if (s[j-1].ga-ext > s[j].ga){ // s[j].ga = s[j-1].ga-ext; //} 
//if (s[j-1].ga-ext > s[j-1].a-open){ // s[j].ga = s[j-1].ga-ext; //}else{ // s[j].ga = s[j-1].a-open; //} s[j].ga = MAX(s[j-1].ga-ext,s[j-1].a-open); pgb = s[j].gb; //s[j].gb = ca+prof1[27]; //if(pgb+prof1[28] > s[j].gb){ // s[j].gb = pgb+prof1[28]; //} //if(pgb+prof1[28] > ca+prof1[27]){ // s[j].gb = pgb+prof1[28]; //}else{ // s[j].gb = ca+prof1[27]; //} s[j].gb = MAX(pgb+prof1[28],ca+prof1[27]); pa = ca; } } prof1 -= enda << 6; return s; } struct large_states* backward_large_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int sip) { //unsigned int freq[26]; struct large_states* s = hm->b; const int starta = hm->starta; const int enda = hm->enda; const int startb = hm->startb; const int endb = hm->endb; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; const float open = local_gpo * sip; const float ext = local_gpe *sip; const float text = local_tgpe * sip; prof1 += (enda+1) << 6; s[endb].a = s[0].a; s[endb].ga = s[0].ga; s[endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(endb == hm->len_b){ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a-text; //if (s[j+1].ga-text > s[j].ga){ // s[j].ga = s[j+1].ga-text; //} //if(s[j+1].ga > s[j+1].a){ // s[j].ga = s[j+1].ga-text; //}else{ // s[j].ga = s[j+1].a-text; //} s[j].ga = MAX(s[j+1].ga,s[j+1].a)-text; s[j].gb = -FLOATINFTY; } }else{ for(j = endb-1;j > startb;j--){ s[j].a = -FLOATINFTY; //s[j].ga = s[j+1].a-open; //if (s[j+1].ga-ext > s[j].ga){ // s[j].ga = s[j+1].ga-ext; //} //if(s[j+1].ga-ext > s[j+1].a-open){ // s[j].ga = s[j+1].ga-ext; //}else{ // s[j].ga = s[j+1].a-open; //} s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); s[j].gb = -FLOATINFTY; } } s[startb].a = -FLOATINFTY; s[startb].ga = -FLOATINFTY; s[startb].gb = -FLOATINFTY; i = enda-starta; while(i--){ prof1 -= 64; //pa = 1; //for (j = 26; j--;){ // if(prof1[j]){ // freq[pa] = j; // pa++; // } //} //freq[0] = pa; 
pa = s[endb].a; pga = s[endb].ga; pgb = s[endb].gb; s[endb].a = -FLOATINFTY; s[endb].ga = -FLOATINFTY; if(endb == hm->len_b){ //s[endb].gb = pa+prof1[29]; //if(pgb+prof1[29] > s[endb].gb){ // s[endb].gb = pgb+prof1[29]; //} //if(pgb > pa){ // s[endb].gb = pgb+prof1[29]; //}else{ // s[endb].gb = pa+prof1[29]; //} s[endb].gb = MAX(pgb,pa) +prof1[29]; }else{ //s[endb].gb = pa+prof1[27]; //if(pgb+prof1[28] > s[endb].gb){ // s[endb].gb = pgb+prof1[28]; //} //if(pgb+prof1[28] > pa+prof1[27]){ // s[endb].gb = pgb+prof1[28]; //}else{ // s[endb].gb = pa+prof1[27]; //} s[endb].gb = MAX(pgb+prof1[28],pa+prof1[27]); } for(j = endb-1;j >= startb;j--){ ca = s[j].a; //if((pga -= open) > pa){ // pa = pga; //} //if((pgb += prof1[91]) > pa){ // pa = pgb; //} pa = MAX3(pa,pga - open,pgb +prof1[91]); pa += prof1[32 + seq2[j]]; s[j].a = pa; pga = s[j].ga; //s[j].ga = s[j+1].a-open; //if (s[j+1].ga-ext > s[j].ga){ // s[j].ga = s[j+1].ga-ext; //} //if (s[j+1].ga-ext > s[j+1].a-open){ // s[j].ga = s[j+1].ga-ext; //}else{ // s[j].ga = s[j+1].a-open; //} s[j].ga = MAX(s[j+1].ga-ext,s[j+1].a-open); pgb = s[j].gb; //s[j].gb = ca+prof1[27]; //if(pgb+prof1[28] > s[j].gb){ // s[j].gb = pgb+prof1[28]; //} //if(pgb+prof1[28] > ca+prof1[27]){ // s[j].gb = pgb+prof1[28]; //}else{ // s[j].gb = ca+prof1[27]; //} s[j].gb = MAX(pgb+prof1[28],ca+prof1[27]); pa = ca; } } return s; } int* hirsch_large_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; float input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = foward_large_hirsch_pp_dyn(prof1,prof2,hm); /*int i; 
fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = backward_large_hirsch_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = hirsch_large_align_two_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* hirsch_large_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[]) { struct large_states* f = hm->f; struct large_states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -FLOATINFTY; float max = -FLOATINFTY; //float middle = (hm->endb - hm->startb)/2 + hm->startb; float middle = (old_cor[3] - old_cor[2])/2 + old_cor[2]; float sub = 0.0; prof1+= ((old_cor[4]+1) << 6); //prof2 += 64 * (hm->startb); //i = hm->startb; prof2 += old_cor[2] << 6; i = old_cor[2]; c = -1; //for(i = hm->startb; i < hm->endb;i++){ for(i = old_cor[2]; i < old_cor[3];i++){ sub = abs(middle -i); sub /= 1000; prof2 += 64; //fprintf(stderr,"%d %d %d \n",f[i].a,b[i].a,max); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[27]-sub > max){ max = f[i].a+b[i].ga+prof2[27]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[27] -sub> max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[27]-sub > max){ max = f[i].ga+b[i].a+prof2[27]-sub; 
// fprintf(stderr,"gap_a->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[27]-sub > max){ max = f[i].gb+b[i].a+prof1[27]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(local_gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } //i = hm->endb; i = old_cor[3]; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[27]-sub > max){ max = f[i].a+b[i].gb+prof1[27]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb+prof1[29]-sub > max){ max = f[i].gb+b[i].gb+prof1[29]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb+prof1[28]-sub > max){ max = f[i].gb+b[i].gb+prof1[28]-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (old_cor[4]+1)<<6; //prof2 -= hm->endb << 6; prof2 -= old_cor[3] << 6; //fprintf(stderr,"Transition:%d at:%d\n",transition,c); //if(transition == -1){ // exit(0); //} j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = 
input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; //fprintf(stderr,"Using this for start:%ld %ld %ld\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = 0; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // 
fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = 0; hm->b[0].gb = -FLOATINFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; 
hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -FLOATINFTY; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -FLOATINFTY; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = hirsch_large_pp_dyn(prof1,prof2,hm,hirsch_path); break; } return hirsch_path; } struct large_states* foward_large_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm) { int freq[26]; /*union print{ __m128i m; int tmp[4]; } output; __m128i xmm1; __m128i xmm2;*/ struct large_states* s = hm->f; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->starta) << 6; prof2 += (hm->startb) << 
6; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; /*if(s[hm->startb].ga == -FLOATINFTY && s[hm->startb].a == -FLOATINFTY){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; s[j].gb = -FLOATINFTY; } prof2+=64; }else{ */ if(hm->startb == 0){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; //if (s[j-1].ga > s[j-1].a){ // s[j].ga = s[j-1].ga+prof2[29]; //}else{ // s[j].ga = s[j-1].a+prof2[29]; //} s[j].ga = MAX(s[j-1].ga,s[j-1].a)+prof2[29]; s[j].gb = -FLOATINFTY; } prof2+=64; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -FLOATINFTY; //if(s[j-1].ga+prof2[28] > s[j-1].a+prof2[27]){ // s[j].ga = s[j-1].ga+prof2[28]; //}else{ // s[j].ga = s[j-1].a+prof2[27]; //} s[j].ga = MAX(s[j-1].ga+prof2[28],s[j-1].a+prof2[27]); s[j].gb = -FLOATINFTY; } prof2+=64; } //} prof2 -= (hm->endb-hm->startb) << 6; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; s[hm->endb].gb = -FLOATINFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += 64; c = 1; for (j = 0;j < 26; j++){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; //if(pgb == -FLOATINFTY && pa == -FLOATINFTY){ // s[hm->startb].gb = -FLOATINFTY; //}else{ if(hm->startb == 0){ //if(pgb > pa ){ // s[hm->startb].gb = pgb+prof1[29]; //}else{ // s[hm->startb].gb = pa+prof1[29]; //} s[hm->startb].gb = MAX(pgb,pa)+ prof1[29]; }else{ //if(pgb+prof1[28] > pa+prof1[27]){ // s[hm->startb].gb = pgb+prof1[28]; //}else{ // s[hm->startb].gb = pa+prof1[27]; //} s[hm->startb].gb = MAX(pgb+prof1[28],pa+prof1[27]); } //} for (j = hm->startb+1; j <= hm->endb;j++){ prof2 += 64; ca = s[j].a; //if((pga += prof2[-37]) > pa){ // pa = pga; //} //if((pgb += prof1[-37]) > pa){ // pa = pgb; //} //pa = MAX(pgb + prof1[-37],pa); //pa = MAX(pga + prof2[-37],pa); pa = MAX3(pa,pga + prof2[-37],pgb 
+ prof1[-37]); prof2 += 32; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; //if (s[j-1].ga+prof2[28] > s[j-1].a+prof2[27]){ // s[j].ga = s[j-1].ga+prof2[28]; //}else{ // s[j].ga = s[j-1].a+prof2[27]; //} /*xmm1 = _mm_set_epi32 (ca,s[j-1].a,pgb,s[j-1].ga); xmm2 = _mm_set_epi32 (prof1[27],prof2[27],prof1[28],prof2[28]); xmm1 = _mm_add_epi32 (xmm1,xmm2); xmm2 = _mm_srli_si128(xmm1, 8); output.m = _mm_cmpgt_epi32(xmm1,xmm2); output.m = _mm_or_si128( _mm_andnot_si128(output.m,xmm2),_mm_and_si128(output.m,xmm1)); s[j].ga =output.tmp[0]; s[j].gb = output.tmp[1];*/ //output.m = _mm_add_epi32 (xmm1,xmm2); //_mm_store_si128(dst_ptr, xmm3); //fprintf(stderr,"%d %d %d %d %d %d %d %d\n",output.tmp[0],output.tmp[1],output.tmp[2],output.tmp[3],s[j-1].ga+prof2[28],s[j-1].a+prof2[27],pgb+prof1[28] ,ca+prof1[27]); s[j].ga = MAX(s[j-1].ga+prof2[28],s[j-1].a+prof2[27]); pgb = s[j].gb; //if(pgb+prof1[28] > ca+prof1[27]){ // s[j].gb = pgb+prof1[28]; //}else{ // s[j].gb = ca+prof1[27]; //} s[j].gb = MAX(pgb+prof1[28] ,ca+prof1[27]); //fprintf(stderr,"%d %d %d %d\n",output.tmp[0],output.tmp[1],s[j].ga,s[j].gb ); pa = ca; } prof2 -= (hm->endb-hm->startb) << 6; } prof1 -= (hm->enda) << 6; return s; } struct large_states* backward_large_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm) { int freq[26]; struct large_states* s = hm->b; register float pa = 0; register float pga = 0; register float pgb = 0; register float ca = 0; register int i = 0; register int j = 0; register int c = 0; prof1 += (hm->enda+1) << 6; prof2 += (hm->endb+1) << 6; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; //init of first row; //j = endb-startb; /*if(s[hm->endb].ga == -FLOATINFTY && s[hm->endb].a == -FLOATINFTY){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; s[j].ga = -FLOATINFTY; s[j].gb = -FLOATINFTY; } prof2 -= 64; }else{*/ if(hm->endb == hm->len_b){ for(j = 
hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; //if(s[j+1].ga > s[j+1].a){ // s[j].ga = s[j+1].ga+prof2[29]; //}else{ // s[j].ga = s[j+1].a+prof2[29]; //} s[j].ga = MAX(s[j+1].ga,s[j+1].a)+prof2[29]; s[j].gb = -FLOATINFTY; } prof2 -= 64; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -FLOATINFTY; //if(s[j+1].ga+prof2[28] > s[j+1].a+prof2[27]){ // s[j].ga = s[j+1].ga+prof2[28]; //}else{ // s[j].ga = s[j+1].a+prof2[27]; //} s[j].ga = MAX(s[j+1].ga+prof2[28],s[j+1].a+prof2[27]); s[j].gb = -FLOATINFTY; } prof2 -= 64; } //} s[hm->startb].a = -FLOATINFTY; s[hm->startb].ga = -FLOATINFTY; s[hm->startb].gb = -FLOATINFTY; i = hm->enda-hm->starta; while(i--){ prof1 -= 64; c = 1; for (j = 0;j < 26; j++){ if(prof1[j]){ freq[c] = j; c++; } } freq[0] = c; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -FLOATINFTY; s[hm->endb].ga = -FLOATINFTY; //if(pgb == -FLOATINFTY && pa == -FLOATINFTY){ // s[hm->endb].gb = -FLOATINFTY; //}else{ if(hm->endb == hm->len_b){ //if(pgb > pa){ // s[hm->endb].gb = pgb+prof1[29]; //}else{ // s[hm->endb].gb = pa+prof1[29]; //} s[hm->endb].gb = MAX(pgb,pa)+prof1[29]; }else{ //if(pgb+prof1[28] > pa+prof1[27]){ // s[hm->endb].gb = pgb+prof1[28]; //}else{ // s[hm->endb].gb = pa+prof1[27]; //} s[hm->endb].gb = MAX(pgb+prof1[28] ,pa+prof1[27]); } //} //j = endb-startb; prof2 += (hm->endb-hm->startb) << 6; //while(j--){ for(j = hm->endb-1;j >= hm->startb;j--){ prof2 -= 64; ca = s[j].a; //pa = MAX(pga + prof2[91],pa); //pa = MAX(pgb + prof1[91],pa); pa = MAX3(pa,pga + prof2[91],pgb + prof1[91]); //if((pga += prof2[91]) > pa){ // pa = pga; //} //if((pgb += prof1[91]) > pa){ // pa = pgb; //} prof2 += 32; for (c = 1;c < freq[0];c++){ pa += prof1[freq[c]]*prof2[freq[c]]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; //if (s[j+1].ga+prof2[28] > s[j+1].a+prof2[27]){ // s[j].ga = s[j+1].ga+prof2[28]; //}else{ // s[j].ga = s[j+1].a+prof2[27]; //} s[j].ga = MAX(s[j+1].ga+prof2[28], 
s[j+1].a+prof2[27]); pgb = s[j].gb; s[j].gb = MAX(pgb+prof1[28], ca+prof1[27]); //if(pgb+prof1[28] > ca+prof1[27]){ // s[j].gb = pgb+prof1[28]; //}else{ // s[j].gb = ca+prof1[27]; //} pa = ca; } } return s; } float* make_large_profile(float* prof, int* seq,int len,float** subm) { int i,j,c; prof = malloc(sizeof(float)*(len+2)*64); prof += (64 *(len+1)); for (i = 0;i < 64;i++){ prof[i] = 0.0; } prof[23+32] = -local_gpo; prof[24+32] = -local_gpe; prof[25+32] = -local_tgpe; i = len; while(i--){ prof -= 64; for (j = 0;j < 64;j++){ prof[j] = 0.0; } c = seq[i]; prof[c] += 1.0; prof += 32; for(j = 23;j--;){ prof[j] = subm[c][j]; } prof[23] = -local_gpo; prof[24] = -local_gpe; prof[25] = -local_tgpe; prof -= 32; } prof -= 64; for (i = 0;i < 64;i++){ prof[i] = 0.0; } prof[23+32] = -local_gpo; prof[24+32] = -local_gpe; prof[25+32] = -local_tgpe; return prof; } void set_large_gap_penalties(float* prof,int len,int nsip) { int i; prof += (64 *(len+1)); prof[27] = prof[55]*nsip;//gap open or close prof[28] = prof[56]*nsip;//gap extention prof[29] = prof[57]*nsip;//gap open or close i = len+1; while(i--){ prof -= 64; prof[27] = prof[55]*nsip;//gap open or close prof[28] = prof[56]*nsip;//gap extention prof[29] = prof[57]*nsip;//gap open or close } } float* large_update(float* profa,float* profb,float* newp,int* path,int sipa,int sipb) { int i,j,c; for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; newp += 64; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. 
-> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * local_gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 64; i--;){ newp[i] = profb[i]; } profb += 64; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipa;//1; i = local_tgpe*sipa; }else{ newp[24] += sipa;//1; i = local_gpe*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = local_tgpe*sipa; newp[23] += sipa;//1; i += local_gpo*sipa; }else{ newp[23] += sipa;//1; i = local_gpo*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = local_tgpe*sipa; newp[23] += sipa;//1; i += local_gpo*sipa; }else{ newp[23] += sipa;//1; i = local_gpo*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } } } } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * local_gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 64; i--;){ newp[i] = profa[i]; } profa+=64; if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipb;//1; i = local_tgpe*sipb; }else{ newp[24] += sipb;//1; i = local_gpe*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += sipb;//1; i = local_tgpe*sipb; newp[23] += sipb;//1; i += local_gpo*sipb; }else{ newp[23] += sipb;//1; i = local_gpo*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipb;//1; i = local_tgpe*sipb; newp[23] += sipb;//1; i += local_gpo*sipb; }else{ newp[23] += sipb;//1; i = local_gpo*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } } } } newp += 64; c++; } for (i = 64; i--;){ newp[i] = 
profa[i] + profb[i]; } newp -= (path[0]+1) *64; return newp; } struct hirsch_large_mem* hirsch_large_mem_alloc(struct hirsch_large_mem* hm,int x) { // a=((typeof(a))(((int)(((void *)malloc(c+15))+15))&-16)). hm = (struct hirsch_large_mem *) malloc(sizeof(struct hirsch_large_mem)); hm->starta = 0; hm->startb = 0; hm->enda = 0; hm->endb = 0; hm->size = x; hm->len_a = 0; hm->len_b = 0; hm->f = malloc(sizeof(struct large_states)* (x+1)); hm->b = malloc(sizeof(struct large_states)* (x+1)); return hm; } struct hirsch_large_mem* hirsch_large_mem_realloc(struct hirsch_large_mem* hm,int x) { hm->starta = 0; hm->startb = 0; hm->enda = 0; hm->endb = 0; hm->len_a = 0; hm->len_b = 0; hm->size = x; hm->f = realloc(hm->f,sizeof(struct large_states)* (x+1)); hm->b = realloc(hm->b,sizeof(struct large_states)* (x+1)); return hm; } void hirsch_large_mem_free(struct hirsch_large_mem* hm) { free(hm->f); free(hm->b); free(hm); } kalign2_hirschberg_large.h0000644001210100001440000000607111577654215015344 0ustar olifriusers/* kalign2_hirschberg_large.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */

/* One cell of a dynamic-programming state vector. */
struct large_states{
	float a;   /* score of the 'a' state (transition comments call it "aligned") */
	float ga;  /* score of the 'ga' state; extended along profile 2 in the pp pass */
	float gb;  /* score of the 'gb' state; extended along profile 1 in the pp pass */
	float x;   /* not referenced by the profile-profile routines - presumably padding; confirm */
};

/* Working memory shared by the forward and backward passes. */
struct hirsch_large_mem{
	struct large_states* f;  /* forward state vector, size+1 entries */
	struct large_states* b;  /* backward state vector, size+1 entries */
	int starta;              /* current range in the first profile/sequence */
	int startb;              /* current range in the second profile/sequence */
	int enda;
	int endb;
	int size;                /* value passed as x to the alloc/realloc routines */
	int len_a;               /* reset to 0 by alloc/realloc; set by the caller */
	int len_b;               /* compared against endb to detect the last column */
};

/* profile - profile alignment */
int* hirsch_large_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm, int* hirsch_path);
struct large_states* foward_large_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm);
struct large_states* backward_large_hirsch_pp_dyn(const float* prof1,const float* prof2,struct hirsch_large_mem* hm);
int* hirsch_large_align_two_pp_vector(const float* prof1,const float* prof2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[]);

/* profile - sequence alignment */
int* hirsch_large_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm, int* hirsch_path,int sip);
struct large_states* foward_large_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int sip);
struct large_states* backward_large_hirsch_ps_dyn(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int sip);
int* hirsch_large_align_two_ps_vector(const float* prof1,const int* seq2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[],int sip);

/* sequence - sequence alignment */
int* hirsch_large_ss_dyn(float**subm, const int* seq1,const int* seq2,struct hirsch_large_mem* hm, int* hirsch_path);
struct large_states* foward_large_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm);
struct large_states* backward_large_hirsch_ss_dyn(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm);
int* hirsch_large_align_two_ss_vector(float**subm,const int* seq1,const int* seq2,struct hirsch_large_mem* hm,int* hirsch_path,float input_states[],int old_cor[]);

/* profile construction and merging */
float* make_large_profile(float* prof, int* seq,int len,float** subm);
void set_large_gap_penalties(float* prof,int len,int nsip);
float* large_update(float* profa,float* profb,float* newp,int* path,int sipa,int sipb);

/* working-memory management */
struct hirsch_large_mem* hirsch_large_mem_alloc(struct hirsch_large_mem* hm,int x);
struct hirsch_large_mem* hirsch_large_mem_realloc(struct hirsch_large_mem* hm,int x);
void hirsch_large_mem_free(struct hirsch_large_mem* hm);
kalign2_inferface.c0000644001210100001440000003041711577654215013770 0ustar olifriusers/* kalign2_interface.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc.
to: timolassmann@gmail.com */ #include #include //#include #include "kalign2.h" #include "kalign2_input.h" void parameter_message(struct parameters* param) { if(param->infile[0]){ if(param->dna){ fprintf(stderr,"Aligning %d RNA/DNA sequences from file %s with these parameters:\n",numseq,param->infile[0]); }else{ fprintf(stderr,"Aligning %d protein sequences from file %s with these parameters:\n",numseq,param->infile[0]); } }else{ if(param->dna){ fprintf(stderr,"Aligning %d RNA/DNA sequences with these parameters:\n",numseq); }else{ fprintf(stderr,"Aligning %d protein sequences with these parameters:\n",numseq); } } // fprintf(stderr," Distance estimation method: %s\n",param->distance); //fprintf(stderr," Tree building method: %s\n",param->tree); //fprintf(stderr," Ntree: %d\n",param->ntree); //fprintf(stderr," %0.8f gap open penalty\n",(float)gpo/5); fprintf(stderr," %0.8f gap open penalty\n",(float)gpo); //fprintf(stderr," %0.8f gap extension\n",(float)gpe/10); fprintf(stderr," %0.8f gap extension\n",(float)gpe); //fprintf(stderr," %0.8f terminal gap penalty\n",(float)tgpe/10); fprintf(stderr," %0.8f terminal gap penalty\n",(float)tgpe); //fprintf(stderr," %0.8f bonus\n",param->secret/10); fprintf(stderr," %0.8f bonus\n",param->secret); /*fprintf(stderr," %0.2f zlevel\n",param->zlevel); fprintf(stderr," %0.2f igw\n",param->internal_gap_weight); fprintf(stderr," %d sw\n",param->smooth_window); fprintf(stderr," %0.2f smo\n", param->gap_inc);*/ if(param->outfile){ fprintf(stderr,"Alignment will be written to file:'%s'.\n\n",param->outfile); }else{ fprintf(stderr,"Alignment will be written to stdout.\n\n"); } } struct parameters* interface(struct parameters* param,int argc,char **argv) { int i,c; param->gpo = -1.0; param->gpe = -1.0; param->tgpe = -1.0; param->secret = -1.0; param->zlevel = 58.8; param->sub_matrix = 0; param->aa = 0; param->infile = malloc(sizeof(char*)*3); param->infile[0] = 0; param->infile[1] = 0; param->infile[2] = 0; param->input = 0; 
param->outfile = 0; param->format = 0; param->help_flag = 0; param->quiet = 0; param->id = -1; param->distance = "wu"; param->reformat = 0; param->sort = 0; param->print_svg_tree = 0; param->dna = -1; param->feature_type = 0; param->alignment_type = "default"; param->tree = "upgma"; param->ntree = 2; param->print_tree = 0; param->alter_gaps = 0; param->alter_range = 0.5; param->alter_weight = 100; param->internal_gap_weight = 0; param->smooth_window = 1; param->gap_inc = 0.0; param->same_feature_score = 75; param->diff_feature_score = -5; static char license[] = "\n\ Kalign version 2.04, Copyright (C) 2004, 2005, 2006 Timo Lassmann\n\n\ Kalign is free software. You can redistribute it and/or modify\n\ it under the terms of the GNU General Public License as\n\ published by the Free Software Foundation.\n\n"; while (1){ static struct option long_options[] ={ {"gapopen", required_argument, 0,'s'}, {"gpo", required_argument, 0, 's'}, {"gapextension", required_argument, 0, 'e'}, {"gpe", required_argument, 0, 'e'}, {"secret", required_argument, 0, 'm'}, {"bonus", required_argument, 0, 'm'}, {"terminalgapextension", required_argument, 0, 't'}, {"tgpe", required_argument, 0, 't'}, {"zcutoff", required_argument, 0, 0}, {"distance", required_argument, 0, 'd'}, {"ntree", required_argument, 0, 0}, {"tree", required_argument, 0, 0}, {"format", required_argument, 0, 'f'}, {"reformat", 0, 0, 'r'}, {"sort",required_argument,0,'c'}, {"feature", required_argument, 0, 0}, {"type", required_argument, 0, 0}, {"alter_gaps", required_argument, 0, 0}, {"altergaps", required_argument, 0, 0}, {"alter_range", required_argument, 0, 0}, {"alter_weight", required_argument, 0, 0}, {"internal_gap_weight", required_argument, 0, 0}, {"smooth_window", required_argument, 0, 0}, {"gap_inc", required_argument, 0, 'a'}, {"matrix", required_argument, 0, 0}, {"mmbonus", required_argument, 0, 0}, {"nuc", 0, 0, 0}, {"dna", 0, 0, 0}, {"rna", 0, 0, 0}, {"protein", 0, 0, 0}, {"profile", 0, 0, 0}, {"prof", 0, 
0, 0}, {"id", required_argument, 0, 0}, {"printtree", required_argument, 0, 0}, {"svgtree", required_argument, 0, 0}, {"svg_tree", required_argument, 0, 0}, {"pairwise", 0, 0, 0}, {"same_feature_score", required_argument, 0, 0}, {"diff_feature_score", required_argument, 0, 0}, {"input", required_argument, 0, 'i'}, {"infile", required_argument, 0, 'i'}, {"in", required_argument, 0, 'i'}, {"output", required_argument, 0, 'o'}, {"outfile", required_argument, 0, 'o'}, {"out", required_argument, 0, 'o'}, {"help", no_argument,0,'h'}, {"quiet", 0, 0, 'q'}, {0, 0, 0, 0} }; int option_index = 0; c = getopt_long_only (argc, argv,"i:o:hqs:e:m:t:z:c:f:d:b:a:r",long_options, &option_index); //c = getopt (argc, argv, "hi:o:"); /* Detect the end of the options. */ if (c == -1){ break; } switch (c){ case 0: if (long_options[option_index].flag != 0){ break; } switch (option_index){ case 0: case 1: fprintf(stderr,"GAGAGA\n"); param->gpo = atof(optarg); break; case 2: case 3: param->gpe = atof(optarg); break; case 4: case 5: param->secret = atof(optarg); break; case 6: case 7: param->tgpe = atof(optarg); break; case 8: param->zlevel = atof(optarg); break; case 9: param->distance = optarg; break; case 10: param->ntree = atoi(optarg); break; case 11: param->tree = optarg; break; case 12: param->format = optarg; break; case 13: param->reformat = 1; break; case 14: param->sort = optarg; break; case 15: param->feature_type = optarg; break; case 16: param->alignment_type = optarg; break; case 17: case 18: param->alter_gaps = atoi(optarg); break; case 19: param->alter_range = atof(optarg); break; case 20: param->alter_weight = atoi(optarg); break; case 21: param->internal_gap_weight = atof(optarg); break; case 22: param->smooth_window = atoi(optarg); break; case 23: param->gap_inc = atof(optarg); break; case 24: param->sub_matrix = optarg; break; case 25: param->aa = atoi(optarg); break; case 26: case 27: case 28: param->dna = 1; break; case 29: param->dna = 0; break; case 30: case 31: 
param->alignment_type = "profile"; break; case 32: param->id = atoi(optarg); break; case 33: param->print_tree = optarg; break; case 34: case 35: param->print_svg_tree = optarg; break; case 36: param->alignment_type = "pairwise"; break; case 37: param->same_feature_score = atof(optarg);//"pairwise"; break; case 38: param->diff_feature_score = atof(optarg);//lignment_type = "pairwise"; break; default: break; } //printf ("option%d %s",option_index,long_options[option_index].name); //if (optarg){ // printf (" with arg %s\n", optarg); //} break; case 's': param->gpo = atof(optarg); //param->help_flag = 1; break; case 'e': param->gpe = atof(optarg); break; case 'm': param->secret = atof(optarg); break; case 't': param->tgpe = atof(optarg); break; case 'z': param->zlevel = atof(optarg); break; case 'c': param->sort = optarg; break; case 'f': param->format = optarg; break; case 'r': param->reformat = 1; break; case 'd': param->distance = optarg; break; case 'b': param->tree = optarg; break; case 'h': param->help_flag = 1; break; case 'i': param->infile[1] = optarg; break; case 'o': param->outfile = optarg; break; case 'a': param->gap_inc = atof(optarg); break; case 'q': param->quiet = 1; break; case '?': free_param(param); exit(1); break; default: abort (); } } if (optind < argc){ c = 0; if(param->infile[1]){ i = 3; }else{ i = 2; } fprintf(stderr,"EXTRA :%d\n",argc - optind); param->infile = realloc(param->infile,(sizeof(char*) * (argc - optind+i))); for (c = i-1 ; c < (argc - optind+i);c++){ param->infile[c] = 0; } c = i-1; while (optind < argc){ /*switch(c){ case 0: param->infile[c] = argv[optind++]; break; case 1: param->outfile = argv[optind++]; break; default: fprintf(stderr,"Unrecognised junk:%s\n",argv[optind++]); break; }*/ param->infile[c] = argv[optind++]; c++; } } //exit(0); if (byg_start("pairwise",param->alignment_type) != -1){ param->ntree = 1; if(param->tgpe == -1.0){ param->tgpe = 0.0f; } } if(param->gap_inc < 0.0){ fprintf(stderr,"%s\n", usage); 
fprintf(stderr,"Invalid parameter setting: gap_inc needs to be > 0 \n"); exit(1); } if(param->quiet){ fclose(stderr); } fprintf(stderr,"%s", license); if (param->help_flag){ fprintf(stderr,"%s\n", usage); exit(1); } //exit(0); /*if (!param->infile[1]){ fprintf(stderr,"%s\n", usage); exit(1); }*/ return param; } kalign2_input.c0000644001210100001440000011667711577654214013221 0ustar olifriusers/* kalign2_input.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_input.h" #include struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param) { int feature = 0; char **input = 0; unsigned short int* input_type = 0; unsigned short int* input_numseq = 0; int num_input = 0; int i = 0; int j = 0; int c = 0; int free_read = 1; while(free_read == 1 || param->infile[i]){ num_input++; i++; free_read = 0; } numseq = 0; input = malloc(sizeof(char*) * num_input); input_type = malloc(sizeof(unsigned short int) * num_input); input_numseq = malloc(sizeof(unsigned short int) * num_input); for (i = 0; i < num_input;i++){ input[i] = 0; input_type[i] = 0; input_numseq[i] = 0; } free_read = 0; if(param->quiet){ c = 1; }else{ c = 0; } for (i = c; i < num_input;i++){ if(!param->infile[i]){ fprintf(stderr,"reading from STDIN: "); }else{ fprintf(stderr,"reading from %s: ",param->infile[i]); } input[i] = get_input_into_string(input[i],param->infile[i]); if(input[i]){ free_read++; if (byg_start("",input[i]) != -1){ input_numseq[i] = count_sequences_macsim(input[i]); feature = 1; input_type[i] = 1; }else if (byg_start("outfile && i){ param->outfile = param->infile[i]; fprintf(stderr,"-> output file, in "); //try to set format.... 
if(!param->format){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; }else{ param->format = "fasta"; } if(param->reformat){ fprintf(stderr,"unaligned fasta format\n"); }else if(param->format){ fprintf(stderr,"%s format\n",param->format); }else{ fprintf(stderr,"fasta format\n"); } } } fprintf(stderr,"\n"); } } if(numseq < 2){ fprintf(stderr,"%s\n", usage); if(!numseq){ fprintf(stderr,"\nWARNING: No sequences found.\n\n"); }else{ fprintf(stderr,"\nWARNING: Only one sequence found.\n\n"); } for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); exit(0); } if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ if( free_read < 2){ fprintf(stderr,"\nWARNING: You are trying to perform a profile - profile alignment but ony one input file was detected.\n\n"); param->alignment_type = "default"; } } if (param->feature_type && !feature){ fprintf(stderr,"\nWARNING: You are trying to perform a feature alignment but the input format(s) do not contain feature information.\n"); for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); exit(0); } numprofiles = (numseq << 1) - 1; aln = aln_alloc(aln); //numseq = 0; if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ j = 0; for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: aln = read_alignment(aln,input[i]); break; case 1: aln = read_alignment_macsim_xml(aln,input[i]); break; case 2: aln = read_alignment_uniprot_xml(aln,input[i]); break; case 3: aln = read_alignment_from_swissprot(aln, input[i]); break; case 4: aln = read_alignment_clustal(aln,input[i]); break; case 5: aln = 
read_alignment_stockholm(aln,input[i]); break; default: aln = read_alignment(aln,input[i]); break; } input[i] = 0; //create partial profile.... aln->nsip[numseq+j] = input_numseq[i]; aln->sip[numseq+j] = malloc(sizeof(int)*aln->nsip[numseq+j]); //fprintf(stderr,"%d %d\n",numseq+j,aln->sl[numseq+j]); j++; } } num_input = j; c = 0; for (i = 0;i < num_input;i++){ // for ( j = 0; j < aln->nsip[numseq+i];j++){ aln->sip[numseq+i][j] = c; c++; // fprintf(stderr,"%d ",aln->sip[numseq+i][j]); } aln->sl[numseq+i] = aln->sl[aln->sip[numseq+i][0]]; // fprintf(stderr,"PROFILE:%d contains: %d long:%d\n",i+numseq,aln->nsip[numseq+i],aln->sl[numseq+i]); // fprintf(stderr,"\n"); } //sanity check -are all input int a,b; for (i = 0;i < num_input;i++){ for ( j = 0; j < aln->nsip[numseq+i]-1;j++){ a = aln->sip[numseq+i][j]; a = aln->sl[a]; for (c = j+1; j < aln->nsip[numseq+i];j++){ b = aln->sip[numseq+i][c]; b = aln->sl[b]; if(a != b){ fprintf(stderr,"Unaligned sequences in input %s.\n",param->infile[i]); for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_aln(aln); free_param(param); exit(0); } } } } //exit(0); /*for (i = 0; i < numseq;i++){ fprintf(stderr,"len%d:%d\n",i,aln->sl[i]); for ( j =0 ; j < aln->sl[i];j++){ //if(aln->s[i][j]> 23 || aln->s[i][j] < 0){ // aln->s[i][j] = -1; //} fprintf(stderr,"%d ",aln->s[i][j]); } // fprintf(stderr,"\n"); } exit(0);*/ }else{ for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: aln = read_sequences(aln,input[i]); break; case 1: aln = read_sequences_macsim_xml(aln,input[i]); break; case 2: aln = read_sequences_uniprot_xml(aln,input[i]); break; case 3: aln = read_sequences_from_swissprot(aln, input[i]); break; case 4: aln = read_sequences_clustal(aln,input[i]); break; case 5: aln = read_sequences_stockholm(aln,input[i]); break; default: aln = read_sequences(aln,input[i]); break; } /*if (byg_start("",input[i]) != -1){ aln = read_sequences_macsim_xml(aln,input[i]); 
}else if (byg_start("format && param->outfile){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; } fprintf(stderr,"Output file: %s, in %s format.\n",param->outfile,param->format); } free(input); free(input_type); free(input_numseq); return aln; } int count_sequences_macsim(char* string) { int n = 0; n = byg_count("",string); if(!n){ return -1; } return n; } int count_sequences_swissprot(char* string) { int n = 0; n = byg_count("ID ",string); if(!n){ return 0; } return n; } int count_sequences_uniprot(char* string) { int n = 0; n = byg_count(" 2 && f>j && j!= 1){ if(c ==0){ i = j; while(p1[i] != '\n'){ //if (!isspace((int)p1[i])){ // len++; //} i++; } } c++; }else{ if (c){ if(c > n){ n = c; } c =0; } } } if(!n){ return 0; } return n; } int count_sequences_fasta(char* string) { int nbytes; int i; int n = 0; int stop = 0; nbytes = strlen(string); for (i =0;i < nbytes;i++){ if (string[i] == '>'&& stop == 0){ stop = 1; n++; } if (string[i] == '\n'){ stop = 0; } } if(!n){ return 0; } return n; } char* get_input_into_string(char* string,char* infile) { int i = 0; int string_length = 2; char c = 0; FILE *file = 0; if(infile){ if (!(file = fopen( infile, "r" ))){ return 0; fprintf(stderr,"Cannot open file '%s'\n", infile); exit(1); } if (fseek(file,0,SEEK_END) != 0){ (void)fprintf(stderr, "ERROR: fseek failed\n"); (void)exit(EXIT_FAILURE); } i= ftell (file); if (fseek(file,0,SEEK_START) != 0){ (void)fprintf(stderr, "ERROR: fseek failed\n"); (void)exit(EXIT_FAILURE); } string = malloc ((i+1)* sizeof(char)); fread(string,sizeof(char), i, file); string[i] = 0; fclose(file); }else{ if (!isatty(0)){ string = malloc(sizeof(char*)*string_length); while (!feof (stdin)){ c = getc(stdin); if (i == string_length){ string_length <<=1; 
string = realloc(string,sizeof(char)*string_length); } string[i] = c; i++; } string[i-1] = 0; }else{ return 0; } } return string; } struct alignment* read_sequences_from_swissprot(struct alignment* aln,char* string) { //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; int i,j,c,n; char* p = 0; p = string; /*numseq = byg_count("ID ",p); if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } aln = (struct alignment *) malloc(sizeof(struct alignment)); numprofiles = (numseq << 1) - 1; aln->ft = 0; aln->si = 0; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for (i = numseq;i--;){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ c = 0; while(aln->sl[c]){ c++; } while ((i = byg_end("ID ",p)) != -1){ p+=i; j = byg_start(" ",p); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p[i]; } aln->sn[c][j] = 0; p+= j; j = byg_end("SQ ",p); p+= j; j = byg_end("\n",p); p+= j; j = byg_start("//",p); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if(isalpha((int)p[i])){ aln->s[c][n] = aacode[toupper(p[i])-65]; aln->seq[c][n] = p[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; c++; } free(string); return aln; } struct alignment* read_alignment_from_swissprot(struct alignment* aln,char* string) { //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; int aacode[26] = 
{0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; int i,j,c,n; char* p = 0; p = string; /*numseq = byg_count("ID ",p); if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } aln = (struct alignment *) malloc(sizeof(struct alignment)); numprofiles = (numseq << 1) - 1; aln->ft = 0; aln->si = 0; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for (i = numseq;i--;){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ c = 0; while(aln->sl[c]){ c++; } fprintf(stderr,"found sequence:\n"); while ((i = byg_end("ID ",p)) != -1){ p+=i; j = byg_start(" ",p); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p[i]; } aln->sn[c][j] = 0; p+= j; j = byg_end("SQ ",p); p+= j; j = byg_end("\n",p); p+= j; j = byg_start("//",p); fprintf(stderr,"found sequence:\n"); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if((int)p[i] > 32){ if(isalpha((int)p[i])){ aln->s[c][n] = aacode[toupper(p[i])-65]; }else{ aln->s[c][n] = -1; } fprintf(stderr,"%c",p[i]); aln->seq[c][n] = p[i]; n++; } } fprintf(stderr,"\n\n"); aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; c++; } free(string); return aln; } struct alignment* read_sequences_macsim_xml(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p = 0; int max = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment*) malloc(sizeof(struct alignment)); numseq = 
byg_count("",string); if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = malloc(sizeof(struct feature* ) * (numseq)); aln->si = malloc(sizeof(struct sequence_information* ) * (numseq)); aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->ft[i] = 0; aln->si[i] = 0; aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ p = string; if(byg_count("",p)){ while((i = byg_start("",p))!=-1){ p+=i; j = byg_end("",p); for(i = 0; i< j;i++){ p[i] = ' '; } i = byg_start("",p); p+=i; j = byg_end("",p); for(i = 0; i< j;i++){ p[i] = ' '; } } } p = string; c = 0; while(aln->sl[c]){ c++; } while((i = byg_end("",p); i = byg_end("",p); if(i < max){ p +=i; //p1 is at the end of the sequence name tag j = byg_start("",p); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p[i]; } aln->sn[c][j] = 0; } i = byg_end("",p); if(i < max){ aln->ft[c] = read_ft(aln->ft[c],p); } i = byg_end("",p); if(i < max){ p+= i; j = byg_start("",p); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if(isalpha((int)p[i])){ aln->s[c][n] = aacode[toupper(p[i])-65]; aln->seq[c][n] = p[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; } c++; } free(string); return aln; } struct alignment* read_alignment_macsim_xml(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p = 0; int max = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = 
{0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment*) malloc(sizeof(struct alignment)); numseq = byg_count("",string); if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = malloc(sizeof(struct feature* ) * (numseq)); aln->si = malloc(sizeof(struct sequence_information* ) * (numseq)); aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->ft[i] = 0; aln->si[i] = 0; aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ p = string; if(byg_count("",p)){ while((i = byg_start("",p))!=-1){ p+=i; j = byg_end("",p); for(i = 0; i< j;i++){ p[i] = ' '; } i = byg_start("",p); p+=i; j = byg_end("",p); for(i = 0; i< j;i++){ p[i] = ' '; } } } p = string; c = 0; while(aln->sl[c]){ c++; } while((i = byg_end("",p); i = byg_end("",p); if(i < max){ p +=i; //p1 is at the end of the sequence name tag j = byg_start("",p); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p[i]; } aln->sn[c][j] = 0; } i = byg_end("",p); if(i < max){ aln->ft[c] = read_ft(aln->ft[c],p); } i = byg_end("",p); if(i < max){ p+= i; j = byg_start("",p); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if((int)p[i]>32){ if(isalpha((int)p[i])){ aln->s[c][n] = aacode[toupper(p[i])-65]; }else{ aln->s[c][n] = -1; } aln->seq[c][n] = p[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; } c++; } free(string); return aln; } struct feature* read_ft(struct feature* ft,char* p) { int i,j; struct feature *n = 0; struct feature *old_n 
= 0; char tmp[10]; char* p1 = 0; p1 = p; while((j = byg_end("",p1))!= -1){ i = byg_end("",p1); if(j >i){ break; } n = malloc(sizeof(struct feature)); n->next = 0; n->color = -1; p1+=j;// p1 is at start of entry; i = byg_end("",p1); p1 +=i; //p1 is at the end of the sequence name tag j = byg_start("",p1); n->type = malloc(sizeof(char*)*(j+1)); for (i = 0; i < j;i++){ n->type[i] = p1[i]; } n->type[j] = 0; i = byg_end("",p1); p1+= i; j = byg_start("",p1); for (i = 0; i < j;i++){ tmp[i] = p1[i]; } tmp[j] = 0; n->start = atoi(tmp); i = byg_end("",p1); p1+= i; j = byg_start("",p1); for (i = 0; i < j;i++){ tmp[i] = p1[i]; } tmp[j] = 0; n->end = atoi(tmp); i = byg_end("",p1); p1+= i; j = byg_start("",p1); n->note = malloc(sizeof(char*)*(j+1)); for (i = 0; i < j;i++){ n->note[i] = p1[i]; } n->note[j] = 0; if((old_n = ft)!= 0){ while(old_n->next!=0){ old_n = old_n->next; } old_n->next = n; }else{ ft = n; } n = 0; } return ft; } struct alignment* read_sequences_uniprot_xml(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p1 = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment *) malloc(sizeof(struct alignment)); numseq = byg_count("s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->si = 0; aln->ft = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ p1 = string; c = 0; while(aln->sl[c]){ c++; } while((i = byg_end("",p1); p1 +=i; //p1 is at the end of the sequence name tag j = byg_start("",p1); aln->lsn[c] 
= j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; while((i = byg_end("",p1); p1 +=i; } j = byg_start("",p1); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if(isalpha((int)p1[i])){ aln->s[c][n] = aacode[toupper(p1[i])-65]; aln->seq[c][n] = p1[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; c++; } free(string); return aln; } struct alignment* read_alignment_uniprot_xml(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p1 = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment *) malloc(sizeof(struct alignment)); numseq = byg_count("s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->si = 0; aln->ft = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ p1 = string; c = 0; while(aln->sl[c]){ c++; } while((i = byg_end("",p1); p1 +=i; //p1 is at the end of the sequence name tag j = byg_start("",p1); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; i = byg_end("",p1); p1 +=i; j = byg_start("",p1); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if((int)p1[i] > 32){ if(isalpha((int)p1[i])){ aln->s[c][n] = aacode[toupper(p1[i])-65]; }else{ aln->s[c][n] = -1; } aln->seq[c][n] = p1[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; 
aln->sl[c] = n; c++; } free(string); return aln; } struct alignment* read_sequences_stockholm(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p1 = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment*) malloc(sizeof(struct alignment)); p1 = string; while((i = byg_end("\n",p1))!=-1){ p1+=i; if (!(byg_start("//",p1))){ break; } j = byg_end("#",p1); if(j != 1){ numseq++; } } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ c = 0; while(aln->sl[c]){ c++; } p1 = string; while((i = byg_end("\n",p1))!=-1){ p1+=i; if (!(byg_start("//",p1))){ break; } j = byg_end("#",p1); if(j != 1){ j = byg_start(" ",p1); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; p1+=j; j = byg_start("\n",p1); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if(isalpha((int)p1[i])){ aln->s[c][n] = aacode[toupper(p1[i])-65]; aln->seq[c][n] = p1[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; c++; } } free(string); return aln; } struct alignment* read_alignment_stockholm(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; char *p1 = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = 
{0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; /*aln = (struct alignment*) malloc(sizeof(struct alignment)); p1 = string; while((i = byg_end("\n",p1))!=-1){ p1+=i; if (!(byg_start("//",p1))){ break; } j = byg_end("#",p1); if(j != 1){ numseq++; } } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; }*/ c = 0; while(aln->sl[c]){ c++; } p1 = string; while((i = byg_end("\n",p1))!=-1){ p1+=i; if (!(byg_start("//",p1))){ break; } j = byg_end("#",p1); if(j != 1){ j = byg_start(" ",p1); aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; p1+=j; j = byg_start("\n",p1); aln->s[c] = malloc(sizeof(int)*(j+1)); aln->seq[c] = malloc(sizeof(char)*(j+1)); n = 0; for (i = 0;i < j;i++){ if((int)p1[i] > 32){ if(isalpha((int)p1[i])){ aln->s[c][n] = aacode[toupper(p1[i])-65]; }else{ aln->s[c][n] = -1; } aln->seq[c][n] = p1[i]; n++; } } aln->s[c][n] = 0; aln->seq[c][n] = 0; aln->sl[c] = n; c++; } } free(string); return aln; } struct alignment* read_sequences_clustal(struct alignment* aln,char* string) { int c = 0; int n = 0; int len = 0; int i = 0; int j = 0; int start = 0; char *p1 = 0; int local_numseq = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; //aln = (struct alignment*) malloc(sizeof(struct alignment)); p1 = string; while((i = byg_end("\n",p1))!=-1){ p1+=i; j = byg_end(" 
",p1); n = byg_end("\n",p1); if(n > 2 && n>j && j!= 1){ if(c ==0){ i = j; while(p1[i] != '\n'){ if (!isspace((int)p1[i])){ len++; } i++; } } c++; }else{ if (c){ if(c > local_numseq){ local_numseq = c; } c =0; } } } /*numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->lsn[i] = 0; aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; aln->sl[i] = 0;*/ start = 0; while(aln->sl[start]){ start++; } for(i =start;i < local_numseq+start;i++){ aln->s[i] = malloc(sizeof(int)*(len+1)); aln->seq[i] = malloc(sizeof(char)*(len+1)); } p1 = string; c = start; while((i = byg_end("\n",p1))!=-1){ p1+=i; j = byg_end(" ",p1); n = byg_end("\n",p1); if(n > 2 && n>j && j!= 1){ if(aln->lsn[c] == 0){ aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; } for (i = j;i < n;i++){ if(isalpha((int)p1[i])){ aln->s[c][aln->sl[c]] = aacode[toupper(p1[i])-65]; aln->seq[c][aln->sl[c]] = p1[i]; aln->sl[c]++; } } c++; }else{ if (c != start){ //c =0; c = start; } } } for (i = start; i < local_numseq+start;i++){ aln->s[i][aln->sl[i]] = 0; } free(string); return aln; } struct alignment* read_alignment_clustal(struct alignment* aln,char* string) { int c = 0; int n = 0; int len = 0; int i = 0; int j = 0; int start = 0; char *p1 = 0; int local_numseq = 0; int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,-1,13,14,15,16,17,-1,18,19,20,21,22}; //aln = (struct alignment*) malloc(sizeof(struct alignment)); p1 = string; 
while((i = byg_end("\n",p1))!=-1){ p1+=i; j = byg_end(" ",p1); n = byg_end("\n",p1); if(n > 2 && n>j && j!= 1){ if(c ==0){ i = j; while(p1[i] != '\n'){ if ((int)p1[i] > 32){ len++; } i++; } } c++; }else{ if (c){ if(c > local_numseq){ local_numseq = c; } c =0; } } } /*numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; } for(i =0;i < numseq;i++){ aln->lsn[i] = 0; aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; aln->sl[i] = 0;*/ start = 0; while(aln->sl[start]){ start++; } for(i =start;i < local_numseq+start;i++){ aln->s[i] = malloc(sizeof(int)*(len+1)); aln->seq[i] = malloc(sizeof(char)*(len+1)); } p1 = string; c = start; while((i = byg_end("\n",p1))!=-1){ p1+=i; j = byg_end(" ",p1); n = byg_end("\n",p1); if(n > 2 && n>j && j!= 1){ if(aln->lsn[c] == 0){ aln->lsn[c] = j; aln->sn[c] = malloc(sizeof(char)*(j+1)); for (i = 0;i < j;i++){ aln->sn[c][i] = p1[i]; } aln->sn[c][j] = 0; } for (i = j;i < n;i++){ if((int)p1[i] > 32){ if(isalpha((int)p1[i])){ aln->s[c][aln->sl[c]] = aacode[toupper(p1[i])-65]; }else{ aln->s[c][aln->sl[c]] = -1; } aln->seq[c][aln->sl[c]] = p1[i]; aln->sl[c]++; } } c++; }else{ if (c != start){ //c =0; c = start; } } } for (i = start; i < local_numseq+start;i++){ aln->s[i][aln->sl[i]] = 0; aln->seq[i][aln->sl[i]] = 0; } free(string); return aln; } struct alignment* read_sequences(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; int stop = 0; int start = 0; int nbytes; int local_numseq = 0; // O 12 //U17 int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; nbytes = strlen(string); 
//aln = (struct alignment*) malloc(sizeof(struct alignment)); for (i =0;i < nbytes;i++){ if (string[i] == '>'&& stop == 0){ stop = 1; local_numseq++; } if (string[i] == '\n'){ stop = 0; } } /* if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq)); aln->seq = malloc(sizeof(char*) * (numseq)); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; }*/ start = 0; while(aln->sl[start]){ start++; } j = start; for (i =0;i < nbytes;i++){ if (string[i] == '>' && stop == 0){ stop = 1; aln->sl[j] =c; j++; c = 0; } if (string[i] == '\n'){ if(stop == 1){ aln->lsn[j-1] = n; n = 0; } stop = 0; } if (stop == 1 && string[i] != '\n' && string[i] != 0 && string[i] != '>' ){ n++; } if (stop == 0 && string[i] != '\n' && string[i] != 0 ){ if (isalpha((int)string[i])){ c++; } } } aln->sl[j] = c; for (i =1+start;i < local_numseq+1+start;i++){ if(!aln->sl[i]){ fprintf(stderr,"Sequence %d has a length of 0!!\n",i-1); exit(1); } aln->sl[i-1] = aln->sl[i]; } aln->sl[start+local_numseq] = 0; //for (i = numseq;i--;){ for (i = start; i < local_numseq+start;i++){ aln->s[i] = malloc(sizeof(int)*(aln->sl[i]+1)); aln->seq[i] = malloc(sizeof(char)*(aln->sl[i]+1)); aln->sn[i] = malloc(sizeof(char)*(aln->lsn[i]+1)); //aln->sip[i] = malloc(sizeof(int)*1); //aln->nsip[i] = 1; //aln->sip[i][0] = i; } stop = 0; j = start; for (i =0;i < nbytes;i++){ if (string[i] == '>' && stop == 0 ){ stop = 1; j++; c = 0; } if (string[i] == '\n'){ if(stop == 1){ n = 0; } stop = 0; } if (stop == 1 &&string[i] != '\n' && string[i] != 0 && string[i] != '>' ){ aln->sn[j-1][n] = string[i]; n++; } if (stop == 0 && string[i] != '\n' && string[i] != 0 ){ if(isalpha((int)string[i])){ 
aln->s[j-1][c] = aacode[toupper(string[i])-65]; aln->seq[j-1][c] = string[i]; c++; } } } for (i = start;i< local_numseq+start;i++){ aln->s[i][aln->sl[i]] = 0; aln->seq[i][aln->sl[i]] = 0; aln->sn[i][aln->lsn[i]] = 0; } free(string); return aln; } struct alignment* read_alignment(struct alignment* aln,char* string) { int c = 0; int n = 0; int i = 0; int j = 0; int stop = 0; int start = 0; int nbytes; int local_numseq = 0; // O 12 //U17 int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; nbytes = strlen(string); //aln = (struct alignment*) malloc(sizeof(struct alignment)); for (i =0;i < nbytes;i++){ if (string[i] == '>'&& stop == 0){ stop = 1; local_numseq++; } if (string[i] == '\n'){ stop = 0; } } /* if(!numseq){ fprintf(stderr,"No sequences found!\n"); exit(1); } numprofiles = (numseq << 1) - 1; aln->s = malloc(sizeof(int*) * (numseq)); aln->seq = malloc(sizeof(char*) * (numseq)); aln->ft = 0; aln->si = 0; aln->sl = malloc(sizeof(int) * (numprofiles)); aln->sip = malloc(sizeof(int*)* numprofiles); aln->nsip = malloc(sizeof(int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; aln->nsip[i] = 0; }*/ start = 0; while(aln->sl[start]){ start++; } j = start; for (i =0;i < nbytes;i++){ if (string[i] == '>' && stop == 0){ stop = 1; aln->sl[j] =c; j++; c = 0; } if (string[i] == '\n'){ if(stop == 1){ aln->lsn[j-1] = n; n = 0; } stop = 0; } if (stop == 1 && string[i] != '\n' && string[i] != 0 && string[i] != '>' ){ n++; } if (stop == 0 && string[i] != '\n' && string[i] != 0 ){ if ((int)string[i] > 32){ c++; } } } aln->sl[j] = c; for (i =1+start;i < local_numseq+1+start;i++){ if(!aln->sl[i]){ fprintf(stderr,"Sequence %d has a length of 0!!\n",i-1); exit(1); } aln->sl[i-1] = aln->sl[i]; } aln->sl[start+local_numseq] = 0; //fprintf(stderr,"set to 0 : %d\n",start+local_numseq); //for (i = numseq;i--;){ for (i = start; i < local_numseq+start;i++){ 
// fprintf(stderr,"len:%d %d\n",i,aln->sl[i]); aln->s[i] = malloc(sizeof(int)*(aln->sl[i]+1)); aln->seq[i] = malloc(sizeof(char)*(aln->sl[i]+1)); aln->sn[i] = malloc(sizeof(char)*(aln->lsn[i]+1)); //aln->sip[i] = malloc(sizeof(int)*1); //aln->nsip[i] = 1; //aln->sip[i][0] = i; } stop = 0; j = start; for (i =0;i < nbytes;i++){ if (string[i] == '>' && stop == 0 ){ stop = 1; j++; c = 0; } if (string[i] == '\n'){ if(stop == 1){ n = 0; } stop = 0; } if (stop == 1 &&string[i] != '\n' && string[i] != 0 && string[i] != '>' ){ aln->sn[j-1][n] = string[i]; n++; } if (stop == 0 && string[i] != '\n' && string[i] != 0 ){ if((int) string[i] > 32 ){ if(isalpha((int)string[i])){ aln->s[j-1][c] = aacode[toupper(string[i])-65]; }else{ aln->s[j-1][c] = -1; } aln->seq[j-1][c] = string[i]; c++; } } } for (i = start;i< local_numseq+start;i++){ aln->s[i][aln->sl[i]] = 0; aln->seq[i][aln->sl[i]] = 0; aln->sn[i][aln->lsn[i]] = 0; } free(string); return aln; } kalign2_input.h0000644001210100001440000001105611577654214013207 0ustar olifriusers/* kalign2_input.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include #include #include #define SEEK_START 0 #define SEEK_END 2 struct alignment* read_sequences(struct alignment* aln,char* string); struct alignment* read_sequences_from_swissprot(struct alignment* aln,char* string); struct alignment* read_sequences_uniprot_xml(struct alignment* aln,char* string); struct alignment* read_sequences_macsim_xml(struct alignment* aln,char* string); struct feature* read_ft(struct feature* ft,char* p); struct alignment* read_sequences_clustal(struct alignment* aln,char* string); struct alignment* read_sequences_stockholm(struct alignment* aln,char* string); struct alignment* read_alignment(struct alignment* aln,char* string); struct alignment* read_alignment_from_swissprot(struct alignment* aln,char* string); struct alignment* read_alignment_uniprot_xml(struct alignment* aln,char* string); struct alignment* read_alignment_macsim_xml(struct alignment* aln,char* string); struct feature* read_ft(struct feature* ft,char* p); struct alignment* read_alignment_clustal(struct alignment* aln,char* string); struct alignment* read_alignment_stockholm(struct alignment* aln,char* string); char* get_input_into_string(char* string,char* infile); int count_sequences_macsim(char* string); int count_sequences_swissprot(char* string); int count_sequences_uniprot(char* string); int count_sequences_stockholm(char* string); int count_sequences_clustalw(char* string); int count_sequences_fasta(char* string); static char usage[] = "\n\ Usage: kalign2 [INFILE] [OUTFILE] [OPTIONS]\n\ \n\ Options:\n\n\ -s, -gapopen Gap open penalty\n\ -gap_open\n\ -gpo\n\ \n\ -e, -gapextension Gap extension penalty\n\ -gap_ext\n\ -gpe\n\ \n\ -t, -terminal_gap_extension_penalty Terminal gap penalties\n\ -tgpe\n\ \n\ -m, -matrix_bonus A constant added to the substitution matrix.\n\ -bonus\n\ \n\ -c, -sort The order in which the sequences appear in the output alignment.\n\ \n\ \n\ -g, -feature Selects feature mode and specifies which features are to 
be used:\n\ e.g. all, maxplp, STRUCT, PFAM-A....\n\ -same_feature_score Score for aligning same features\n\ -diff_feature_score Penalty for aligning different features\n\ \n\ -d, -distance Distance method.\n\ \n\ \n\ -b, -guide-tree Guide tree method.\n\ -tree \n\ \n\ -z, -zcutoff Parameter used in the wu-manber based distance calculation\n\ \n\ -i, -input The input file.\n\ -infile\n\ -in\n\ \n\ -o, -output The output file.\n\ -outfile\n\ -out\n\ \n\ -a, -gap_inc Parameter increases gap penalties depending on the number of existing gaps\n\ \n\ -f, -format The output format:\n\ \n\ \n\ -q, -quiet Print nothing to STDERR.\n\ Read nothing from STDIN\n\ \n\ Examples:\n\n\ Using pipes:\n\ kalign2 [OPTIONS] < [INFILE] > [OUTFILE]\n\ more [INFILE] | kalign2 [OPTIONS] > [OUTFILE]\n\ \n\ Relaxed gap penalties:\n\ kalign2 -gpo 60 -gpe 9 -tgpe 0 -bonus 0 < [INFILE] > [OUTFILE]\n\ \n\ Feature alignment with pairwise alignment based distance method and NJ guide tree:\n\ kalign2 -in test.xml -distance pair -tree nj -sort gaps -feature STRUCT -format macsim -out test.macsim\n\ "; kalign2_main.c0000644001210100001440000001672311577654215012776 0ustar olifriusers/* kalign2_main.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" unsigned int numseq = 0; unsigned int numprofiles = 0; float gpo = 0; float gpe = 0; float tgpe = 0; int main(int argc,char **argv) { int i; int* tree = 0; int a, b, c; struct alignment* aln = 0; struct parameters* param = 0; struct aln_tree_node* tree2 = 0; param = malloc(sizeof(struct parameters)); param = interface(param,argc,argv); aln = detect_and_read_sequences(aln,param); if(param->ntree > numseq){ param->ntree = numseq; } //DETECT DNA if(param->dna == -1){ for (i = 0; i < numseq;i++){ param->dna = byg_detect(aln->s[i],aln->sl[i]); if(param->dna){ break; } } } //param->dna = 0; //fprintf(stderr,"DNA:%d\n",param->dna); //exit(0); if(param->dna == 1){ //brief sanity check... for (i = 0; i < numseq;i++){ if(aln->sl[i] < 6){ fprintf(stderr,"Dna/Rna alignments are only supported for sequences longer than 6."); free(param); free_aln(aln); exit(0); } } aln = make_dna(aln); } int j; if(param->reformat){ for (i = 0 ;i < numseq;i++){ aln->nsip[i] = i; for (j = 0; j < aln->sl[i];j++){ aln->s[i][j] = 0; } } param->format = "fasta";//param->reformat; output(aln,param); exit(1); } //fast distance calculation; float** submatrix = 0; submatrix = read_matrix(submatrix,param); // sets gap penalties as well..... 
if(!param->quiet){ parameter_message(param); } if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ profile_alignment_main(aln,param,submatrix); } float** dm = 0; if(param->ntree > 1){ if(byg_start(param->distance,"pairclustalPAIRCLUSTAL") != -1){ if(byg_start(param->tree,"njNJ") != -1){ dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,1); }else{ dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,0); } }else if(byg_start("wu",param->alignment_type) != -1){ dm = protein_wu_distance2(aln,dm,param); // param->feature_type = "wumanber"; }else if(param->dna == 1){ if(byg_start(param->tree,"njNJ") != -1){ dm = dna_distance(aln,dm,param,1); }else{ dm = dna_distance(aln,dm,param,0); } }else{ if(byg_start(param->tree,"njNJ") != -1){ dm = protein_wu_distance(aln,dm,param,1); }else{ dm = protein_wu_distance(aln,dm,param,0); } } /*int j; for (i = 0; i< numseq;i++){ for (j = 0; j< numseq;j++){ fprintf(stderr,"%f ",dm[i][j]); } fprintf(stderr,"\n"); }*/ if(byg_start(param->tree,"njNJ") != -1){ tree2 = real_nj(dm,param->ntree); }else{ tree2 = real_upgma(dm,param->ntree); } if(param->print_tree){ print_tree(tree2,aln,param->print_tree); } } tree = malloc(sizeof(int)*(numseq*3+1)); for ( i = 1; i < (numseq*3)+1;i++){ tree[i] = 0; } tree[0] = 1; if(param->ntree < 2){ tree[0] = 0; tree[1] = 1; c = numseq; tree[2] = c; a = 2; for ( i = 3; i < (numseq-1)*3;i+=3){ tree[i] = c; tree[i+1] = a; c++; tree[i+2] = c; a++; } }else if(param->ntree > 2){ ntreeify(tree2,param->ntree); }else{ tree = readtree(tree2,tree); for (i = 0; i < (numseq*3);i++){ tree[i] = tree[i+1]; } free(tree2->links); free(tree2->internal_lables); free(tree2); } //get matrices... 
struct feature_matrix* fm = 0; struct ntree_data* ntree_data = 0; int** map = 0; if(param->ntree > 2){ ntree_data = malloc(sizeof(struct ntree_data)); ntree_data->realtree = tree2; ntree_data->aln = aln; ntree_data->profile = 0; ntree_data->map = 0; ntree_data->ntree = param->ntree; ntree_data->submatrix = submatrix; ntree_data->tree = tree; ntree_data = ntree_alignment(ntree_data); map = ntree_data->map; tree = ntree_data->tree; for (i = 0; i < (numseq*3);i++){ tree[i] = tree[i+1]; } free(ntree_data); }else if (param->feature_type){ fm = get_feature_matrix(fm,aln,param); if(!fm){ for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); free_param(param); free(map); free(tree); exit(0); } map = feature_hirschberg_alignment(aln,tree,submatrix,map,fm); //exit(0); //map = feature_alignment(aln,tree,submatrix, map,fm); }else if (byg_start("pairwise",param->alignment_type) != -1){ if(param->dna == 1){ map = dna_alignment_against_a(aln,tree,submatrix, map,param->gap_inc); }else{ map = hirschberg_alignment_against_a(aln,tree,submatrix, map,param->smooth_window,param->gap_inc); } //map = default_alignment(aln,tree,submatrix, map); }else if (byg_start("fast",param->alignment_type) != -1){ map = default_alignment(aln,tree,submatrix, map); }else if(param->dna == 1){ map = dna_alignment(aln,tree,submatrix, map,param->gap_inc); /*}else if (byg_start("test",param->alignment_type) != -1){ map = test_alignment(aln,tree,submatrix, map,param->internal_gap_weight,param->smooth_window,param->gap_inc); }else if (param->aa){ map = aa_alignment(aln,tree,submatrix, map,param->aa); }else if (param->alter_gaps){ map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight); }else if (byg_start("altergaps",param->alignment_type) != -1){ map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight); }else if(byg_start("simple",param->alignment_type) != -1){ map = 
simple_hirschberg_alignment(aln,tree,submatrix, map);*/ }else if(byg_start("advanced",param->alignment_type) != -1){ map = advanced_hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc,param->internal_gap_weight); }else{ map = hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc); } //clear up sequence array to be reused as gap array.... int *p = 0; for (i = 0; i < numseq;i++){ p = aln->s[i]; for (a = 0; a < aln->sl[i];a++){ p[a] = 0; } } //clear up for (i = 0; i < (numseq-1)*3;i +=3){ a = tree[i]; b = tree[i+1]; aln = make_seq(aln,a,b,map[tree[i+2]]); } //for (i = 0; i < numseq;i++){ // fprintf(stderr,"%s %d\n",aln->sn[i],aln->nsip[i]); //} for (i = 0; i < numseq;i++){ aln->nsip[i] = 0; } aln = sort_sequences(aln,tree,param->sort); //for (i = 0; i < numseq;i++){ // fprintf(stderr,"%d %d %d\n",i,aln->nsip[i],aln->sip[i][0]); //} output(aln,param); /* if(!param->format){ fasta_output(aln,param->outfile); }else{ if (byg_start("msf",param->format) != -1){ msf_output(aln,param->outfile); }else if (byg_start("clustal",param->format) != -1){ clustal_output(aln,param->outfile); }else if (byg_start("macsim",param->format) != -1){ macsim_output(aln,param->outfile,param->infile[0]); } } free_param(param);*/ free(map); free(tree); return 0; } kalign2_mem.c0000644001210100001440000001320711577654215012622 0ustar olifriusers/* kalign2_mem.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" void freesimpletree(struct tree_node* p) { if(p->left){ freesimpletree(p->left); } if(p->right){ freesimpletree(p->right); } free(p); } void free_real_tree(struct aln_tree_node* p) { int i = 0; while(p->links[i]){ free_real_tree(p->links[i]); i++; } free(p->internal_lables); free(p->links); free(p); } void free_feature_matrix(struct feature_matrix* fm) { int i; for (i = 0;i < fm->mdim;i++){ free(fm->m[i]); } free(fm->m); free(fm); } void free_utf(struct utype_ufeat* utf) { free_ft(utf->t); free_ft(utf->f); free(utf); } /*#ifndef MEMORY void* malloc(int size) { void* p; p = (void*)malloc(size); if (!p){ fprintf(stderr,"Out of memory!\n"); exit(0); } return p; } #endif*/ struct names* names_alloc(struct names* n) { int i; n = malloc(sizeof(struct names)); n->start = malloc(sizeof(int)*numseq); n->end = malloc(sizeof(int)*numseq); n->len = malloc(sizeof(int)*numseq); for (i = 0; i < numseq;i++){ n->start[i] = 0; n->end[i] = 0;//aln->lsn[i]; n->len[i] = 0; } return n; } void names_free(struct names* n) { free(n->start); free(n->end); free(n->len); free(n); } struct alignment* aln_alloc(struct alignment* aln) { int i; aln = (struct alignment*) malloc(sizeof(struct alignment)); aln->s = malloc(sizeof(int*) * (numseq )); aln->seq = malloc(sizeof(char*) * (numseq )); aln->ft = malloc(sizeof(struct feature* ) * (numseq)); aln->si = malloc(sizeof(struct sequence_information* ) * (numseq)); aln->sl = malloc(sizeof(unsigned int) * (numprofiles)); aln->sip = malloc(sizeof(unsigned int*)* numprofiles); aln->nsip = malloc(sizeof(unsigned int)* numprofiles); aln->sn = malloc(sizeof(char*) * numseq); aln->lsn = malloc(sizeof(unsigned int) * numseq); for (i =0;i < numprofiles;i++){ aln->sip[i] = 0; 
aln->nsip[i] = 0; aln->sl[i] = 0; } for(i =0;i < numseq;i++){ aln->lsn[i] = 0; aln->ft[i] = 0; aln->si[i] = 0; aln->sip[i] = malloc(sizeof(int)*1); aln->nsip[i] = 1; aln->sip[i][0] = i; } return aln; } void free_aln(struct alignment* aln) { int i; for (i = numseq;i--;){ free(aln->s[i]); free(aln->seq[i]); free(aln->sn[i]); } if(aln->ft){ for(i = numseq;i--;){ free_ft(aln->ft[i]); } free(aln->ft); } if(aln->si){ free(aln->si); } for (i = numprofiles;i--;){ if(aln->sip[i]){ free(aln->sip[i]); } } free(aln->seq); free(aln->s); free(aln->sn); free(aln->sl); free(aln->lsn); free(aln->sip); free(aln->nsip); free(aln); } void free_param(struct parameters* param) { free(param->infile); free(param); } void free_ft(struct feature* n) { struct feature* old_n = 0; if (n != NULL){ old_n = n; n= n ->next; free(old_n->type); free(old_n->note); free(old_n); free_ft(n); } } struct hirsch_mem* hirsch_mem_alloc(struct hirsch_mem* hm,int x) { // a=((typeof(a))(((int)(((void *)malloc(c+15))+15))&-16)). hm = (struct hirsch_mem *) malloc(sizeof(struct hirsch_mem)); hm->starta = 0; hm->startb = 0; hm->enda = 0; hm->endb = 0; hm->size = x; hm->len_a = 0; hm->len_b = 0; hm->f = malloc(sizeof(struct states)* (x+1)); hm->b = malloc(sizeof(struct states)* (x+1)); return hm; } struct hirsch_mem* hirsch_mem_realloc(struct hirsch_mem* hm,int x) { hm->starta = 0; hm->startb = 0; hm->enda = 0; hm->endb = 0; hm->len_a = 0; hm->len_b = 0; hm->size = x; hm->f = realloc(hm->f,sizeof(struct states)* (x+1)); hm->b = realloc(hm->b,sizeof(struct states)* (x+1)); return hm; } void hirsch_mem_free(struct hirsch_mem* hm) { free(hm->f); free(hm->b); free(hm); } struct dp_matrix* dp_matrix_realloc(struct dp_matrix *dp,int x,int y) { int i; if (x > y){ y = x; }else{ x =y; } if ( x > dp->x || y > dp->y){ //printf("REALLOCING:%d-%d %d-%d\n",x,y,dp->x,dp->y); i = 1; while (i <= y){ i <<= 1; // printf("i:%d y:%d\n",i,y); } y = i-1; i = 1; while (i <= x){ i <<= 1; //printf("i:%d y:%d\n",i,y); } x = i-1; 
//printf("NEWX:%d NEWY:%d\n",x,y); dp->s = realloc(dp->s,sizeof(struct states)* (y+1)); dp->tb = (char**) realloc (dp->tb,sizeof (char*)*(x+1)); dp->tb_mem = (void*) realloc(dp->tb_mem,sizeof(char) * (x+1) * (y+1)); dp->tb[0] = (char*) dp->tb_mem; for (i = 1; i <= x; i++){ dp->tb[i] = dp->tb[0] +(i*(y+1)); } dp->x = x; dp->y = y; } return dp; } struct dp_matrix* dp_matrix_alloc(struct dp_matrix *dp,int x,int y) { int i; dp = (struct dp_matrix *) malloc(sizeof(struct dp_matrix)); dp->x = x; dp->y = y; dp->s = malloc(sizeof(struct states)* (y+1)); dp->tb = (char**) malloc(sizeof(char*) * (x+1)); dp->tb_mem = (void *) malloc(sizeof(char) * (x+1) * (y+1)); dp->tb[0] = (char*) dp->tb_mem; for ( i = 1; i <= x;i++){ dp->tb[i] = dp->tb[0] +(i*(y+1)); } return dp; } void dp_matrix_free(struct dp_matrix *dp) { free(dp->s); free(dp->tb); free(dp->tb_mem); free(dp); } kalign2_misc.c0000644001210100001440000004150611577654214013001 0ustar olifriusers/* kalign2_misc.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include #include #include "kalign2.h" void print_tree(struct aln_tree_node* p,struct alignment* aln,char* outfile) { FILE *fout = NULL; if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } //fprintf(stderr,"\n\n%s\n",outfile); /*if(byg_start("xml",outfile) != -1){ fprintf(fout," \n"); print_phyloxml_tree(p,aln,fout); fprintf(fout,"\n"); }else{*/ print_newick_tree(p,aln,fout); fprintf(fout,";"); //} fclose(fout); } void print_newick_tree(struct aln_tree_node* p,struct alignment* aln,FILE *fout) { int j; if(p->links[0]){ fprintf(fout,"("); print_newick_tree(p->links[0],aln,fout); } if(p->num < numseq){ //If you want to print the actual names of the sequences for (j = 0; j < aln->lsn[p->num];j++){ if(isspace((int)aln->sn[p->num][j])){ fprintf(fout,"_"); }else{ fprintf(fout,"%c",aln->sn[p->num][j]); } } //If you want to print the just the number of the sequence //fprintf(stdout,"%d",p->num); }else{ fprintf(fout,","); } if(p->links[1]){ print_newick_tree(p->links[1],aln,fout); fprintf(fout,")"); } } void print_phyloxml_tree(struct aln_tree_node* p,struct alignment* aln,FILE *fout) { int j; if(p->links[0]){ fprintf(fout,"\n"); print_phyloxml_tree(p->links[0],aln,fout); } if(p->num < numseq){ //If you want to print the actual names of the sequences fprintf(fout,"\n"); for (j = 0; j < aln->lsn[p->num];j++){ fprintf(fout,"%c",aln->sn[p->num][j]); } fprintf(fout,"\n\n"); //If you want to print the just the number of the sequence //fprintf(stdout,"%d",p->num); }else{ //fprintf(fout,","); } if(p->links[1]){ print_phyloxml_tree(p->links[1],aln,fout); fprintf(fout,"\n"); } } struct alignment* sort_sequences(struct alignment* aln,int* tree,char* sort) { int i,j,a,b,c; int choice = 0; if(sort){ if (byg_start("input",sort) != -1){ choice = 0; }else if (byg_start("tree",sort) != -1){ choice = 1; }else if (byg_start("gaps",sort) != -1){ choice = 2; }else{ choice = 3; } } //fprintf(stderr,"COICE:%d\n",choice); 
switch(choice){ case 0: for (i = 0; i < numseq;i++){ aln->nsip[i] = i; } break; case 1: c = 0; for (i = 0; i < (numseq-1)*3;i +=3){ //fprintf(stderr,"TREE %d %d %d\n",tree[i],tree[i+1],tree[i+2]); if(tree[i] < numseq){ aln->nsip[c] = tree[i]; c++; } if(tree[i+1] < numseq){ aln->nsip[c] = tree[i+1]; c++; } } break; case 2: for (i = 0; i < numseq;i++){ a = 1000000; b = -1; for (j =0; jnsip[j] < a){ a = aln->nsip[j]; b = j; } } tree[i] = b; aln->nsip[b] = 1000000; } for (i = 0; i < numseq;i++){ aln->nsip[i] = tree[i]; } break; case 3: aln = sort_in_relation(aln,sort); break; default: for (i = 0; i < numseq;i++){ aln->nsip[i] = i; } break; } /*for (i = 0; i < numseq;i++){ fprintf(stderr,"%d\n",aln->nsip[i]); }*/ return aln; } struct alignment* sort_in_relation(struct alignment* aln,char* sort) { int i,j,c; int target = -1; int id = 0; int positions = 0; int posa = 0; int posb = 0; for (i = 0; i < numseq;i++){ if (byg_start(sort,aln->sn[i]) != -1){ target = i; aln->sip[i][0] = 1000; break; } } if(target == -1){ target = 0; aln->sip[0][0] = 1000; } for (i = 0; i < numseq;i++){ if(i != target){ posa = 0; posb =0; c = 0; id = 0; positions = 0; for (j = 0; j < aln->sl[i];j++){ posa += aln->s[i][j]+1; while(posa > posb){ posb += aln->s[target][c]+1; c++; } if(posa == posb){ if((int) aln->seq[i][j] == (int) aln->seq[target][c-1]){ id += 1000; } positions += 1; } } if(positions){ aln->sip[i][0] = id/positions; }else{ aln->sip[i][0] = 0; } } } for (i = 0; i < numseq;i++){ aln->nsip[i] = i; } quickSort(aln, numseq); return aln; } void quickSort(struct alignment* aln, int array_size) { q_sort(aln, 0, array_size - 1); } void q_sort(struct alignment* aln, int left, int right) { int pivot, l_hold, r_hold; int pivot2; l_hold = left; r_hold = right; pivot2 = aln->nsip[left]; pivot = aln->sip[left][0];// numbers[left]; while (left < right){ while ((aln->sip[right][0] <= pivot) && (left < right)){ right--; } if (left != right){ aln->sip[left][0] = aln->sip[right][0]; aln->nsip[left] = 
aln->nsip[right]; left++; } while ((aln->sip[left][0] >= pivot) && (left < right)){ left++; } if (left != right){ aln->sip[right][0] = aln->sip[left][0]; aln->nsip[right] = aln->nsip[left]; right--; } } aln->sip[left][0] = pivot; aln->nsip[left] = pivot2; pivot = left; left = l_hold; right = r_hold; if (left < pivot){ q_sort(aln, left, pivot-1); } if (right > pivot){ q_sort(aln, pivot+1, right); } } int* readtree(struct aln_tree_node* p,int* tree) { if(p->links[0]){ tree = readtree(p->links[0],tree); } if(p->links[1]){ tree = readtree(p->links[1],tree); } if(p->links[0]){ if(p->links[1]){ tree[tree[0]] = p->links[0]->num; tree[tree[0]+1] = p->links[1]->num; tree[tree[0]+2] = p->num; tree[0] +=3; free(p->links[0]->internal_lables); free(p->links[0]->links); free(p->links[0]); free(p->links[1]->internal_lables); free(p->links[1]->links); free(p->links[1]); } } return tree; } struct alignment* make_dna(struct alignment* aln) { //int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; int i,j; int* p; for(i = 0;i < numseq;i++){ p = aln->s[i]; for (j = 0; j < aln->sl[i];j++){ switch(p[j]){ case 2: //C p[j] = 1; break; case 6: //G p[j] = 2; break; case 17: //T or U p[j] = 3; break; case 12: // N p[j] = 4; break; case 20: // X p[j] = 4; break; case 23://O whatever that is... 
p[j] = 4; break; } // printf("%d\n",p[j]); } } return aln; } float** read_matrix(float** subm,struct parameters* param) { int i,j; int m_pos = 0; short *matrix_pointer = 0; short blosum50mt[]={ 5, -2, 5, -1, -3, 13, -2, 5, -4, 8, -1, 1, -3, 2, 6, -3, -4, -2, -5, -3, 8, 0, -1, -3, -1, -3, -4, 8, -2, 0, -3, -1, 0, -1, -2, 10, -1, -4, -2, -4, -4, 0, -4, -4, 5, -1, 0, -3, -1, 1, -4, -2, 0, -3, 6, -2, -4, -2, -4, -3, 1, -4, -3, 2, -3, 5, -1, -3, -2, -4, -2, 0, -3, -1, 2, -2, 3, 7, -1, 4, -2, 2, 0, -4, 0, 1, -3, 0, -4, -2, 7, -1, -2, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10, -1, 0, -3, 0, 2, -4, -2, 1, -3, 2, -2, 0, 0, -1, 7, -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 7, 1, 0, -1, 0, -1, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, 0, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -3, -3, -3, -3, -2, 0, 5, -3, -5, -5, -5, -3, 1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15, -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, -1, -3, -1, -2, -3, -3, -3, -2, 4, -3, 2, -1, -2, -1, 0, -2, -3, -1, -1, -2, -2, -1, 2, -1, 8, -1, 2, -3, 1, 5, -4, -2, 0, -3, 1, -3, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5}; short blosum62mt[]={ 40, -20, 40, 0, -30, 90, -20, 40, -30, 60, -10, 10, -40, 20, 50, -20, -30, -20, -30, -30, 60, 0, -10, -30, -10, -20, -30, 60, -20, 0, -30, -10, 0, -10, -20, 80, -10, -30, -10, -30, -30, 0, -40, -30, 40, -10, 0, -30, -10, 10, -30, -20, -10, -30, 50, -10, -40, -10, -40, -30, 0, -40, -30, 20, -20, 40, -10, -30, -10, -30, -20, 0, -30, -20, 10, -10, 20, 50, -20, 30, -30, 10, 0, -30, 0, 10, -30, 0, -30, -20, 60, -10, -20, -30, -10, -10, -40, -20, -20, -30, -10, -30, -20, -20, 70, -10, 0, -30, 0, 20, -30, -20, 0, -30, 10, -20, 0, 0, -10, 50, -10, -10, -30, -20, 0, -30, -20, 0, -30, 20, -20, -10, 0, -20, 10, 50, 10, 0, -10, 0, 0, -20, 0, -10, -20, 0, -20, -10, 10, -10, 0, -10, 40, 0, -10, -10, -10, -10, -20, -20, -20, -10, -10, -10, -10, 0, -10, -10, -10, 
10, 50, 0, -30, -10, -30, -20, -10, -30, -30, 30, -20, 10, 10, -30, -20, -20, -30, -20, 0, 40, -30, -40, -20, -40, -30, 10, -20, -20, -30, -30, -20, -10, -40, -40, -20, -30, -30, -20, -30, 110, 0, -10, -20, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -20, -10, -10, 0, 0, -10, -20, -10, -20, -30, -20, -30, -20, 30, -30, 20, -10, -20, -10, -10, -20, -30, -10, -20, -20, -20, -10, 20, -10, 70, -10, 10, -30, 10, 40, -30, -20, 0, -30, 10, -30, -10, 0, -10, 30, 0, 0, -10, -20, -30, -10, -20, 40}; short gon250mt[]={ 24, 0, 0, 5, 0, 115, -3, 0, -32, 47, 0, 0, -30, 27, 36, -23, 0, -8, -45, -39, 70, 5, 0, -20, 1, -8, -52, 66, -8, 0, -13, 4, 4, -1, -14, 60, -8, 0, -11, -38, -27, 10, -45, -22, 40, -4, 0, -28, 5, 12, -33, -11, 6, -21, 32, -12, 0, -15, -40, -28, 20, -44, -19, 28, -21, 40, -7, 0, -9, -30, -20, 16, -35, -13, 25, -14, 28, 43, -3, 0, -18, 22, 9, -31, 4, 12, -28, 8, -30, -22, 38, 3, 0, -31, -7, -5, -38, -16, -11, -26, -6, -23, -24, -9, 76, -2, 0, -24, 9, 17, -26, -10, 12, -19, 15, -16, -10, 7, -2, 27, -6, 0, -22, -3, 4, -32, -10, 6, -24, 27, -22, -17, 3, -9, 15, 47, 11, 0, 1, 5, 2, -28, 4, -2, -18, 1, -21, -14, 9, 4, 2, -2, 22, 6, 0, -5, 0, -1, -22, -11, -3, -6, 1, -13, -6, 5, 1, 0, -2, 15, 25, 1, 0, 0, -29, -19, 1, -33, -20, 31, -17, 18, 16, -22, -18, -15, -20, -10, 0, 34, -36, 0, -10, -52, -43, 36, -40, -8, -18, -35, -7, -10, -36, -50, -27, -16, -33, -35, -26, 142, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -22, 0, -5, -28, -27, 51, -40, 22, -7, -21, 0, -2, -14, -31, -17, -18, -19, -19, -11, 41, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; if(param->sub_matrix){ if(byg_start(param->sub_matrix,"blosum62BLOSUM62") != -1){ matrix_pointer = blosum62mt; //m_pos = 0; //for (i = 0;i < 23;i++){ // for (j = 0;j <= i;j++){ // matrix_pointer[m_pos] = matrix_pointer[m_pos] * 10; // m_pos++; // } //} gpo = 55; gpe = 8; tgpe = 1; } if(byg_start(param->sub_matrix,"blosum50BLOSUM50") != -1){ matrix_pointer = blosum50mt; 
m_pos = 0; for (i = 0;i < 23;i++){ for (j = 0;j <= i;j++){ matrix_pointer[m_pos] = matrix_pointer[m_pos] * 10; m_pos++; } } gpo = 55; gpe = 8; tgpe = 1; } //vogt.... }else{ if(!param->dna){ // gpo:5.494941 gpe:0.852492 tgpe:0.442410 bonus: 3.408872 z-cutoff: 58.823309 -> 0.829257 accuracy on bb3 gpo = 54.94941; gpe = 8.52492; tgpe = 4.42410; //gpo = 54; //gpe = 8; //tgpe = 4; //-gpo 10.9898 -gpe 0.852492 -tgpe 0.442410 -bonus 0.2 -zcutoff 58.823309 // param->secret = 0.2; matrix_pointer = gon250mt; }else{ //gpo = 400; // gpe = 30; //tgpe = 30; //param->gpo = 43.4; //param->gpe = 3.94; //param->tgpe = 29.26; //gpo = 43.4 *5; gpo = 217; gpe = 39.4; tgpe = 292.6; //param->secret = 28.3; param->zlevel = 61.08; param->internal_gap_weight = 49.14; } } if(param->gpo!= -1){ //param->gpo *= 5; gpo = param->gpo; } if(param->gpe != -1){ //param->gpe *= 10; gpe = param->gpe; } if(param->tgpe != -1){ //param->tgpe *= 10; tgpe = param->tgpe; } // if(param->secret != -1){ // //param->secret *= 10; // }else{ if(param->secret == -1){ if(!param->dna){ param->secret = 0.2; }else{ param->secret = 283.0; } } //fprintf(stderr,"%d %d %d %d\n",gpo,gpe,tgpe, (int)param->secret); subm = malloc(sizeof (float*) * 32); for (i = 32;i--;){ subm[i] = malloc(sizeof(float) * 32); for (j = 32;j--;){ subm[i][j] = param->secret;//0;//gpe << 1;//-5;// better on Balibase } } if(param->dna){ /*subm[0][0] += 10; subm[0][1] += 6; subm[1][0] += 6; subm[1][1] += 10; subm[2][2] += 10; subm[2][3] += 6; subm[3][2] += 6; subm[3][3] += 10;*/ // A C G T . N // A 91 -114 -31 -123 0 -43 subm[0][0] += 91; subm[0][1] += -114; subm[0][2] += -31; subm[0][3] += -123; // C -114 100 -125 -31 0 -43 subm[1][0] += -114; subm[1][1] += 100; subm[1][2] += -125; subm[1][3] += -31; // G -31 -125 100 -114 0 -43 subm[2][0] += -31; subm[2][1] += -125; subm[2][2] += 100; subm[2][3] += -114; // T -123 -31 -114 91 0 -43 subm[3][0] += -123; subm[3][1] += -31; subm[3][2] += -114; subm[3][3] += 91; // . 
0 0 0 0 0 0 // N -43 -43 -43 -43 0 -43 /*for (i = 0; i < 4;i++){ for (j = 0;j < 4;j++){ if(i == j){ subm[i][j] += 1; }else{ subm[i][j] -= 3; } } }*/ }else{ m_pos = 0; for (i = 0;i < 23;i++){ for (j = 0;j <= i;j++){ if (i == j){ // subm[i][j] += blosum62mt[m_pos]*10; subm[i][j] += matrix_pointer[m_pos]; }else{ // subm[i][j] += blosum62mt[m_pos]*10; // subm[j][i] += blosum62mt[m_pos]*10; subm[i][j] += matrix_pointer[m_pos]; subm[j][i] += matrix_pointer[m_pos]; } m_pos++; } } /*for (i = 0; i < 23;i++){ for (j = 0; j < 23;j++){ fprintf(stderr,"%d ",subm[i][j]); } fprintf(stderr,"\n"); } fprintf(stderr,"\n");*/ } return subm; } struct alignment* make_seq(struct alignment* aln,int a,int b,int* path) { int c; int i; int posa = 0; int posb = 0; int* gap_a = 0; int* gap_b = 0; gap_a = malloc ((path[0]+1)*sizeof(int)); gap_b = malloc ((path[0]+1)*sizeof(int)); for (i = path[0]+1;i--;){ gap_a[i] = 0; gap_b[i] = 0; } c = 1; while(path[c] != 3){ if (!path[c]){ posa++; posb++; } if (path[c] & 1){ gap_a[posa] += 1; posb++; } if (path[c] & 2){ gap_b[posb] += 1; posa++; } c++; } for (i = aln->nsip[a];i--;){ update_gaps(aln->sl[aln->sip[a][i]],aln->s[aln->sip[a][i]],path[0],gap_a); } for (i = aln->nsip[b];i--;){ update_gaps(aln->sl[aln->sip[b][i]],aln->s[aln->sip[b][i]],path[0],gap_b); } free(gap_a); free(gap_b); free(path); return aln; } void update_gaps(int old_len,int*gis,int new_len,int *newgaps) { unsigned int i,j; int add = 0; int rel_pos = 0; for (i = 0; i <= old_len;i++){ add = 0; for (j = rel_pos;j <= rel_pos + gis[i];j++){ if (newgaps[j] != 0){ add += newgaps[j]; } } rel_pos += gis[i]+1; gis[i] += add; } } int* mirror_path(int* path) { int c = 1; while(path[c] != 3){ if (path[c] & 1){ path[c] += 1; }else if (path[c] & 2){ path[c] -= 1; } c++; } return path; } struct node* insert(struct node *n, int pos) { if (n == NULL){ n = (struct node*) malloc(sizeof(struct node)); n->next = 0; n->pos = pos; }else{ n->next = insert(n->next,pos); } return n; } struct bignode* 
big_insert_hash(struct bignode *n,const unsigned int pos) { struct bignode* p = 0; if(n){ if(n->num < NODESIZE){ n->pos[n->num] = pos; n->num++; return n; }else{ p = (struct bignode*) malloc(sizeof(struct bignode)); p->pos[0] = pos; p->num = 1; p->next = n; } }else{ p = (struct bignode*) malloc(sizeof(struct bignode)); p->pos[0] = pos; p->num = 1; p->next = n; } return p; } void big_remove_nodes(struct bignode *n) { struct bignode* p; while(n){ p = n; n = n->next; free(p); } } void big_print_nodes(struct bignode *n) { int i; while(n){ for (i = 0; i < n->num;i++){ fprintf(stderr,"%d ",n->pos[i]); } n = n->next; } } struct node* insert_hash(struct node *n, int pos) { struct node* p; p = (struct node*) malloc(sizeof(struct node)); p->pos = pos; p->next = n; return p; } void remove_nodes(struct node *n) { struct node* p; while(n){ p = n; n = n->next; free(p); } } kalign2_output.c0000644001210100001440000003172611577654704013415 0ustar olifriusers/* kalign2_output.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_output.h" void output(struct alignment* aln,struct parameters* param) { if(!param->format){ fasta_output(aln,param->outfile); }else{ if (byg_start(param->format,"alnALNclustalCLUSTALclustalwCLUSTALWclustalWClustalW") != -1){ aln_output(aln,param); }else if (byg_start(param->format,"msfMSFgcgGCGpileupPILEUP") != -1){ msf_output(aln,param->outfile); }else if (byg_start(param->format,"eclu") != -1){ clustal_output(aln,param->outfile); }else if (byg_start("macsim",param->format) != -1){ macsim_output(aln,param->outfile,param->infile[0]); }else{ fasta_output(aln,param->outfile); } } free_param(param); } void macsim_output(struct alignment* aln,char* outfile,char* infile) { int i,j,f; int tmp; struct feature *fn = 0; FILE *fout = NULL; if(outfile){ if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } }else{ fout = stdout; } fprintf(fout,"\n\n\n\n"); if(infile){ fprintf(fout,"%s.kalign",infile); }else{ fprintf(fout,"kalign alignment"); } fprintf(fout,"\n"); for (i =0;i< numseq;i++){ //c = aln->sl[i]; f = aln->nsip[i]; fprintf(fout,"\n"); fprintf(fout,""); for (j =0; j < aln->lsn[f];j++){ if(!iscntrl((int)aln->sn[f][j])){ fprintf(fout,"%c",aln->sn[f][j]); } } fprintf(fout,""); fprintf(fout,"\n"); fprintf(fout,"1aab_\n"); fprintf(fout,"1aab_\n"); fprintf(fout,"0.0.0.0\n"); fprintf(fout,"0\n"); if(aln->ft){ if(aln->ft[f]){ fprintf(fout,"\n"); fn = aln->ft[f]; while(fn){ fprintf(fout,"%s%d%d%s\n",fn->type,fn->start,fn->end,fn->note); fn = fn->next; } fprintf(fout,"\n\n"); } } fprintf(fout,"\n"); for (j = 0; j < aln->sl[f];j++){ tmp = aln->s[f][j]; while (tmp){ fprintf(fout,"-"); tmp--; } fprintf(fout,"%c",aln->seq[f][j]); } tmp =aln->s[f][aln->sl[f]]; while (tmp){ fprintf(fout,"-"); tmp--; } fprintf(fout,"\n"); fprintf(fout,"\n"); fprintf(fout,"\n"); } fprintf(fout,"\n"); fprintf(fout,"\n"); if(outfile){ fclose(fout); } free_aln(aln); } void msf_output(struct 
alignment* aln,char* outfile) { int i,j,c,f,g; int max = 0; int aln_len = 0; int tmp; char** linear_seq = 0; FILE *fout = NULL; linear_seq = malloc(sizeof(char*)*numseq); aln_len = 0; for (j = 0; j <= aln->sl[0];j++){ aln_len+= aln->s[0][j]; } aln_len += aln->sl[0]; for (i =0;i< numseq;i++){ linear_seq[i] = malloc(sizeof(char)*(aln_len+1)); c = 0; for (j = 0; j < aln->sl[i];j++){ tmp = aln->s[i][j]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = aln->seq[i][j]; c++; } tmp =aln->s[i][aln->sl[i]]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = 0; } if(outfile){ if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } }else{ fout= stdout; } fprintf(fout,"PileUp\n\n\n\n MSF: %d Type: P Check: 7038 ..\n\n",aln_len); for (j = 0; j< numseq;j++){ if( aln->lsn[j] > max){ max = aln->lsn[j]; } } for (i = 0; i< numseq;i++){ f = aln->nsip[i]; fprintf(fout," Name: "); for (c = 0; c < aln->lsn[f];c++){ if(!iscntrl((int)aln->sn[f][c])){ fprintf(fout,"%c",aln->sn[f][c]); } } while(c < max+3){ fprintf(fout," "); c++; } fprintf(fout,"Len: "); fprintf(fout,"%d",aln_len); fprintf(fout," Check: 2349 Weight: 1.00\n"); } fprintf(fout,"\n\n//\n\n"); for (i = 0; i+60 < aln_len;i +=60){ for (j = 0; j< numseq;j++){ f = aln->nsip[j]; for (c = 0; c < aln->lsn[f];c++){ if(!iscntrl((int)aln->sn[f][c])){ fprintf(fout,"%c",aln->sn[f][c]); } } while(c < max+3){ fprintf(fout," "); c++; } g = 1; for (c = 0; c < 60;c++){ fprintf(fout,"%c",linear_seq[f][c+i]); if (g == 10){ fprintf(fout," "); g = 0; } g++; } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); } for (j = 0; j< numseq;j++){ f = aln->nsip[j]; for (c = 0; c< aln->lsn[f];c++){ if(!iscntrl((int)aln->sn[f][c])){ fprintf(fout,"%c",aln->sn[f][c]); } } while(c < max+3){ fprintf(fout," "); c++; } g = 1; for (c = i; c< aln_len;c++){ fprintf(fout,"%c",linear_seq[f][c]); if (g == 10){ fprintf(fout," "); g = 0; } g++; } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); if(outfile){ 
fclose(fout); } for (i =0;i< numseq;i++){ free(linear_seq[i]); } free(linear_seq); free_aln(aln); } void clustal_output(struct alignment* aln,char* outfile) { int i,j,c,f; int tmp; int aln_len = 0; char** linear_seq = 0; FILE* fout = NULL; linear_seq = malloc(sizeof(char*)*numseq); aln_len = 0; for (j = 0; j <= aln->sl[0];j++){ aln_len+= aln->s[0][j]; } aln_len += aln->sl[0]; for (i =0;i< numseq;i++){ linear_seq[i] = malloc(sizeof(char)*(aln_len+1)); c = 0; for (j = 0; j < aln->sl[i];j++){ tmp = aln->s[i][j]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = aln->seq[i][j]; c++; } tmp =aln->s[i][aln->sl[i]]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = 0; } if(outfile){ if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } }else{ fout = stdout; } fprintf(fout,"Kalign (2.0) alignment in ClustalW format\n\n\n"); for (i = 0; i+60 < aln_len;i +=60){ for (j = 0; j< numseq;j++){ f = aln->nsip[j]; for (c = 0; c < aln->lsn[f];c++){ if(!iscntrl((int)aln->sn[f][c])){ fprintf(fout,"%c",aln->sn[f][c]); } } while(c < 18){ fprintf(fout," "); c++; } for (c = 0; c < 60;c++){ fprintf(fout,"%c",linear_seq[f][c+i]); } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); } for (j = 0; j< numseq;j++){ f = aln->nsip[j]; for (c = 0; c< aln->lsn[f];c++){ if(!iscntrl((int)aln->sn[f][c])){ fprintf(fout,"%c",aln->sn[f][c]); } } while(c < 18){ fprintf(fout," "); c++; } for (c = i; c< aln_len;c++){ fprintf(fout,"%c",linear_seq[f][c]); } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); if(outfile){ fclose(fout); } for (i =0;i< numseq;i++){ free(linear_seq[i]); } free(linear_seq); free_aln(aln); } void aln_output(struct alignment* aln,struct parameters* param) { char* outfile = param->outfile; int i,j,c,f; int tmp; int aln_len = 0; //int namestart = 0; int max_name_len = 20; int tmp_len = 0; char** linear_seq = 0; struct names* n; n = get_meaningful_names(aln,param->id); //namestart = get_meaningful_names(aln,param->id); c = -1; 
for (i = 0; i< numseq;i++){ if(n->len[i] > c){ c = n->len[i]; } /*f = 0; for (j = namestart;j < aln->lsn[i];j++){ if(isspace((int)aln->sn[i][j])){ break; } f++; } if(f > c){ c = f; } }*/ } if(c < max_name_len){ max_name_len = c;//this is know the maximum length of a unique name isdjgbv skj } FILE* fout = NULL; linear_seq = malloc(sizeof(char*)*numseq); aln_len = 0; for (j = 0; j <= aln->sl[0];j++){ aln_len+= aln->s[0][j]; } aln_len += aln->sl[0]; for (i =0;i< numseq;i++){ linear_seq[i] = malloc(sizeof(char)*(aln_len+1)); c = 0; for (j = 0; j < aln->sl[i];j++){ tmp = aln->s[i][j]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = aln->seq[i][j]; c++; } tmp =aln->s[i][aln->sl[i]]; while (tmp){ linear_seq[i][c] ='-'; c++; tmp--; } linear_seq[i][c] = 0; } if(outfile){ if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } }else{ fout = stdout; } fprintf(fout,"Kalign (2.0) alignment in ClustalW format\n\n\n"); for (i = 0; i+60 < aln_len;i +=60){ for (j = 0; j< numseq;j++){ f = aln->nsip[j]; tmp_len = (max_name_len < n->len[f]) ? max_name_len:n->len[f]; for (c = 0; c < tmp_len;c++){ if(isspace((int)aln->sn[f][c+n->start[f]])){ break; } if(!iscntrl((int)aln->sn[f][c+n->start[f]])){ fprintf(fout,"%c",aln->sn[f][c+n->start[f]]); } } while(c < max_name_len+5){ fprintf(fout," "); c++; } for (c = 0; c < 60;c++){ fprintf(fout,"%c",linear_seq[f][c+i]); } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); } for (j = 0; j< numseq;j++){ f = aln->nsip[j]; tmp_len = (max_name_len < n->len[f]) ? 
max_name_len:n->len[f]; for (c = 0; c< tmp_len;c++){ if(isspace((int)aln->sn[f][c+n->start[f]])){ break; } if(!iscntrl((int)aln->sn[f][c+n->start[f]])){ fprintf(fout,"%c",aln->sn[f][c+n->start[f]]); } } while(c < max_name_len + 5){ fprintf(fout," "); c++; } for (c = i; c < aln_len;c++){ fprintf(fout,"%c",linear_seq[f][c]); } fprintf(fout,"\n"); } fprintf(fout,"\n\n"); if(outfile){ fclose(fout); } names_free(n); for (i =0;i< numseq;i++){ free(linear_seq[i]); } free(linear_seq); free_aln(aln); } struct names* get_meaningful_names(struct alignment* aln,int id) { struct names* n = 0; int i,j,c; int min_len = 0; int start = 0; int globalstart = 1000000; n = names_alloc(n); for (i = 0; i < numseq;i++){ n->end[i] = aln->lsn[i]; } if (id == -1){ for(i =0; i < numseq-1;i++){ for (j = i+1; j < numseq;j++){ min_len = (aln->lsn[i] < aln->lsn[j])? aln->lsn[i] : aln->lsn[j]; start = 0; for (c = 0; c < min_len;c++){ if(isalnum((int)aln->sn[i][c]) && isalnum((int)aln->sn[j][c])){ if( aln->sn[i][c] != aln->sn[j][c]){ break; } }else{ if(aln->sn[i][c] == aln->sn[j][c]){ if(aln->sn[i][c] != '_' && aln->sn[i][c] != '-'){ start = c+1; } }else{ break; } } } //fprintf(stderr,"%s\n%s\nstart: %d\n\n",aln->sn[i],aln->sn[j],start); if (start < globalstart){ globalstart = start; } } } for (i = 0; i < numseq;i++){ n->start[i] = globalstart; for (j = n->start[i]; j < aln->lsn[i];j++){ if(!isalnum((int)aln->sn[i][j]) && aln->sn[i][j] != '_' && aln->sn[i][j] != '-'){ n->end[i] = j; break; } } } }else{ for(i =0; i < numseq;i++){ start = 0; min_len = 0; for (j = 0; j < aln->lsn[i];j++){ if((isalnum((int)aln->sn[i][j]) || aln->sn[i][j] == '_' || aln->sn[i][j] == '-')&& start == 0 ){ n->start[i] = j; min_len++; start = 1; }else if ((!isalnum((int)aln->sn[i][j]) && aln->sn[i][j] != '_' && aln->sn[i][j] != '-')&& start == 1) { if(id == min_len){ n->end[i] = j; break; } start = 0; } } if(id > min_len){ fprintf(stderr,"Warning: sequence %d has no %dth word in the identifier line:\n%s\n",i,id,aln->sn[i]); 
n->start[i] = 0; } } } for (i = 0; i < numseq;i++){ //fprintf(stderr,"%s\n%d-%d\n",aln->sn[i],n->start[i],n->end[i]); n->len[i] = n->end[i] - n->start[i]; } return n; } void fasta_output(struct alignment* aln,char* outfile) { int i,j,c,f; int tmp; FILE *fout = NULL; if(outfile){ if ((fout = fopen(outfile, "w")) == NULL){ fprintf(stderr,"can't open output\n"); exit(0); } }else{ fout = stdout; } for (i = 0; i < numseq;i++){ f = aln->nsip[i]; fprintf(fout,">%s\n",aln->sn[f]); c = 0; for (j = 0; j < aln->sl[f];j++){ tmp = aln->s[f][j]; while (tmp){ fprintf(fout,"-"); c++; if(c == 60 && j != aln->sl[f]-1){ fprintf(fout,"\n"); c = 0; } tmp--; } fprintf(fout,"%c",aln->seq[f][j]); c++; if(c == 60 && j != aln->sl[f]-1){ fprintf(fout,"\n"); c = 0; } } tmp = aln->s[f][aln->sl[f]]; while (tmp){ fprintf(fout,"-"); c++; if(c == 60 && j != aln->sl[f]-1){ fprintf(fout,"\n"); c = 0; } tmp--; } fprintf(fout,"\n"); } if(outfile){ fclose(fout); } free_aln(aln); } kalign2_output.h0000644001210100001440000000237411577654214013413 0ustar olifriusers/* kalign2_output.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include void aln_output(struct alignment* aln,struct parameters* param); void msf_output(struct alignment* aln,char* outfile); void fasta_output(struct alignment* aln,char* outfile); void clustal_output(struct alignment* aln,char* outfile); void macsim_output(struct alignment* aln,char* outfile,char* infile); struct names* get_meaningful_names(struct alignment* aln,int id); kalign2_profile_alignment.c0000644001210100001440000003112211577654215015536 0ustar olifriusers/* kalign2_profile_alignment.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" #include "kalign2_profile_alignment.h" #include "kalign2_hirschberg.h" void profile_alignment_main(struct alignment* aln,struct parameters* param,float** submatrix) { float** dm = 0; int* tree = 0; struct aln_tree_node* tree2 = 0; int i,j; int tmp_numseq; int tmp_numprofiles; local_numseq = 0; local_numprofiles = 0; //determine number of profiles that were inputted.... 
while(aln->sl[local_numseq+numseq]){ local_numseq++; } local_numprofiles = (local_numseq << 1) - 1; //fprintf(stderr,"%d %d\n",local_numseq,local_numprofiles); for (i = 0;i < numseq;i++){ // fprintf(stderr,"%d %d %d\n",i,aln->s[i][0],aln->s[i][1]); aln->s[i] = assign_gap_codes(aln->s[i],aln->sl[i]); } if(param->dna == 1){ if(byg_start(param->tree,"njNJ") != -1){ dm = dna_profile_distance(aln,dm,param,1); }else{ dm = dna_profile_distance(aln,dm,param,0); } }else{ if(byg_start(param->tree,"njNJ") != -1){ dm = protein_profile_wu_distance(aln,dm,param,1); }else{ dm = protein_profile_wu_distance(aln,dm,param,0); } } /*for ( i=0; i < local_numseq;i++){ for (j = 0;j < local_numseq;j++){ fprintf(stderr,"%f ",dm[i][j]); } fprintf(stderr,"\n"); }*/ tmp_numseq = numseq; tmp_numprofiles = numprofiles; numseq = local_numseq; numprofiles = local_numprofiles; if(byg_start(param->tree,"njNJ") != -1){ tree2 = real_nj(dm,param->ntree); }else{ tree2 = real_upgma(dm,param->ntree); } //WAs here need too add tree2 -> treee..... tree = malloc(sizeof(int)*(numseq*3+1)); for ( i = 1; i < (numseq*3)+1;i++){ tree[i] = 0; } tree[0] = 1; tree = readtree(tree2,tree); for (i = 0; i < (numseq*3);i++){ tree[i] = tree[i+1]+ tmp_numseq; } //exit(0); numseq = tmp_numseq; numprofiles = tmp_numprofiles; int** map = 0; map = hirschberg_profile_alignment(aln,tree,submatrix, map); //clear up sequence array to be reused as gap array.... 
int *p = 0; for (i = 0; i < numseq;i++){ p = aln->s[i]; for (j = 0; j < aln->sl[i];j++){ p[j] = 0; } } //clear up int a,b,c; for (i = 0; i < (local_numseq-1)*3;i +=3){ a = tree[i]; b = tree[i+1]; c = tree[i+2]; aln = make_seq(aln,a,b,map[c]); } for (i = 0; i < numseq;i++){ aln->nsip[i] = 0; } aln = sort_sequences(aln,tree,param->sort); //for (i = 0; i < numseq;i++){ // fprintf(stderr,"%d %d %d\n",i,aln->nsip[i],aln->sip[i][0]); //} output(aln,param); free(tree2->links); free(tree2->internal_lables); free(tree2); free(map); free(tree); exit(0); } int** hirschberg_profile_alignment(struct alignment* aln,int* tree,float**submatrix, int** map) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; float** profile = 0; profile = malloc(sizeof(float*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (local_numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)local_numseq * 100); //fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i)/(float)local_numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? 
len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ map[c][j] = -1; } if (a < numseq+local_numseq){ profile[a] = make_profile_from_alignment(profile[a],a,aln,submatrix); } if (b < numseq+local_numseq){ profile[b] = make_profile_from_alignment(profile[b],b,aln,submatrix); } set_gap_penalties(profile[b],len_b,aln->nsip[a],0,aln->nsip[b]); set_gap_penalties(profile[a],len_a,aln->nsip[b],0,aln->nsip[a]); hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0; hm->f[0].ga = -FLOATINFTY; hm->f[0].gb = -FLOATINFTY; hm->b[0].a = 0; hm->b[0].ga = -FLOATINFTY; hm->b[0].gb = -FLOATINFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(len_a < len_b){ map[c] = hirsch_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = hirsch_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != local_numseq-2){ //fprintf(stderr,"updating....\n"); profile[c] = malloc(sizeof(float)*64*(map[c][0]+2)); profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]); } aln->sl[c] = map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int* assign_gap_codes(int* seq,int len) { int i; if(seq[0] < 0 && seq[1] < 0){ seq[0] = -2; } for(i = 1; i < len;i++){ if(seq[i-1] < 0 && seq[i] < 0){ seq[i] = -2; } if(seq[i-1] < 0 && seq[i] >= 0){ 
seq[i-1] = -1; } } i = 0; while(seq[i] < 0){ if(seq[i] == -2){ seq[i] = -3; } i++; } i = len-1; while(seq[i] < 0){ if(seq[i] == -2){ seq[i] = -3; } i--; } return seq; } float* make_profile_from_alignment(float* prof, int num,struct alignment* aln,float** subm) { int i,j,c; int a; int len = aln->sl[num]; int* seq = 0; prof = malloc(sizeof(float)*(len+2)*64); for ( i = 0; i < (len+2)*64;i++){ prof[i] = 0; } for ( a = 0; a < aln->nsip[num];a++){ // fprintf(stderr,"SEQ:%d\n",a); seq = aln->s[aln->sip[num][a]]; prof += (64 *(len+1)); prof[23+32] -= gpo; prof[24+32] -= gpe; prof[25+32] -= tgpe; i = len; while(i--){ prof -= 64; c = seq[i]; if(c >= 0){ // if(i == 0){ // fprintf(stderr,"%d \n",c); // } prof[c] += 1; prof += 32; for(j = 23;j--;){ prof[j] += subm[c][j]; } prof[23] -= gpo; prof[24] -= gpe; prof[25] -= tgpe; prof -= 32; }else if(c == -1){ prof[23] += 1; for (j = 32; j < 55;j++){ prof[j] -= gpo; } }else if(c == -2){ prof[24] += 1; for (j = 32; j < 55;j++){ prof[j] -= gpe; } }else if(c == -3){ prof[25] += 1; for (j = 32; j < 55;j++){ prof[j] -= tgpe; } } } prof -= 64; prof[23+32] -= gpo; prof[24+32] -= gpe; prof[25+32] -= tgpe; } return prof; } float** protein_profile_wu_distance(struct alignment* aln,float** dm,struct parameters* param, int nj) { struct bignode* hash[1024]; int*p =0; int i,j,m,n,a,b; unsigned int hv; int** local_seq = 0; int* local_sl = 0; local_seq = malloc(sizeof(int*)*numseq); local_sl = malloc(sizeof(int)*numseq); for(i = 0; i< numseq;i++){ local_seq[i] = malloc(sizeof(int)*aln->sl[i]); a = 0; p = aln->s[i]; for (j = 0;j < aln->sl[i];j++){ if(p[j] >= 0){ local_seq[i][a] = p[j]; a++; } } local_sl[i] = a; } //determine number of profiles that were inputted.... 
for (i = 0;i < 1024;i++){ hash[i] = 0; } if (nj){ dm = malloc (sizeof(float*)*local_numprofiles); for (i = local_numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(local_numprofiles)); for (j = local_numprofiles;j--;){ dm[i][j] = 0.0f; } } }else{ dm = malloc (sizeof(float*)*local_numseq); for (i = local_numseq;i--;){ dm[i] = malloc (sizeof (float)*(local_numseq)); for (j = local_numseq;j--;){ dm[i][j] = 0.0f; } } } fprintf(stderr,"Distance Calculation:\n"); b = (numseq*(numseq-1))/2; a = 1; //fprintf(stderr,"%d %d %8.0f\n",a,b,(float)a /(float)b * 100); for (i = 0; i < numseq-1;i++){ m = is_member(aln,i); p = local_seq[i]; //p = malloc(sizeof(int) for (j = local_sl[i]-2;j--;){ // hv = (p[j+1] << 5) + p[j+2]; // hash[hv] = insert_hash(hash[hv],j); hv = (p[j] << 5) + p[j+1]; hash[hv] = big_insert_hash(hash[hv],j); hv = (p[j] << 5) + p[j+2]; hash[hv] = big_insert_hash(hash[hv],j); // hv = (si->s[i][j+1] << 5) +t2; // hash[hv] = insert_hash(hash[hv],j); } for (j = i+1; j < numseq;j++){ n = is_member(aln,j); if(n != m){ //fprintf(stderr,"%d %d\n",n,m); p = local_seq[j]; dm[m][n] += protein_wu_distance_calculation(hash,p,local_sl[j],local_sl[j]+local_sl[i],param->zlevel); //fprintf(stderr,"%d-%d(%d %d):%f\n",m,n,i,j,dm[m][n]); //exit(0); //dm[i][j] /= min; dm[n][m] = dm[m][n]; } fprintf(stderr,"\r%8.0f percent done",(float)a /(float)b * 100); a++; } for (j = 1024;j--;){ if (hash[j]){ big_remove_nodes(hash[j]); hash[j] = 0; } } } for(i = 0; i< numseq;i++){ free(local_seq[i]); } free(local_seq); free(local_sl); return dm; } float** dna_profile_distance(struct alignment* aln,float** dm,struct parameters* param, int nj) { struct bignode* hash[1024]; int *p = 0; int i,j,a,b,m,n; unsigned int hv; int** local_seq = 0; int* local_sl = 0; local_seq = malloc(sizeof(int*)*numseq); local_sl = malloc(sizeof(int)*numseq); for(i = 0; i< numseq;i++){ local_seq[i] = malloc(sizeof(int)*aln->sl[i]); a = 0; p = aln->s[i]; for (j = 0;j < aln->sl[i];j++){ if(p[j] >= 0){ local_seq[i][a] = 
p[j]; a++; } } local_sl[i] = a; } fprintf(stderr,"Distance Calculation:\n"); for (i = 0;i < 1024;i++){ hash[i] = 0; } if (nj){ dm = malloc (sizeof(float*)*local_numprofiles); for (i = local_numprofiles;i--;){ dm[i] = malloc (sizeof (float)*(local_numprofiles)); for (j = local_numprofiles;j--;){ dm[i][j] = 0.0f; } } }else{ dm = malloc (sizeof(float*)*local_numseq); for (i = local_numseq;i--;){ dm[i] = malloc (sizeof (float)*(local_numseq)); for (j = local_numseq;j--;){ dm[i][j] = 0.0f; } } } b = (numseq*(numseq-1))/2; a = 1; for (i = 0; i < numseq-1;i++){ m = is_member(aln,i); p = local_seq[i]; for (j = local_sl[i]-5;j--;){ hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+3]&3)<<2) + (p[j+4]&3);//ABCDE hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+3]&3)<<2) + (p[j+5]&3);//ABCDF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+2]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ABCEF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+1]&3)<<6) + ((p[j+3]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ABDEF hash[hv] = big_insert_hash(hash[hv],j); hv = ((p[j]&3)<<8) + ((p[j+2]&3)<<6) + ((p[j+3]&3)<<4) + ((p[j+4]&3)<<2) + (p[j+5]&3);//ACDEF hash[hv] = big_insert_hash(hash[hv],j); } for (j = i+1; j < numseq;j++){ n = is_member(aln,j); if(n != m){ p = local_seq[j]; //min = (si->sl[i] > si->sl[j]) ?si->sl[j] :si->sl[i]; dm[m][n] += dna_distance_calculation(hash,p,local_sl[j],local_sl[j]+local_sl[i],param->zlevel); //dm[i][j] /= (aln->sl[i] > aln->sl[j]) ? 
aln->sl[j] : aln->sl[i]; dm[n][m] = dm[m][n]; } fprintf(stderr,"\r%8.0f percent done",(float)a /(float)b * 100); a++; } for (j = 1024;j--;){ if (hash[j]){ big_remove_nodes(hash[j]); hash[j] = 0; } } } for(i = 0; i< numseq;i++){ free(local_seq[i]); } free(local_seq); free(local_sl); return dm; } int is_member(struct alignment* aln,int test) { int i,j; for (i = numseq;insip[i];j++){ if(aln->sip[i][j] == test){ return i-numseq; } } } return -1; } kalign2_profile_alignment.h0000644001210100001440000000261311577654215015546 0ustar olifriusers/* kalign2_profile_alignment.h Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ int local_numseq; int local_numprofiles; int* assign_gap_codes(int* seq,int len); int is_member(struct alignment* aln,int test); float** dna_profile_distance(struct alignment* aln,float** dm,struct parameters* param, int nj); float** protein_profile_wu_distance(struct alignment* aln,float** dm,struct parameters* param, int nj); int** hirschberg_profile_alignment(struct alignment* aln,int* tree,float**submatrix, int** map); float* make_profile_from_alignment(float* prof, int num,struct alignment* aln,float** subm); kalign2_profile.c0000644001210100001440000005454511577654215013516 0ustar olifriusers/* kalign2_profile.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" /* void add_feature_information_from_alignment(int* path,int* fprof1,int* fprof2,int weight) { int i = 0; int j = 0; int c = 1; while(path[c] != 3){ if (!path[c]){ fprof1[i] +=1; fprof1[i+1] +=weight; fprof2[j] +=1; fprof2[j+1] +=weight; i+=2; j+=2; } if (path[c] & 1){ j+=2; } if (path[c] & 2){ i+=2; } c++; } free(path); }*/ float* update2(const float* profa, const float* profb,float* newp,int* path,int sipa,int sipb,float internal_gap_weight) { int i,c; int* gap_len = 0; int gap_cost = 0; gap_len = malloc(sizeof(int)* (path[0]+1)); gap_len[0] = 0; //fprintf(stderr,"%d len,,,,\n",path[0]); for(i = 1; i <= path[0];i++){ // fprintf(stderr,"%d,%d ",i,path[i]); gap_len[i] = (path[i] >> 16); path[i] = path[i] & 0x0000ffff; // fprintf(stderr,"%d %d\n",path[i],gap_len[i]); } //gap_len[path[0]] = 0; // int len = 0; c = 1; /*while(path[c] != 3){ fprintf(stderr,"%d %d %d\n",c,path[c],gap_len[c]); c++; } exit(0);*/ while(path[c] != 3){ gap_cost = 0; if (!path[c]){ while(!path[c] && path[c] != 3){ // fprintf(stderr,"Align %d %d\n",c,path[c]); for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; newp += 64; c++; } }else if (path[c] & 1){ //fprintf(stderr,"%d\n",gap_len[c]); if(path[c] & 128){//N terminal gap !!!!!!1 for (i = 0; i < gap_len[c]-1;i++){ gap_cost += profb[29+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } gap_cost += profb[27+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); }else if(path[c] & 64){//c terminal gap !!!!!!1 // fprintf(stderr,"c terminal gap\n"); gap_cost += profb[27+64]; // fprintf(stderr,"i:%d %d\n",0,gap_cost); for (i = 1; i < gap_len[c];i++){ gap_cost += profb[29+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } }else{ // fprintf(stderr,"middle gap\n"); gap_cost += profb[27+64]; // fprintf(stderr,"i:%d %d\n",0,gap_cost); for (i = 1; i < gap_len[c]-1;i++){ gap_cost += profb[28+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } gap_cost += profb[27+64*i]; // 
fprintf(stderr,"i:%d %d\n",i,gap_cost); } //fprintf(stderr,"gap_A %d %d length:%d cost:%d\n",c,path[c],gap_len[c],gap_cost); gap_cost /= gap_len[c]; gap_cost *= internal_gap_weight; while(path[c] & 1 && path[c] != 3){ // fprintf(stderr,"gap_A %d %d cost:%d\n",c,path[c],gap_cost); for (i = 64; i--;){ newp[i] = profb[i]; } newp[23] += gap_cost; for (i = 32; i < 55;i++){ newp[i] += gap_cost; } profb +=64; newp += 64; c++; } }else if (path[c] & 2){ //fprintf(stderr,"%d\n",gap_len[c]); if(path[c] & 128){//N terminal gap !!!!!!1 for (i = 0; i < gap_len[c]-1;i++){ gap_cost += profa[29+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } gap_cost += profa[27+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); }else if(path[c] & 64){//c terminal gap !!!!!!1 // fprintf(stderr,"c terminal gap\n"); gap_cost += profa[27+64]; // fprintf(stderr,"i:%d %d\n",c-1,gap_cost); for (i = 1; i < gap_len[c];i++){ gap_cost += profa[29+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } }else{ // fprintf(stderr,"middle gap\n"); gap_cost += profa[27+64]; // fprintf(stderr,"i:%d %d\n",c-1,gap_cost); for (i = 1; i < gap_len[c]-1;i++){ gap_cost += profa[28+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } gap_cost += profa[27+64*i]; // fprintf(stderr,"i:%d %d\n",i,gap_cost); } gap_cost /= gap_len[c]; gap_cost *= internal_gap_weight; while(path[c] & 2 && path[c] != 3){ // fprintf(stderr,"gap_b %d %d cost:%d\n",c,path[c],gap_cost); for (i = 64; i--;){ newp[i] = profa[i]; } newp[23] += gap_cost; for (i = 32;i < 55;i++){ newp[i] += gap_cost; } profa +=64; newp += 64; c++; } } } for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } newp -= path[0] *64; free(gap_len); return newp; } void smooth_gaps(float* prof,int len,int window,float strength) { float tmp_gpo; float tmp_gpe; float tmp_tgpe; int i,j; if(!(window &1)){ window--; } for ( i = (window/2); i < len - (window/2);i++){ tmp_gpo = 0.0; tmp_gpe = 0.0; tmp_tgpe = 0.0; for (j = -(window/2); j < (window/2);j++){ tmp_gpo += 
(float)prof[27+((i+j)*64)]*strength; tmp_gpe += (float) prof[28+((i+j)*64)]*strength; tmp_tgpe += (float) prof[29+((i+j)*64)]*strength; } tmp_gpo /= window; tmp_gpe /= window; tmp_tgpe /= window; prof[27+(i*64)] = prof[27+(i*64)]*(1.0-strength) + tmp_gpo; prof[28+(i*64)] = prof[28+(i*64)]*(1.0-strength) + tmp_gpe; prof[29+(i*64)] = prof[29+(i*64)]*(1.0-strength) + tmp_tgpe; } } void increase_gaps(float* prof,int len,int window,float strength) { float* mod = 0; int i,j,c; int start_pos = 0; int end_pos = 0; mod = malloc(sizeof(float)*window); for ( i = 0; i < window;i++){ mod[i] = (strength - i*(float)strength / (float) window) - (0.5*strength); } //only gpo first.... for ( i = 0; i < len;i++){ // // fprintf(stderr,"(%0.2f:%0.2f) ",prof[26],prof[23]); prof[26] = 0.0; prof+= 64; } prof -= len << 6; for ( i = 0; i < len;i++){ if(prof[23]!= 0){ start_pos = i-window; if(start_pos < 0){ c = start_pos + window; }else{ c = window; } for ( j = c;j--;){ prof[26 - (64*(j+1))] += mod[j]; } end_pos = i+window; if(end_pos > len){ c = len - i; }else{ c = window; } //fprintf(stderr,"%d %d\n",i,c); for (j = 0;j < c;j++){ prof[26 +(64*(j+1))] += mod[j]; } } prof+= 64; } prof -= len << 6; for ( i = 0; i < len;i++){ // // fprintf(stderr,"(%0.2f:%0.2f) ",prof[26],prof[23]); prof[27] = prof[27] * (prof[26]+1.0); prof[28] = prof[28] * (prof[26]+1.0); prof[29] = prof[29] * (prof[26]+1.0); prof+= 64; } prof -= len << 6; free(mod); } void set_gap_penalties2(float* prof,int len,int nsip,int window,float strength) { int i,j; float tmp_gpo; float tmp_gpe; float tmp_tgpe; prof += (64 *(len)); prof[27] = prof[55]*nsip*-gpo; prof[28] = prof[55]*nsip*-gpe; prof[29] = prof[55]*nsip*-tgpe; i = len; while(i--){ prof -= 64; prof[27] = prof[55]*nsip*-gpo; prof[28] = prof[55]*nsip*-gpe; prof[29] = prof[55]*nsip*-tgpe; } if(!(window &1)){ window--; } for ( i = (window/2); i < len - (window/2);i++){ tmp_gpo = 0.0; tmp_gpe = 0.0; tmp_tgpe = 0.0; for (j = -(window/2); j < (window/2);j++){ tmp_gpo += 
(float)prof[27+((i+j)*64)]*strength; tmp_gpe += (float) prof[28+((i+j)*64)]*strength; tmp_tgpe += (float) prof[29+((i+j)*64)]*strength; } tmp_gpo /= window; tmp_gpe /= window; tmp_tgpe /= window; prof[27+(i*64)] = prof[27+(i*64)]*(1-strength) + tmp_gpo; prof[28+(i*64)] = prof[28+(i*64)]*(1-strength) + tmp_gpe; prof[29+(i*64)] = prof[29+(i*64)]*(1-strength) + tmp_tgpe; } /*for ( i = 2; i < len-2;i++){ prof[27+(i*64)] = (prof[27+((i-2)*64)] +prof[27+((i-1)*64)] + prof[27+(i*64)] + prof[27+((i+1)*64)] +prof[27+((i+2)*64)])/ 5; }*/ /* for ( i = 2; i < len-2;i++){ prof[28+(i*64)] = (prof[28+((i-2)*64)] + prof[28+((i-1)*64)] + prof[28+(i*64)] + prof[28+((i+1)*64)] +prof[28+((i+2)*64)])/ 5; } for ( i = 2; i < len-2;i++){ prof[29+(i*64)] = (prof[29+((i-2)*64)] + prof[29+((i-1)*64)] + prof[29+(i*64)] + prof[29+((i+1)*64)] +prof[29+((i+2)*64)])/ 5; }*/ } float* make_profile2(float* prof, int* seq,int len,float** subm) { int i,j,c; prof = malloc(sizeof(float)*(len+1)*64); prof += (64 *len); for (i = 0;i < 64;i++){ prof[i] = 0; } prof[55] = 1; i = len; while(i--){ prof -= 64; for (j = 0;j < 64;j++){ prof[j] = 0; } c = seq[i]; prof[c] += 1; prof += 32; for(j = 23;j--;){ prof[j] = subm[c][j]; } prof[23] = 1; prof -= 32; } return prof; } float* feature_update(const float* profa, const float* profb,float* newp,int* path,int stride) { int i,c; c = 1; while(path[c] != 3){ if (!path[c]){ for (i = stride; i--;){ newp[i] = profa[i] + profb[i]; } profa += stride; profb += stride; } if (path[c] & 1){ for (i = stride; i--;){ newp[i] = profb[i]; } profb += stride; } if (path[c] & 2){ for (i = stride; i--;){ newp[i] = profa[i]; } profa+=stride; } newp += stride; c++; } for (i = stride; i--;){ newp[i] = profa[i] + profb[i]; } newp -= path[0] *stride; return newp; } float* make_wu_profile(float* prof,float* wu,int len) { int i; prof = malloc(sizeof(float)*(len+1)*2); for (i = 0;i < (len+1)*2;i++){ prof[i] = 0; } for (i = 0; i < len;i++){ if(!wu[i]){ prof[i<<1] = 1; prof[(i<<1)+1] = 1; }else{ 
prof[i<<1] = wu[i]+1; prof[(i<<1)+1] = wu[i]+1; } } return prof; } float* make_feature_profile(float* prof,struct feature* f,int len,struct feature_matrix* fm) { int i,j; prof = malloc(sizeof(int)*(len+1)*fm->stride); for (i = 0;i < (len+1)*fm->stride;i++){ prof[i] = 0; } while(f){ if(f->color != -1){ if(f->start < len && f->end < len){ for (i = f->start-1;i < f->end;i++){ prof[i*fm->stride + f->color] += 1; for ( j =fm->mdim ;j < fm->stride;j++){ prof[i*fm->stride+j] += fm->m[f->color][j-fm->mdim]; } } } } f = f->next; } return prof; } float* make_profile(float* prof, int* seq,int len, float** subm) { int i,j,c; prof = malloc(sizeof(float)*(len+2)*64); prof += (64 *(len+1)); for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[24+32] = -gpe; prof[25+32] = -tgpe; i = len; while(i--){ prof -= 64; for (j = 0;j < 64;j++){ prof[j] = 0; } c = seq[i]; prof[c] += 1; prof += 32; for(j = 23;j--;){ prof[j] = subm[c][j]; } prof[23] = -gpo; prof[24] = -gpe; prof[25] = -tgpe; prof -= 32; } prof -= 64; for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[24+32] = -gpe; prof[25+32] = -tgpe; return prof; } float* dna_make_profile(float* prof, int* seq,int len,float** subm) //int* make_profile(int* prof, int* seq,int len) { int i,j,c; prof = malloc(sizeof(float)*(len+2)*22); prof += (22 *(len+1)); //fprintf(stderr,"Len:%d %d\n",len,64*len); //for (i = 64;i--;){ for (i = 0;i < 22;i++){ prof[i] = 0; } prof[5+11] = -gpo; prof[6+11] = -gpe; prof[7+11] = -tgpe; i = len; while(i--){ prof -= 22; //fprintf(stderr,"-64\n"); //for (j = 64; j--;){ for (j = 0;j < 22;j++){ prof[j] = 0; } c = seq[i]; prof[c] += 1; //n = feature[i]; //prof[n+23] = 1; prof += 11; for(j = 5;j--;){ prof[j] = subm[c][j]; } prof[5] = -gpo; prof[6] = -gpe; prof[7] = -tgpe; prof -= 11; } prof -= 22; for (i = 0;i < 22;i++){ prof[i] = 0; } prof[5+11] = -gpo; prof[6+11] = -gpe; prof[7+11] = -tgpe; return prof; } float* update(const float* profa, const float* profb,float* newp,int* path,int sipa,int 
sipb) { int i,j,c; for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; newp += 64; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. -> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 64; i--;){ newp[i] = profb[i]; } profb += 64; #ifndef SIMPLE if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; }else{ newp[24] += sipa;//1; i = gpe*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; newp[23] += sipa;//1; i += gpo*sipa; }else{ newp[23] += sipa;//1; i = gpo*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipa;//1; i = tgpe*sipa; newp[23] += sipa;//1; i += gpo*sipa; }else{ newp[23] += sipa;//1; i = gpo*sipa; } for (j = 32; j < 55;j++){ newp[j] -=i; } } } #endif } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 64; i--;){ newp[i] = profa[i]; } profa+=64; #ifndef SIMPLE if(!(path[c] & 20)){ if(path[c] & 32){ newp[25] += sipb;//1; i = tgpe*sipb; }else{ newp[24] += sipb;//1; i = gpe*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // 
fprintf(stderr,"close_open"); if(path[c] & 32){ newp[25] += sipb;//1; i = tgpe*sipb; newp[23] += sipb;//1; i += gpo*sipb; }else{ newp[23] += sipb;//1; i = gpo*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c] & 32){ newp[25] += sipb;//1; i = tgpe*sipb; newp[23] += sipb;//1; i += gpo*sipb; }else{ newp[23] += sipb;//1; i = gpo*sipb; } for (j = 32; j < 55;j++){ newp[j] -=i; } } } #endif } newp += 64; c++; } for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } newp -= (path[0]+1) *64; return newp; } float* update_only_a(const float* profa, const float* profb,float* newp,int* path,int sipa,int sipb) { int i,c; for (i = 64; i--;){ newp[i] = profa[i];// + profb[i]; } profa += 64; profb += 64; newp += 64; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. 
-> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 64; i--;){ newp[i] = profa[i];// + profb[i]; } profa += 64; profb += 64; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 64; i--;){ newp[i] = 0.0;//profb[i]; } profb += 64; } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 64; i--;){ newp[i] = profa[i]; } profa+=64; } newp += 64; c++; } for (i = 64; i--;){ newp[i] = profa[i];// + profb[i]; } newp -= (path[0]+1) *64; return newp; } float* dna_update(const float* profa, const float* profb, float* newp,int* path,int sipa,int sipb) { int i,j,c; for (i = 22; i--;){ newp[i] = profa[i] + profb[i]; } profa += 22; profb += 22; newp += 22; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. 
-> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 22; i--;){ newp[i] = profa[i] + profb[i]; } profa += 22; profb += 22; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 22; i--;){ newp[i] = profb[i]; } profb += 22; if(!(path[c]&20)){ if(path[c]&32){ newp[7] += sipa;//1; i = tgpe*sipa; }else{ newp[6] += sipa;//1; i = gpe*sipa; } for (j = 11; j < 16;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c]&32){ newp[7] += sipa;//1; i = tgpe*sipa; newp[5] += sipa;//1; i += gpo*sipa; }else{ newp[5] += sipa;//1; i = gpo*sipa; } for (j = 11; j < 16;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c]&32){ newp[7] += sipa;//1; i = tgpe*sipa; newp[5] += sipa;//1; i += gpo*sipa; }else{ newp[5] += sipa;//1; i = gpo*sipa; } for (j = 11; j < 16; j++){ newp[j] -=i; } } } } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 22; i--;){ newp[i] = profa[i]; } profa+=22; if(!(path[c]&20)){ if(path[c]&32){ newp[7] += sipb;//1; i = tgpe*sipb; }else{ newp[6] += sipb;//1; i = gpe*sipb; } for (j = 11; j < 16;j++){ newp[j] -=i; } }else{ if (path[c] & 16){ // fprintf(stderr,"close_open"); if(path[c]&32){ newp[7] += sipb;//1; i = tgpe*sipb; newp[5] += sipb;//1; i += gpo*sipb; }else{ newp[5] += sipb;//1; i = gpo*sipb; } for (j = 11; j < 16;j++){ newp[j] -=i; } } if (path[c] & 4){ // fprintf(stderr,"Gap_open"); if(path[c]&32){ newp[7] += sipb;//1; i = tgpe*sipb; newp[5] += sipb;//1; i += gpo*sipb; }else{ newp[5] += sipb;//1; i = gpo*sipb; } for (j = 11; j < 16;j++){ newp[j] -=i; } } } } newp += 22; c++; } for (i = 22; i--;){ newp[i] = profa[i] + profb[i]; } newp -= (path[0]+1) *22; return newp; } float* dna_update_only_a(const float* profa, const float* profb, float* 
newp,int* path,int sipa,int sipb) { int i,c; for (i = 22; i--;){ newp[i] = profa[i];// + profb[i]; } profa += 22; profb += 22; newp += 22; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. -> see Durbin pp 140 if (!path[c]){ //fprintf(stderr,"Align %d\n",c); for (i = 22; i--;){ newp[i] = profa[i];//+ profb[i]; } profa += 22; profb += 22; } if (path[c] & 1){ //fprintf(stderr,"Gap_A:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]); for (i = 22; i--;){ newp[i] = 0.0f;//profb[i]; } profb += 22; newp[5] = 1000000; newp[6] = 1000000; newp[7] = 1000000; } if (path[c] & 2){ //fprintf(stderr,"Gap_B:%d\n",c); //printf("open:%d ext:%d %d %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]); for (i = 22; i--;){ newp[i] = profa[i]; } profa+=22; } newp += 22; c++; } for (i = 22; i--;){ newp[i] = profa[i];// + profb[i]; } newp -= (path[0]+1) *22; return newp; } void dna_set_gap_penalties(float* prof,int len,int nsip,float strength,int nsip_c) { int i; int j; float res = (float)nsip_c; float local_res = 0; float w = 0.0; prof += (22 *(len+1)); local_res = 0; for(j = 0; j < 5;j++){ local_res+= prof[j]; } w = 1.0 + (((local_res - 1.0 )/ res) * strength); prof[8] = prof[16]*nsip*w;//gap open or close prof[9] = prof[17]*nsip*w;//gap extention prof[10] = prof[18]*nsip*w;//gap open or close //prof[30] = prof[58]*nsip;//gap extention i = len+1; while(i--){ prof -= 22; local_res = 0; for(j = 0; j < 5;j++){ local_res+= prof[j]; } w = 1.0 + (((local_res - 1.0 )/ res) * strength); prof[8] = prof[16]*nsip*w;//gap open or close prof[9] 
= prof[17]*nsip*w;//gap extention prof[10] = prof[18]*nsip*w;//gap open or close // prof[30] = prof[58]*nsip;//gap extention } } void set_gap_penalties(float* prof,int len,int nsip,float strength,int nsip_c) { int i; int j; float res = (float)nsip_c; float local_res = 0; float w = 0.0; prof += (64 *(len+1)); local_res = 0; for(j = 0; j < 23;j++){ local_res+= prof[j]; } w = 1.0 + (((local_res - 1.0 )/ res) * strength); prof[27] = prof[55]*nsip*w;//gap open or close 23 prof[28] = prof[56]*nsip*w;//gap extention 24 prof[29] = prof[57]*nsip*w;//gap open or close 25 i = len+1; while(i--){ prof -= 64; local_res = 0; for(j = 0; j < 23;j++){ local_res+= prof[j]; } w = 1.0 + (((local_res - 1.0 )/ res) * strength); prof[27] = prof[55]*nsip*w;//gap open or close prof[28] = prof[56]*nsip*w;//gap extention prof[29] = prof[57]*nsip*w;//gap open or close } } kalign2_simple_gaps.c0000644001210100001440000005007211577654215014350 0ustar olifriusers/* kalign2_simple_gaps.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" int** simple_hirschberg_alignment(struct alignment* aln,int* tree,int**submatrix, int** map) { struct hirsch_mem* hm = 0; int i,j,g,a,b,c; int len_a; int len_b; int** profile = 0; profile = malloc(sizeof(int*)*numprofiles); for ( i = 0;i< numprofiles;i++){ profile[i] = 0; } map = malloc(sizeof(int*)*numprofiles); for ( i = 0;i < numprofiles;i++){ map[i] = 0; } hm = hirsch_mem_alloc(hm,1024); fprintf(stderr,"\nAlignment:\n"); for (i = 0; i < (numseq-1);i++){ a = tree[i*3]; b = tree[i*3+1]; c = tree[i*3+2]; fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100); // fprintf(stderr,"Aligning:%d %d->%d done:%f\n",a,b,c,((float)(i+1)/(float)numseq)*100); len_a = aln->sl[a]; len_b = aln->sl[b]; g = (len_a > len_b)? len_a:len_b; map[c] = malloc(sizeof(int) * (g+2)); if(g > hm->size){ hm = hirsch_mem_realloc(hm,g); } for (j = 0; j < (g+2);j++){ // hirsch_path[j] = -1; map[c][j] = -1; // map[c][j] = 0; } // map[c][0] = len_a; //map[c][len_a+len_b+1] = 3; if (a < numseq){ profile[a] = simple_make_profile(profile[a],aln->s[a],len_a,submatrix); } if (b < numseq){ profile[b] = simple_make_profile(profile[b],aln->s[b],len_b,submatrix); } hm->starta = 0; hm->startb = 0; hm->enda = len_a; hm->endb = len_b; hm->len_a = len_a; hm->len_b = len_b; hm->f[0].a = 0; hm->f[0].ga = -INFTY; hm->f[0].gb = -INFTY; hm->b[0].a = 0; hm->b[0].ga = -INFTY; hm->b[0].gb = -INFTY; // fprintf(stderr,"LENA:%d LENB:%d numseq:%d\n",len_a,len_b,numseq); if(len_a < len_b){ map[c] = simple_hirsch_pp_dyn(profile[a],profile[b],hm,map[c]); }else{ hm->enda = len_b; hm->endb = len_a; hm->len_a = len_b; hm->len_b = len_a; map[c] = simple_hirsch_pp_dyn(profile[b],profile[a],hm,map[c]); map[c] = mirror_hirsch_path(map[c],len_a,len_b); } map[c] = add_gap_info_to_hirsch_path(map[c],len_a,len_b); if(i != numseq-2){ profile[c] = malloc(sizeof(int)*64*(map[c][0]+2)); profile[c] = simple_update(profile[a],profile[b],profile[c],map[c]); } aln->sl[c] = 
map[c][0]; aln->nsip[c] = aln->nsip[a] + aln->nsip[b]; aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b])); g =0; for (j = aln->nsip[a];j--;){ aln->sip[c][g] = aln->sip[a][j]; g++; } for (j = aln->nsip[b];j--;){ aln->sip[c][g] = aln->sip[b][j]; g++; } free(profile[a]); free(profile[b]); } fprintf(stderr,"\r%8.0f percent done\n",100.0); free(profile); hirsch_mem_free(hm); for (i = 32;i--;){ free(submatrix[i]); } free(submatrix); return map; } int* simple_make_profile(int* prof, int* seq,int len,int** subm) { int i,j,c; prof = malloc(sizeof(int)*(len+2)*64); prof += (64 *(len+1)); for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[26] = 1; i = len; while(i--){ prof -= 64; for (j = 0;j < 64;j++){ prof[j] = 0; } prof[26] = 1;//number of residues // both additive c = seq[i]; prof[c] += 1; prof += 32; for(j = 23;j--;){ prof[j] = subm[c][j]; } prof[23] = -gpo; prof -= 32; } prof -= 64; for (i = 0;i < 64;i++){ prof[i] = 0; } prof[23+32] = -gpo; prof[26] = 1; return prof; } int* simple_update(int* profa,int* profb,int* newp,int* path) { int i,c; for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; newp += 64; c = 1; while(path[c] != 3){ //Idea: limit the 'virtual' number of residues of one type to x. // i.e. only allow a maximum of 10 alanines to be registered in each column // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present. // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase // with the number of sequences. 
-> see Durbin pp 140 if (!path[c]){ for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } profa += 64; profb += 64; } if (path[c] & 1){ for (i = 64; i--;){ newp[i] = profb[i]; } profb += 64; } if (path[c] & 2){ for (i = 64; i--;){ newp[i] = profa[i]; } profa+=64; } newp += 64; c++; } for (i = 64; i--;){ newp[i] = profa[i] + profb[i]; } newp -= (path[0]+1) *64; return newp; } int* simple_hirsch_pp_dyn(const int* prof1,const int* prof2,struct hirsch_mem* hm, int* hirsch_path) { int mid = ((hm->enda - hm->starta) / 2)+ hm->starta; int input_states[6] = {hm->f[0].a,hm->f[0].ga,hm->f[0].gb,hm->b[0].a,hm->b[0].ga,hm->b[0].gb}; int old_cor[5] = {hm->starta,hm->enda,hm->startb,hm->endb,mid}; //fprintf(stderr,"starta:%d enda:%d startb:%d endb:%d mid:%d\n",hm->starta,hm->enda,hm->startb,hm->endb,mid); if(hm->starta >= hm->enda){ return hirsch_path; } if(hm->startb >= hm->endb){ return hirsch_path; } hm->enda = mid; hm->f = simple_foward_hirsch_pp_dyn(prof1,prof2,hm); /*int i; fprintf(stderr,"FOWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->f[i].a,hm->f[i].ga,hm->f[i].gb); }*/ hm->starta = mid; hm->enda = old_cor[1]; hm->b = simple_backward_hirsch_pp_dyn(prof1,prof2,hm); /*fprintf(stderr,"BaCKWARD\n"); for (i = hm->startb; i <= hm->endb;i++){ fprintf(stderr,"%d %d %d\n",hm->b[i].a,hm->b[i].ga,hm->b[i].gb); }*/ hirsch_path = simple_hirsch_align_two_pp_vector(prof1,prof2,hm,hirsch_path,input_states,old_cor); return hirsch_path; } int* simple_hirsch_align_two_pp_vector(const int* prof1,const int* prof2,struct hirsch_mem* hm,int* hirsch_path,int input_states[],int old_cor[]) { struct states* f = hm->f; struct states* b = hm->b; int i,j,c; int transition = -1; //code: // a -> a = 1 // a -> ga = 2 // a -> gb = 3 // ga ->ga = 4 // ga -> a = 5 //gb->gb = 6; //gb->a = 7; //int max = -INFTY; float max = -INFTY; float middle = (hm->endb - hm->startb)/2 + hm->startb; float sub = 0.0; prof1+= (64 * (old_cor[4]+1)); prof2 += 64 * (hm->startb); i = 
hm->startb; c = -1; for(i = hm->startb; i < hm->endb;i++){ sub = abs(middle -i); sub /= 1000; prof2 += 64; //fprintf(stderr,"%d %d %d \n",f[i].a,b[i].a,max); if(f[i].a+b[i].a-sub > max){ max = f[i].a+b[i].a-sub; // fprintf(stderr,"aligned->aligned:%d + %d = %d\n",f[i].a,b[i].a,f[i].a+b[i].a); transition = 1; c = i; } if(f[i].a+b[i].ga+prof2[23]*prof1[26]-sub > max){ max = f[i].a+b[i].ga+prof2[23]*prof1[26]-sub; // fprintf(stderr,"aligned->gap_a:%d + %d +%d = %d\n",f[i].a,b[i].ga,prof1[27],f[i].a+b[i].ga+prof2[27]); transition = 2; c = i; } if(f[i].a+b[i].gb+prof1[23]*prof2[26] -sub> max){ max = f[i].a+b[i].gb+prof1[23]*prof2[26]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(f[i].ga+b[i].a+prof2[23]*prof1[26]-sub > max){ max = f[i].ga+b[i].a+prof2[23]*prof1[26]-sub; // fprintf(stderr,"gap_a->aligned:%d + %d + %d(gpo) = %d\n",f[i].ga,b[i].a,prof2[27],f[i].ga+b[i].a+prof2[27]); transition = 5; c = i; } if(hm->startb == 0){ if(f[i].gb+b[i].gb-sub > max){ max = f[i].gb+b[i].gb-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb-sub > max){ max = f[i].gb+b[i].gb-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } if(f[i].gb+b[i].a+prof1[23]*prof2[26]-sub > max){ max = f[i].gb+b[i].a+prof1[23]*prof2[26]-sub; // fprintf(stderr,"gap_b->aligned:%d + %d + %d(gpo) = %d\n",f[i].gb,b[i].a,prof1[27],f[i].gb+b[i].a+prof1[27]); transition = 7; c = i; } } i = hm->endb; sub = abs(middle -i); sub /= 1000; if(f[i].a+b[i].gb+prof1[23]*prof2[26]-sub > max){ max = f[i].a+b[i].gb+prof1[23]*prof2[26]-sub; // fprintf(stderr,"aligned->gap_b:%d + %d +%d = %d\n",f[i].a,b[i].gb,prof1[27],f[i].a+b[i].gb+prof1[27]); transition = 3; c = i; } if(hm->endb == hm->len_b){ if(f[i].gb+b[i].gb-sub > max){ 
max = f[i].gb+b[i].gb-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } }else{ if(f[i].gb+b[i].gb-sub > max){ max = f[i].gb+b[i].gb-sub; // fprintf(stderr,"gap_b->gap_b:%d + %d +%d(gpe) =%d \n",f[i].gb, b[i].gb, prof1[28],f[i].gb+b[i].gb+prof1[28]); transition = 6; c = i; } } prof1-= (64 * (old_cor[4]+1)); prof2 -= hm->endb << 6; //fprintf(stderr,"Transition:%d at:%d\n",transition,c); //if(transition == -1){ // exit(0); //} j = hirsch_path[0]; switch(transition){ case 1: //a -> a = 1 hirsch_path[old_cor[4]] = c; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -INFTY; hm->b[0].gb = -INFTY; // fprintf(stderr,"Using this for start:%d %d %d\n",hm->f[0].a,hm->f[0].ga,hm->f[0].gb); hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -INFTY; hm->f[0].gb = -INFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 2:// a -> ga = 2 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -INFTY; hm->b[0].gb = -INFTY; hm->starta = old_cor[0]; hm->enda = 
old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = -INFTY; hm->f[0].ga = 0; hm->f[0].gb = -INFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d what:%d-%d %d-%d\n",c+1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 3:// a -> gb = 3 hirsch_path[old_cor[4]] = c; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4],c); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = 0; hm->b[0].ga = -INFTY; hm->b[0].gb = -INFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -INFTY; hm->f[0].ga = -INFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 5://ga -> a = 5 hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -INFTY; hm->b[0].ga = 0; hm->b[0].gb = -INFTY; hm->starta = old_cor[0]; hm->enda = old_cor[4]; hm->startb = old_cor[2]; hm->endb = c-1; //fprintf(stderr,"Following first: %d what:%d-%d 
%d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -INFTY; hm->f[0].gb = -INFTY; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 6://gb->gb = 6; //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -INFTY; hm->b[0].ga = -INFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c; hm->endb = old_cor[3]; hm->f[0].a = -INFTY; hm->f[0].ga = -INFTY; hm->f[0].gb = 0; hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; case 7://gb->a = 7; hirsch_path[old_cor[4]+1] = c+1; // fprintf(stderr,"Aligning:%d-%d\n",old_cor[4]+1,c+1); //foward: hm->f[0].a = input_states[0]; hm->f[0].ga = input_states[1]; hm->f[0].gb = input_states[2]; hm->b[0].a = -INFTY; hm->b[0].ga = -INFTY; hm->b[0].gb = 0; hm->starta = old_cor[0]; hm->enda = old_cor[4]-1; hm->startb = old_cor[2]; hm->endb = c; //fprintf(stderr,"Following first: %d what:%d-%d %d-%d\n",c-1,hm->starta,hm->enda,hm->startb,hm->endb); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); //backward: hm->starta = old_cor[4]+1; hm->enda = old_cor[1]; hm->startb = c+1; hm->endb = old_cor[3]; hm->f[0].a = 0; hm->f[0].ga = -INFTY; hm->f[0].gb = -INFTY; 
hm->b[0].a = input_states[3]; hm->b[0].ga = input_states[4]; hm->b[0].gb = input_states[5]; //fprintf(stderr,"Following last: %d\n",c+1); hirsch_path = simple_hirsch_pp_dyn(prof1,prof2,hm,hirsch_path); break; } return hirsch_path; } struct states* simple_foward_hirsch_pp_dyn(const int* prof1,const int* prof2,struct hirsch_mem* hm) { unsigned int freq[23]; struct states* s = hm->f; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; prof1 += (hm->starta) << 6; prof2 += (hm->startb) << 6; s[hm->startb].a = s[0].a; s[hm->startb].ga = s[0].ga; s[hm->startb].gb = s[0].gb; if(hm->startb == 0){ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -INFTY; s[j].ga = s[j-1].a; if (s[j-1].ga > s[j].ga){ s[j].ga = s[j-1].ga; } s[j].gb = -INFTY; } prof2+=64; }else{ for (j = hm->startb+1; j < hm->endb;j++){ prof2+=64; s[j].a = -INFTY; s[j].ga = s[j-1].a+prof2[23]*prof1[26]; if (s[j-1].ga > s[j].ga){ s[j].ga = s[j-1].ga; } s[j].gb = -INFTY; // prof2+=64; } prof2+=64; } prof2 -= (hm->endb-hm->startb) << 6; s[hm->endb].a = -INFTY; s[hm->endb].ga = -INFTY; s[hm->endb].gb = -INFTY; for (i = hm->starta;i < hm->enda;i++){ prof1 += 64; pa = 1; for (j = 23; j--;){ if(prof1[j]){ freq[pa] = j; pa++; } } freq[0] = pa; pa = s[hm->startb].a; pga = s[hm->startb].ga; pgb = s[hm->startb].gb; if(hm->startb == 0){ s[hm->startb].a = -INFTY; s[hm->startb].ga = -INFTY; s[hm->startb].gb = pa; if(pgb> s[hm->startb].gb){ s[hm->startb].gb = pgb; } }else{ s[hm->startb].a = -INFTY; s[hm->startb].ga = -INFTY; s[hm->startb].gb = pa+prof1[23]*prof2[26]; if(pgb > s[hm->startb].gb){ s[hm->startb].gb = pgb; } } for (j = hm->startb+1; j <= hm->endb;j++){ prof2 += 64; ca = s[j].a; if((pga += prof2[23-64]*prof1[26-64]) > pa){ pa = pga; } if((pgb += prof1[23-64]*prof2[26-64]) > pa){ pa = pgb; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = 
s[j-1].a+prof2[23]*prof1[26]; if (s[j-1].ga> s[j].ga){ s[j].ga = s[j-1].ga; } pgb = s[j].gb; s[j].gb = ca+prof1[23]*prof2[26]; if(pgb > s[j].gb){ s[j].gb = pgb; } pa = ca; } prof2 -= (hm->endb-hm->startb) << 6; } prof1 -= 64 * (hm->enda); return s; } struct states* simple_backward_hirsch_pp_dyn(const int* prof1,const int* prof2,struct hirsch_mem* hm) { unsigned int freq[23]; struct states* s = hm->b; register int pa = 0; register int pga = 0; register int pgb = 0; register int ca = 0; register int i = 0; register int j = 0; prof1 += (hm->enda+1) << 6; prof2 += (hm->endb+1) << 6; s[hm->endb].a = s[0].a; s[hm->endb].ga = s[0].ga; s[hm->endb].gb = s[0].gb; //init of first row; //j = endb-startb; if(hm->endb == hm->len_b){ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -INFTY; s[j].ga = s[j+1].a; if (s[j+1].ga> s[j].ga){ s[j].ga = s[j+1].ga; } s[j].gb = -INFTY; } prof2 -= 64; }else{ for(j = hm->endb-1;j > hm->startb;j--){ prof2 -= 64; s[j].a = -INFTY; s[j].ga = s[j+1].a+prof2[23]*prof1[26]; if (s[j+1].ga > s[j].ga){ s[j].ga = s[j+1].ga; } s[j].gb = -INFTY; // prof2 -= 64; } prof2 -= 64; } s[hm->startb].a = -INFTY; s[hm->startb].ga = -INFTY; s[hm->startb].gb = -INFTY; // prof2 -= (endb -startb) << 6; i = hm->enda-hm->starta; while(i--){ prof1 -= 64; pa = 1; for (j = 23; j--;){ if(prof1[j]){ freq[pa] = j; pa++; } } freq[0] = pa; pa = s[hm->endb].a; pga = s[hm->endb].ga; pgb = s[hm->endb].gb; s[hm->endb].a = -INFTY; s[hm->endb].ga = -INFTY; if(hm->endb == hm->len_b){ s[hm->endb].gb = pa; if(pgb > s[hm->endb].gb){ s[hm->endb].gb = pgb; } }else{ s[hm->endb].gb = pa+prof1[23]*prof2[26]; if(pgb> s[hm->endb].gb){ s[hm->endb].gb = pgb; } } //j = endb-startb; prof2 += (hm->endb-hm->startb) << 6; //while(j--){ for(j = hm->endb-1;j >= hm->startb;j--){ prof2 -= 64; ca = s[j].a; if((pga += prof2[64+23]*prof1[26]) > pa){ pa = pga; } if((pgb += prof1[64+23]*prof2[26]) > pa){ pa = pgb; } prof2 += 32; for (pga = freq[0];--pga;){ pgb = freq[pga]; pa += 
prof1[pgb]*prof2[pgb]; } prof2 -= 32; s[j].a = pa; pga = s[j].ga; s[j].ga = s[j+1].a+prof2[23]*prof1[26]; if (s[j+1].ga > s[j].ga){ s[j].ga = s[j+1].ga; } pgb = s[j].gb; s[j].gb = ca+prof1[23]*prof2[26]; if(pgb > s[j].gb){ s[j].gb = pgb; } pa = ca; } } return s; } kalign2_stats.c0000644001210100001440000000052511577654215013201 0ustar olifriusers#include "kalign2.h" void stats(struct alignment* aln) { int i,j; float for (i = 0; i < numseq-1;i++){ for (j = i + 1; j < numseq;j++){ for (j = 0; j < aln->sl[f];j++){ } f = aln->nsip[i]; fprintf(stdout,">%s\n",aln->sn[f]); c = 0; for (j = 0; j < aln->sl[f];j++){ tmp = aln->s[f][j];] aln->seq[f][j] } } } kalign2_string_matching.c0000644001210100001440000000570311577654215015226 0ustar olifriusers/* kalign2_string_matching.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
/* to: timolassmann@gmail.com */
#include <string.h>

/*
 * byg_detect - decide whether an encoded sequence contains any residue code
 * that belongs to the letters "BDEFHIJKLMNOPQRSVWYZ".
 *
 * text holds n residue codes; entries equal to -1 are skipped.
 * Returns 0 as soon as one of those codes is seen, 1 otherwise.
 *
 * 'J' maps to -1 in the code table; it is skipped when the lookup table is
 * built (the previous version wrote to T[-1]). Codes outside 0..255 are
 * ignored instead of indexing outside the table.
 */
int byg_detect(int* text,int n)
{
	//ABCDEFGHIJKLMNOPQRSTUVWXYZ
	static const char unique_aa[] = "BDEFHIJKLMNOPQRSVWYZ";//restrictive
	static const int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22};
	int flagged[256];
	int i,code;

	for (i = 0; i < 256; i++){
		flagged[i] = 0;
	}
	for (i = 0; unique_aa[i]; i++){
		code = aacode[unique_aa[i]-65];
		if (code >= 0){
			flagged[code] = 1;
		}
	}
	for (i = 0; i < n; i++){
		code = text[i];
		if (code >= 0 && code < 256 && flagged[code]){
			return 0;
		}
	}
	return 1;
}

/*
 * check_identity - compare two NUL-terminated strings.
 * Returns 1 when they have the same length and content, -1 otherwise.
 */
int check_identity(char* n,char*m)
{
	return (strcmp(n,m) == 0) ? 1 : -1;
}

/*
 * byg_count - number of (possibly overlapping) occurrences of pattern in text.
 *
 * An empty pattern yields 0. Patterns of any length are supported; the
 * previous bit-parallel version shifted an int by strlen(pattern)-1, which is
 * undefined for empty patterns and for patterns longer than 31 characters,
 * and indexed its table with plain (possibly negative) char values.
 */
int byg_count(char* pattern,char*text)
{
	char* hit;
	int count = 0;

	if (!pattern[0]){
		return 0;
	}
	for (hit = strstr(text,pattern); hit; hit = strstr(hit+1,pattern)){
		count++;
	}
	return count;
}

/*
 * byg_end - index just past the first occurrence of pattern in text,
 * or -1 when pattern is empty or does not occur.
 */
int byg_end(char* pattern,char*text)
{
	char* hit;

	if (!pattern[0]){
		return -1;
	}
	hit = strstr(text,pattern);
	if (!hit){
		return -1;
	}
	return (int)(hit - text) + (int)strlen(pattern);
}

/*
 * byg_start - index of the first occurrence of pattern in text,
 * or -1 when pattern is empty or does not occur.
 */
int byg_start(char* pattern,char*text)
{
	char* hit;

	if (!pattern[0]){
		return -1;
	}
	hit = strstr(text,pattern);
	if (!hit){
		return -1;
	}
	return (int)(hit - text);
}
/* kalign2_tree.c0000644001210100001440000004255411577654215013012 */
0ustar olifriusers/* kalign2_tree.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. to: timolassmann@gmail.com */ #include "kalign2.h" struct aln_tree_node* real_upgma(float **dm,int ntree) { int i,j; int *as = 0; float max; int node_a = 0; int node_b = 0; int cnode = numseq; struct aln_tree_node** tree = 0; struct aln_tree_node* tmp = 0; as = malloc(sizeof(int)*numseq); for (i = numseq; i--;){ as[i] = i+1; } tree = malloc(sizeof(struct aln_tree_node*)*numseq); for (i=0;i < numseq;i++){ tree[i] = malloc(sizeof(struct aln_tree_node)); tree[i]->done = 1; tree[i]->num = i; tree[i]->path = 0; tree[i]->profile = 0; tree[i]->seq = 0;//seq[i]; tree[i]->len = 0;//len[i]; /* Needs to be +2 because: at n = 3 is is possible to get a perfectly balanced binary tree with 4 sequences at intermediate nodes */ /*tree[i]->links = malloc(sizeof(struct aln_tree_node*)*2); for ( j =0;j < 2;j++){ tree[i]->links[j] = 0; }*/ tree[i]->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1))); tree[i]->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1))); for ( j =0;j < (ntree+(ntree-1));j++){ tree[i]->links[j] = 0; tree[i]->internal_lables[j] = 0; } } while (cnode != numprofiles){ max = -INFTY; for (i = 0;i < numseq-1; i++){ if (as[i]){ for ( j = i + 1;j < numseq;j++){ if 
(as[j]){ if (dm[i][j] > max){ max = dm[i][j]; node_a = i; node_b = j; } } } } } tmp = malloc(sizeof(struct aln_tree_node)); tmp->done = 0; tmp->path = 0; tmp->profile = 0; tmp->num = cnode; tmp->seq = 0; tmp->len = 0; tmp->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1))); tmp->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1))); tmp->links[0] = tree[node_a]; tmp->links[1] = tree[node_b]; tmp->internal_lables[0] = cnode; tmp->internal_lables[1] = 0; for ( i =2;i < (ntree+(ntree-1));i++){ tmp->links[i] = 0; tmp->internal_lables[i] = 0; } tree[node_a] = tmp; tree[node_b] = 0; /*deactivate sequences to be joined*/ as[node_a] = cnode+1; as[node_b] = 0; cnode++; /*calculate new distances*/ for (j = numseq;j--;){ if (j != node_b){ dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])*0.5; } } dm[node_a][node_a] = 0.0f; for (j = numseq;j--;){ dm[j][node_a] = dm[node_a][j]; dm[j][node_b] = 0.0f; dm[node_b][j] = 0.0f; } } tmp = tree[node_a]; for (i = numseq;i--;){ free(dm[i]); } free(dm); free(tree); free(as); return tmp; } struct aln_tree_node* real_nj(float **dm,int ntree) { int i,j; //float **dm = 0; float *r = 0; float *r_div = 0; int *active = 0; int node = 0; float min = 0; int join_a = 0; int join_b = 0; int leaves = 0; struct aln_tree_node** tree = 0; struct aln_tree_node* tmp = 0; leaves = numseq; r = malloc ((numseq*2-1) *sizeof(float)); r_div = malloc ((numseq*2-1) *sizeof(float)); active = malloc((numseq*2-1)*sizeof(int)); for ( i = 0;i < numseq*2-1;i++){ active[i] = 0; } for ( i = 0;i < numseq;i++){ active[i] = 1; } tree = malloc(sizeof(struct aln_tree_node*)*(numseq*2-1)); for (i=0;i < numseq*2-1;i++){ tree[i] = malloc(sizeof(struct aln_tree_node)); tree[i]->done = 1; tree[i]->num = i; tree[i]->path = 0; tree[i]->profile = 0; tree[i]->seq = 0;//seq[i]; tree[i]->len = 0;//len[i]; tree[i]->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1))); tree[i]->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1))); for ( j =0;j < 
(ntree+(ntree-1));j++){ tree[i]->links[j] = 0; tree[i]->internal_lables[j] = 0; } } node = numseq; while (node != numseq*2 -1){ for (i = 0;i min){ min = dm[i][j]; join_a = j; join_b = i; } } } } } //join_a always smaller than join_b && both smaller than node dm[join_a][node] = dm[join_a][join_b]/2 + (r_div[join_a] - r_div[join_b])/2; dm[join_b][node] = dm[join_a][join_b] - dm[join_a][node]; tree[node]->num = node; tree[node]->links[0] = tree[join_a]; tree[node]->links[1] = tree[join_b]; tree[node]->internal_lables[0] = node; tree[node]->internal_lables[1] = 0; active[join_a] = 0; active[join_b] = 0; for (i = 0;ijoin_a) ? dm[join_a][i]: dm[i][join_a]; dm[i][node] -= dm[join_a][node]; dm[i][node] += (i > join_b) ? dm[join_b][i] : dm[i][join_b] ; dm[i][node] -= dm[join_b][node]; dm[i][node] /= 2; } } active[node] = 1; node++; } for (i = numprofiles;i--;){ free(dm[i]); } free(dm); free(r); free(r_div); free(active); tmp = tree[node-1]; free(tree); return tmp; } struct ntree_data* alignntree(struct ntree_data* ntree_data,struct aln_tree_node* p) { int i = 0; int ntree = ntree_data->ntree; int* leaves = 0; leaves = malloc(sizeof(int)* (ntree+(ntree-1))); while(p->links[i]){ alignntree(ntree_data,p->links[i]); i++; } i = 0; if (p->links[i]){ fprintf(stderr,"Aligning subtree: at node:%d\n",p->num); while(p->links[i]){ leaves[i] = p->links[i]->num; i++; } leaves[i] = -1; // fprintf(stderr,"NODES:%d\n",i); ntree_data = find_best_topology(ntree_data,leaves,p->internal_lables); // exit(0); } free(leaves); return ntree_data; } void print_simple_phylip_tree(struct aln_tree_node* p) { if(p->links[0]){ fprintf(stderr,"("); print_simple_phylip_tree(p->links[0]); } if(p->num < numseq){ fprintf(stderr,"%d",p->num); }else{ fprintf(stderr,","); } if(p->links[1]){ print_simple_phylip_tree(p->links[1]); fprintf(stderr,")"); } } void printtree(struct aln_tree_node* p) { int i = 0; while(p->links[i]){ printtree(p->links[i]); i++; } i = 0; if (p->links[i]){ printf("Aligning: at 
node:%d\n",p->num); while(p->links[i]){ printf("%d\n",p->links[i]->num); i++; } i = 0; while(p->internal_lables[i]){ printf("%d ",p->internal_lables[i]); i++; } printf("\n"); } } void ntreeify(struct aln_tree_node* p,int ntree) { int i = 0; int c = 0; struct aln_tree_node* tmp1 = 0; struct aln_tree_node* tmp2 = 0; if (p->links[0]){ ntreeify(p->links[0],ntree); } if (p->links[1]){ ntreeify(p->links[1],ntree); } if (!p->done){ tmp1 = p->links[0]; tmp2 = p->links[1]; p->done = tmp1->done + tmp2->done; i = 0; c = 0; if(tmp1->done != 1){ while(tmp1->internal_lables[i]){ p->internal_lables[c] = tmp1->internal_lables[i]; i++; c++; } if(tmp2->done != 1){ i = 0; while(tmp2->internal_lables[i]){ p->internal_lables[c] = tmp2->internal_lables[i]; c++; i++; } } }else if(tmp2->done != 1){ i = 0; while(tmp2->internal_lables[i]){ p->internal_lables[c] = tmp2->internal_lables[i]; c++; i++; } } p->internal_lables[c] = p->num; //fprintf(stderr,"%d:%d %d:%d %d\n",tmp1->num,tmp1->internal_lables[0],tmp2->num,tmp2->internal_lables[0],p->num); /*for (i = 0; i< c;i++){ fprintf(stderr,"il:%d ",p->internal_lables[i]); } fprintf(stderr,"\n");*/ if (tmp1->done > 1){ for ( i = 0;i < tmp1->done;i++){ p->links[i] = tmp1->links[i]; tmp1->links[i] = 0; } } if (tmp2->done > 1){ for ( i = 0; i < tmp2->done;i++){ p->links[tmp1->done+i] = tmp2->links[i]; tmp2->links[i] = 0; } free(tmp2->internal_lables); free(tmp2->links); free(tmp2); }else{ p->links[tmp1->done] = tmp2; } // fprintf(stderr,"p->num:%d\n",p->num); p->links[p->done] = 0; if (tmp1->done > 1){ free(tmp1->internal_lables); free(tmp1->links); free(tmp1); } if (p->done >= ntree){ p->done = 1; /*i = 0; while(p->internal_lables[i]){ i++; } p->internal_lables[i] = p->num;*/ } } } struct ntree_data* find_best_topology(struct ntree_data* ntree_data,int* leaves,int* nodes) { int i,c; int elements = 0; //int num_topologies =0; int* milometer = 0; //DURBIN struct tree_node* tree = 0; struct tree_node* tmp = 0; int newnode = 0; int local_ntree = 0; 
int *tmp_tree = 0; while(leaves[local_ntree] != -1){ local_ntree++; } //fprintf(stderr,"REALKDASF KJAF SA:%d\n",local_ntree); //for (i = 0; i < local_ntree-1;i++){ // fprintf(stderr,"nodes:%d\n",nodes[i]); //} tmp_tree = malloc(sizeof(int)*(local_ntree+local_ntree-1)*3); for (c = 0; c < (local_ntree+local_ntree-1)*3;c++){ tmp_tree[c] = 0; } tmp_tree[0] =1; if (local_ntree < 3){ //printf("ORDER1: %d and %d\n",leaves[0],leaves[1]); tmp_tree[0] =1; tmp = malloc(sizeof(struct tree_node)); tmp->left = 0; tmp->right = 0; tmp->label = -1; tmp->edge = 0; tmp->left = malloc(sizeof(struct tree_node)); tmp->left->left = 0; tmp->left->right = 0; tmp->left->edge = 1; tmp->left->label = leaves[0]; tmp->right = malloc(sizeof(struct tree_node)); tmp->right->left = 0; tmp->right->right = 0; tmp->right->edge = 2; tmp->right->label = leaves[1]; tree = malloc(sizeof(struct tree_node)); tree->left =tmp; tree->right = 0; tree->edge = -1; tree->label = -1; c = add_label_simpletree(tree,nodes,0); readsimpletree(tree,tmp_tree); /*for (c = 1; c < tmp_tree[0];c++){ fprintf(stderr,"%d ",tmp_tree[c]); } fprintf(stderr,"\n\n");*/ ntree_data =ntree_sub_alignment(ntree_data,tmp_tree,local_ntree); free(tmp_tree); }else{ elements = local_ntree-2; milometer = malloc(sizeof(int)*(elements)); for ( i = 0; i < elements;i++){ milometer[i] = 0; } i = 0; while(milometer[0] != -1){ tmp_tree[0] =1; tmp = malloc(sizeof(struct tree_node)); tmp->left = 0; tmp->right = 0; tmp->label = -1; tmp->edge = 0; tmp->left = malloc(sizeof(struct tree_node)); tmp->left->left = 0; tmp->left->right = 0; tmp->left->edge = 1; tmp->left->label = leaves[0]; tmp->right = malloc(sizeof(struct tree_node)); tmp->right->left = 0; tmp->right->right = 0; tmp->right->edge = 2; tmp->right->label = leaves[1]; tree = malloc(sizeof(struct tree_node)); tree->left =tmp; tree->right = 0; tree->edge = -1; tree->label = -1; //printsimpleTree(tree); //tree = simpleinsert(tree,0,3,-3); //fprintf(stderr,"\n\n"); //printsimpleTree(tree); newnode = 
3; for(c = 0; c < elements;c++){ // printf("%d ",milometer[c]); tree = simpleinsert(tree,milometer[c],newnode,leaves[2+c]); newnode+=2; } fprintf(stderr,"Topology:%d ",i); //printsimpleTree(tree); c = add_label_simpletree(tree,nodes,0); readsimpletree(tree,tmp_tree); freesimpletree(tree); /*for (c = 1; c < tmp_tree[0];c++){ fprintf(stderr,"%d ",tmp_tree[c]); } fprintf(stderr,"\n\n");*/ ntree_data =ntree_sub_alignment(ntree_data,tmp_tree,local_ntree); //exit(0); //for (c = 0;c < ntree -1;c++){ // fprintf(stderr,"%d ",nodes[c]); //} //fprintf(stderr,"\n\n"); i++; milometer = ticker(milometer,elements); } free(milometer); free(tmp_tree); } return ntree_data; } int add_label_simpletree(struct tree_node* p,int* nodes,int i) { if(p->left){ i = add_label_simpletree(p->left,nodes,i); } if(p->right){ i = add_label_simpletree(p->right,nodes,i); } if(p->left){ if(p->right){ p->label = nodes[i]; i++; return i; } } return i; } int* readsimpletree(struct tree_node* p,int* tree) { if(p->left){ tree = readsimpletree(p->left,tree); } if(p->right){ tree = readsimpletree(p->right,tree); } if(p->left){ if(p->right){ tree[tree[0]] = p->left->label; tree[tree[0]+1] = p->right->label; tree[tree[0]+2] = p->label; tree[0] +=3; // free(p->left); // free(p->right); // }else{ // free(p->left); } }//else{ // free(p->right); //} return tree; } void printsimpleTree(struct tree_node* p) { if(p->left){ printsimpleTree(p->left); } //fprintf(stderr,"%d\n",p->label); if(p->right){ printsimpleTree(p->right); } if(p->left){ if(p->right){ fprintf(stderr,"%d %d -> %d\n",p->left->label,p->right->label,p->label); free(p->left); free(p->right); }else{ free(p->left); } }else{ free(p->right); } // fprintf(stderr,"Edge:%d Label:%d\n",p->edge,p->label); } struct tree_node* simpleinsert(struct tree_node* p,int target, int new_edge,int leaf_label) { struct tree_node* tmp = 0; struct tree_node* tmp2 = 0; if(p->left){ if(p->left->edge == target){ tmp = malloc(sizeof(struct tree_node)); tmp->left = 0; tmp->right = 
0; tmp->label = leaf_label; tmp->edge = new_edge+1; tmp2 = malloc(sizeof(struct tree_node)); tmp2->left = tmp; tmp2->right = p->left; tmp2->label = -1; tmp2->edge = p->left->edge; p->left->edge = new_edge; p->left = tmp2; return p; }else{ p->left = simpleinsert(p->left,target,new_edge,leaf_label); } } if(p->right){ if(p->right->edge == target){ tmp = malloc(sizeof(struct tree_node)); tmp->left = 0; tmp->right = 0; tmp->label = leaf_label; tmp->edge = new_edge+1; tmp2 = malloc(sizeof(struct tree_node)); tmp2->left = tmp; tmp2->right = p->right; tmp2->label = -1; tmp2->edge = p->right->edge; p->right->edge = new_edge; p->right = tmp2; return p; }else{ p->right = simpleinsert(p->right,target,new_edge,leaf_label); } } return p; } int* ticker(int* milometer,int elements) { while(elements){ if (milometer[elements-1] < (2*elements)){ milometer[elements-1]++; return milometer; }else{ milometer[elements-1] = 0; elements--; } } milometer[0] = -1; return milometer; } int* upgma(float **dm,int* tree) { int i,j,t; int *as = 0; float max; int node_a = 0; int node_b = 0; int cnode = numseq; as = malloc(sizeof(int)*numseq); for (i = numseq; i--;){ as[i] = i+1; } t = 0; while (cnode != numprofiles){ max = -INFTY; for (i = 0;i < numseq-1; i++){ if (as[i]){ for ( j = i + 1;j < numseq;j++){ if (as[j]){ if (dm[i][j] > max){ max = dm[i][j]; node_a = i; node_b = j; } } } } } tree[t] = as[node_a]-1; tree[t+1] = as[node_b]-1; tree[t+2] = cnode; t += 3; /*deactivate sequences to be joined*/ as[node_a] = cnode+1; as[node_b] = 0; cnode++; /*calculate new distances*/ for (j = numseq;j--;){ if (j != node_b){ dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])/2; } } dm[node_a][node_a] = 0.0f; for (j = numseq;j--;){ dm[j][node_a] = dm[node_a][j]; dm[j][node_b] = 0.0f; dm[node_b][j] = 0.0f; } } free(as); return tree; } int* nj(float **dm,int* tree) { int i,j; //float **dm = 0; float *r = 0; float *r_div = 0; int *active = 0; int node = 0; float min = 0; int join_a = 0; int join_b = 0; int leaves = 0; 
int c =0; leaves = numseq; r = malloc ((numseq*2-1) *sizeof(float)); r_div = malloc ((numseq*2-1) *sizeof(float)); active = malloc((numseq*2-1)*sizeof(int)); for ( i = 0;i < numseq*2-1;i++){ active[i] = 0; } for ( i = 0;i < numseq;i++){ active[i] = 1; } node = numseq; while (node != numseq*2 -1){ for (i = 0;i min){ min = dm[i][j]; join_a = j; join_b = i; } } } } } //join_a always smaller than join_b && both smaller than node dm[join_a][node] = dm[join_a][join_b]/2 + (r_div[join_a] - r_div[join_b])/2; dm[join_b][node] = dm[join_a][join_b] - dm[join_a][node]; active[join_a] = 0; active[join_b] = 0; tree[c] = join_a; tree[c+1] = join_b; tree[c+2] = node; for (i = 0;ijoin_a) ? dm[join_a][i]: dm[i][join_a]; dm[i][node] -= dm[join_a][node]; dm[i][node] += (i > join_b) ? dm[join_b][i] : dm[i][join_b] ; dm[i][node] -= dm[join_b][node]; dm[i][node] /= 2; } } active[node] = 1; c += 3; node++; } for (i = numprofiles;i--;){ free(dm[i]); } free(dm); free(r); free(r_div); free(active); return tree; } kalign2_upgma.c0000644001210100001440000001312511577654215013154 0ustar olifriusers/* kalign2_upgma.c Released under GPL - see the 'COPYING' file Copyright (C) 2006 Timo Lassmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please send bug reports, comments etc. 
to: timolassmann@gmail.com */ #include "kalign2.h" struct aln_tree_node* real_upgma(float **dm) { int i,j; int *as = 0; float max; int node_a = 0; int node_b = 0; int cnode = numseq; struct aln_tree_node** tree = 0; struct aln_tree_node* tmp = 0; as = malloc(sizeof(int)*numseq); for (i = numseq; i--;){ as[i] = i+1; } tree = malloc(sizeof(struct aln_tree_node*)*numseq); for (i=0;i < numseq;i++){ tree[i] = malloc(sizeof(struct aln_tree_node)); tree[i]->done = 1; tree[i]->num = i; tree[i]->path = 0; tree[i]->profile = 0; tree[i]->seq = 0;//seq[i]; tree[i]->len = 0;//len[i]; /* Needs to be +2 because: at n = 3 is is possible to get a perfectly balanced binary tree with 4 sequences at intermediate nodes */ tree[i]->links = malloc(sizeof(struct aln_tree_node*)*3); for ( j =0;j < 3;j++){ tree[i]->links[j] = 0; } } while (cnode != numprofiles){ max = -INFTY; for (i = 0;i < numseq-1; i++){ if (as[i]){ for ( j = i + 1;j < numseq;j++){ if (as[j]){ if (dm[i][j] > max){ max = dm[i][j]; node_a = i; node_b = j; } } } } } tmp = malloc(sizeof(struct aln_tree_node)); tmp->done = 0; tmp->path = 0; tmp->profile = 0; tmp->num = cnode; tmp->seq = 0; tmp->len = 0; tmp->links = malloc(sizeof(struct aln_tree_node*)*(3)); tmp->links[0] = tree[node_a]; tmp->links[1] = tree[node_b]; tmp->links[2] =0; tree[node_a] = tmp; tree[node_b] = 0; /*deactivate sequences to be joined*/ as[node_a] = cnode+1; as[node_b] = 0; cnode++; /*calculate new distances*/ for (j = numseq;j--;){ if (j != node_b){ dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])/2; } } dm[node_a][node_a] = 0.0f; for (j = numseq;j--;){ dm[j][node_a] = dm[node_a][j]; dm[j][node_b] = 0.0f; dm[node_b][j] = 0.0f; } } tmp = tree[node_a]; for (i = numprofiles;i--;){ free(dm[i]); } free(dm); free(tree); free(as); return tmp; } int* nj(float **dm,int* tree) { int i,j; //float **dm = 0; float *r = 0; float *r_div = 0; int *active = 0; int node = 0; float min = 0; int join_a = 0; int join_b = 0; int leaves = 0; int c =0; leaves = numseq; r = 
malloc ((numseq*2-1) *sizeof(float)); r_div = malloc ((numseq*2-1) *sizeof(float)); active = malloc((numseq*2-1)*sizeof(int)); for ( i = 0;i < numseq*2-1;i++){ active[i] = 0; } for ( i = 0;i < numseq;i++){ active[i] = 1; } node = numseq; while (node != numseq*2 -1){ for (i = 0;i min){ min = dm[i][j]; join_a = j; join_b = i; } } } } } //join_a always smaller than join_b && both smaller than node dm[join_a][node] = dm[join_a][join_b]/2 + (r_div[join_a] - r_div[join_b])/2; dm[join_b][node] = dm[join_a][join_b] - dm[join_a][node]; active[join_a] = 0; active[join_b] = 0; tree[c] = join_a; tree[c+1] = join_b; tree[c+2] = node; for (i = 0;ijoin_a) ? dm[join_a][i]: dm[i][join_a]; dm[i][node] -= dm[join_a][node]; dm[i][node] += (i > join_b) ? dm[join_b][i] : dm[i][join_b] ; dm[i][node] -= dm[join_b][node]; dm[i][node] /= 2; } } active[node] = 1; c += 3; node++; } for (i = numprofiles;i--;){ free(dm[i]); } free(dm); free(r); free(r_div); free(active); return tree; } int* upgma(float **dm,int* tree) { int i,j,t; int *as = 0; float max; int node_a = 0; int node_b = 0; int cnode = numseq; as = malloc(sizeof(int)*numseq); for (i = numseq; i--;){ as[i] = i+1; } t = 0; while (cnode != numprofiles){ max = -INFTY; for (i = 0;i < numseq-1; i++){ if (as[i]){ for ( j = i + 1;j < numseq;j++){ if (as[j]){ if (dm[i][j] > max){ max = dm[i][j]; node_a = i; node_b = j; } } } } } tree[t] = as[node_a]-1; tree[t+1] = as[node_b]-1; tree[t+2] = cnode; t += 3; /*deactivate sequences to be joined*/ as[node_a] = cnode+1; as[node_b] = 0; cnode++; /*calculate new distances*/ for (j = numseq;j--;){ if (j != node_b){ dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])/2; } } dm[node_a][node_a] = 0.0f; for (j = numseq;j--;){ dm[j][node_a] = dm[node_a][j]; dm[j][node_b] = 0.0f; dm[node_b][j] = 0.0f; } } free(as); return tree; } Makefile.in0000644001210100001440000000175411577654214012341 0ustar olifriusersPREFIX = /usr/local/bin TEST = test/ CC = gcc CFLAGS = -O9 -Wall DEBUGFLAGS = -ggdb -Wall SOURCES = 
kalign2_distance_calculation.c kalign2_dp.c kalign2_input.c kalign2_main.c kalign2_mem.c kalign2_inferface.c kalign2_misc.c kalign2_tree.c kalign2_profile.c kalign2_alignment_types.c kalign2_feature.c kalign2_hirschberg.c kalign2_advanced_gaps.c kalign2_hirschberg_dna.c kalign2_output.c kalign2_string_matching.c kalign2_profile_alignment.c PROGS = kalign DEBUGPROGS = kalign_debug OBJECTS = $(SOURCES:.c=.o) DEBUGOBJECTS = $(SOURCES:.c=_debug.o) .PHONY: clean all: $(OBJECTS) $(CC) $(CFLAGS) $(OBJECTS) -o $(PROGS) %.o: %.c $(CC) $(CFLAGS) -c $< debug: $(DEBUGOBJECTS) $(CC) $(DEBUGFLAGS) $(DEBUGOBJECTS) -o $(DEBUGPROGS) %_debug.o: %.c $(CC) $(DEBUGFLAGS) -c $< -o $@ install: cp $(PROGS) /usr/local/bin/ clean: rm -f $(PROGS) $(OBJECTS) rm -f $(DEBUGPROGS) $(DEBUGOBJECTS) rm -f config.status config.log config.h Makefile rm -f test* rm -f *~ README0000644001210100001440000000245211577654215011151 0ustar olifriusers----------------------------------------------------------------------- Kalign version 2.03, Copyright (C) 2006 Timo Lassmann http://msa.cgb.ki.se/ timolassmann@gmail.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A copy of this license is in the COPYING file. 
----------------------------------------------------------------------- Installation: % ./configure % make and as root: % make install Usage: kalign [Options] infile.fasta outfile.fasta or: kalign [Options] -i infile.fasta -o outfile.fasta or: kalign [Options] < infile.fasta > outfile.fasta Options: type: kalign -h