stacks-1.35/000755 000765 000024 00000000000 12574070564 013514 5ustar00catchenstaff000000 000000 stacks-1.35/acinclude.m4000644 000765 000024 00000010763 12335173442 015706 0ustar00catchenstaff000000 000000 # ============================================================================ # http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html # ============================================================================ # # SYNOPSIS # # AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional]) # # DESCRIPTION # # Check for baseline language coverage in the compiler for the C++11 # standard; if necessary, add switches to CXXFLAGS to enable support. # # The first argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. # -std=c++11). If neither is specified, you get whatever works, with # preference for an extended mode. # # The second argument, if specified 'mandatory' or if left unspecified, # indicates that baseline C++11 support is required and that the macro # should error out if no mode with that support is found. If specified # 'optional', then configuration proceeds regardless, after defining # HAVE_CXX11 if and only if a supporting mode is found. # # LICENSE # # Copyright (c) 2008 Benjamin Kosnik # Copyright (c) 2012 Zack Weinberg # Copyright (c) 2013 Roy Stogner # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. 
#serial 3 m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [ template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; typedef check> right_angle_brackets; int a; decltype(a) b; typedef check check_type; check_type c; check_type&& cr = static_cast(c); auto d = a; ]) AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl m4_if([$1], [], [], [$1], [ext], [], [$1], [noext], [], [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl m4_if([$2], [], [ax_cxx_compile_cxx11_required=true], [$2], [mandatory], [ax_cxx_compile_cxx11_required=true], [$2], [optional], [ax_cxx_compile_cxx11_required=false], [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])dnl AC_LANG_PUSH([C++])dnl ac_success=no AC_CACHE_CHECK(whether $CXX supports C++11 features by default, ax_cv_cxx_compile_cxx11, [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [ax_cv_cxx_compile_cxx11=yes], [ax_cv_cxx_compile_cxx11=no])]) if test x$ax_cv_cxx_compile_cxx11 = xyes; then ac_success=yes fi m4_if([$1], [noext], [], [dnl if test x$ac_success = xno; then for switch in -std=gnu++11 -std=gnu++0x; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, $cachevar, [ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [eval $cachevar=yes], [eval $cachevar=no]) CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi]) m4_if([$1], [ext], [], [dnl if test x$ac_success = xno; then for switch in -std=c++11 -std=c++0x; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, $cachevar, [ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [eval $cachevar=yes], [eval $cachevar=no]) 
CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi]) AC_LANG_POP([C++]) if test x$ax_cxx_compile_cxx11_required = xtrue; then if test x$ac_success = xno; then AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.]) fi else if test x$ac_success = xno; then HAVE_CXX11=0 AC_MSG_NOTICE([No compiler with C++11 support was found]) else HAVE_CXX11=1 AC_DEFINE(HAVE_CXX11,1, [define if the compiler supports basic C++11 syntax]) fi AC_SUBST(HAVE_CXX11) fi ]) stacks-1.35/aclocal.m4000644 000765 000024 00000121742 12571641546 015364 0ustar00catchenstaff000000 000000 # generated automatically by aclocal 1.14.1 -*- Autoconf -*- # Copyright (C) 1996-2013 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, [m4_warning([this file was generated for autoconf 2.69. You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) # Copyright (C) 2002-2013 Free Software Foundation, Inc. 
# # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_AUTOMAKE_VERSION(VERSION) # ---------------------------- # Automake X.Y traces this macro to ensure aclocal.m4 has been # generated from the m4 files accompanying Automake X.Y. # (This private macro should not be called outside this file.) AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.14' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. m4_if([$1], [1.14.1], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) # _AM_AUTOCONF_VERSION(VERSION) # ----------------------------- # aclocal traces this macro to find the Autoconf version. # This is a private macro too. Using m4_define simplifies # the logic in aclocal, which can simply ignore this definition. m4_define([_AM_AUTOCONF_VERSION], []) # AM_SET_CURRENT_AUTOMAKE_VERSION # ------------------------------- # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], [AM_AUTOMAKE_VERSION([1.14.1])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # AM_AUX_DIR_EXPAND -*- Autoconf -*- # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets # $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to # '$srcdir', '$srcdir/..', or '$srcdir/../..'. # # Of course, Automake must honor this variable whenever it calls a # tool from the auxiliary directory. 
The problem is that $srcdir (and # therefore $ac_aux_dir as well) can be either absolute or relative, # depending on how configure is run. This is pretty annoying, since # it makes $ac_aux_dir quite unusable in subdirectories: in the top # source directory, any form will work fine, but in subdirectories a # relative path needs to be adjusted first. # # $ac_aux_dir/missing # fails when called from a subdirectory if $ac_aux_dir is relative # $top_srcdir/$ac_aux_dir/missing # fails if $ac_aux_dir is absolute, # fails when called from a subdirectory in a VPATH build with # a relative $ac_aux_dir # # The reason of the latter failure is that $top_srcdir and $ac_aux_dir # are both prefixed by $srcdir. In an in-source build this is usually # harmless because $srcdir is '.', but things will broke when you # start a VPATH build or use an absolute $srcdir. # # So we could use something similar to $top_srcdir/$ac_aux_dir/missing, # iff we strip the leading $srcdir from $ac_aux_dir. That would be: # am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` # and then we would define $MISSING as # MISSING="\${SHELL} $am_aux_dir/missing" # This will work as long as MISSING is not called from configure, because # unfortunately $(top_srcdir) has no meaning in configure. # However there are other variables, like CC, which are often used in # configure, and could therefore not use this "fixed" $ac_aux_dir. # # Another solution, used here, is to always expand $ac_aux_dir to an # absolute PATH. The drawback is that using absolute paths prevent a # configured tree to be moved without reconfiguration. AC_DEFUN([AM_AUX_DIR_EXPAND], [dnl Rely on autoconf to set up CDPATH properly. AC_PREREQ([2.50])dnl # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` ]) # AM_CONDITIONAL -*- Autoconf -*- # Copyright (C) 1997-2013 Free Software Foundation, Inc. 
# # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_CONDITIONAL(NAME, SHELL-CONDITION) # ------------------------------------- # Define a conditional. AC_DEFUN([AM_CONDITIONAL], [AC_PREREQ([2.52])dnl m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl AC_SUBST([$1_TRUE])dnl AC_SUBST([$1_FALSE])dnl _AM_SUBST_NOTMAKE([$1_TRUE])dnl _AM_SUBST_NOTMAKE([$1_FALSE])dnl m4_define([_AM_COND_VALUE_$1], [$2])dnl if $2; then $1_TRUE= $1_FALSE='#' else $1_TRUE='#' $1_FALSE= fi AC_CONFIG_COMMANDS_PRE( [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then AC_MSG_ERROR([[conditional "$1" was never defined. Usually this means the macro was only invoked conditionally.]]) fi])]) # Copyright (C) 1999-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be # written in clear, in which case automake, when reading aclocal.m4, # will think it sees a *use*, and therefore will trigger all it's # C support machinery. Also note that it means that autoscan, seeing # CC etc. in the Makefile, will ask for an AC_PROG_CC use... # _AM_DEPENDENCIES(NAME) # ---------------------- # See how the compiler implements dependency checking. # NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". # We try a few techniques and use that to set a single cache variable. # # We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was # modified to invoke _AM_DEPENDENCIES(CC); we would have a circular # dependency, and given that the user is not expected to run this macro, # just rely on AC_PROG_CC. 
AC_DEFUN([_AM_DEPENDENCIES], [AC_REQUIRE([AM_SET_DEPDIR])dnl AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl AC_REQUIRE([AM_MAKE_INCLUDE])dnl AC_REQUIRE([AM_DEP_TRACK])dnl m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], [$1], [CXX], [depcc="$CXX" am_compiler_list=], [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], [$1], [UPC], [depcc="$UPC" am_compiler_list=], [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], [depcc="$$1" am_compiler_list=]) AC_CACHE_CHECK([dependency style of $depcc], [am_cv_$1_dependencies_compiler_type], [if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. 
mkdir sub am_cv_$1_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` fi am__universal=false m4_case([$1], [CC], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac], [CXX], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac]) for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. 
am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_$1_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_$1_dependencies_compiler_type=none fi ]) AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) AM_CONDITIONAL([am__fastdep$1], [ test "x$enable_dependency_tracking" != xno \ && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) ]) # AM_SET_DEPDIR # ------------- # Choose a directory name for dependency files. # This macro is AC_REQUIREd in _AM_DEPENDENCIES. 
AC_DEFUN([AM_SET_DEPDIR], [AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl ]) # AM_DEP_TRACK # ------------ AC_DEFUN([AM_DEP_TRACK], [AC_ARG_ENABLE([dependency-tracking], [dnl AS_HELP_STRING( [--enable-dependency-tracking], [do not reject slow dependency extractors]) AS_HELP_STRING( [--disable-dependency-tracking], [speeds up one-time build])]) if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) AC_SUBST([AMDEPBACKSLASH])dnl _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl AC_SUBST([am__nodep])dnl _AM_SUBST_NOTMAKE([am__nodep])dnl ]) # Generate code to set up dependency tracking. -*- Autoconf -*- # Copyright (C) 1999-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_OUTPUT_DEPENDENCY_COMMANDS # ------------------------------ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], [{ # Older Autoconf quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. # We used to match only the files named 'Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. 
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`AS_DIRNAME("$mf")` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running 'make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "$am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`AS_DIRNAME(["$file"])` AS_MKDIR_P([$dirpart/$fdir]) # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ])# _AM_OUTPUT_DEPENDENCY_COMMANDS # AM_OUTPUT_DEPENDENCY_COMMANDS # ----------------------------- # This macro should only be invoked once -- use via AC_REQUIRE. # # This code is only required when automatic dependency tracking # is enabled. FIXME. This creates each '.P' file that we will # need in order to bootstrap the dependency handling code. AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], [AC_CONFIG_COMMANDS([depfiles], [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) ]) # Do all the work for Automake. -*- Autoconf -*- # Copyright (C) 1996-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This macro actually does too much. Some checks are only needed if # your package does certain things. But this isn't really a big deal. 
dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. m4_define([AC_PROG_CC], m4_defn([AC_PROG_CC]) [_AM_PROG_CC_C_O ]) # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) # AM_INIT_AUTOMAKE([OPTIONS]) # ----------------------------------------------- # The call with PACKAGE and VERSION arguments is the old style # call (pre autoconf-2.50), which is being phased out. PACKAGE # and VERSION should now be passed to AC_INIT and removed from # the call to AM_INIT_AUTOMAKE. # We support both call styles for the transition. After # the next Automake release, Autoconf can make the AC_INIT # arguments mandatory, and then we can depend on a new Autoconf # release and drop the old call support. AC_DEFUN([AM_INIT_AUTOMAKE], [AC_PREREQ([2.65])dnl dnl Autoconf wants to disallow AM_ names. We explicitly allow dnl the ones we care about. m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl AC_REQUIRE([AC_PROG_INSTALL])dnl if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl # test to see if srcdir already configured if test -f $srcdir/config.status; then AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi AC_SUBST([CYGPATH_W]) # Define the identity of the package. dnl Distinguish between old-style and new-style calls. m4_ifval([$2], [AC_DIAGNOSE([obsolete], [$0: two- and three-arguments forms are deprecated.]) m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl AC_SUBST([PACKAGE], [$1])dnl AC_SUBST([VERSION], [$2])], [_AM_SET_OPTIONS([$1])dnl dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. 
m4_if( m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), [ok:ok],, [m4_fatal([AC_INIT should be called with package and version arguments])])dnl AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl _AM_IF_OPTION([no-define],, [AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl # Some tools Automake needs. AC_REQUIRE([AM_SANITY_CHECK])dnl AC_REQUIRE([AC_ARG_PROGRAM])dnl AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) AM_MISSING_PROG([AUTOCONF], [autoconf]) AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) AM_MISSING_PROG([AUTOHEADER], [autoheader]) AM_MISSING_PROG([MAKEINFO], [makeinfo]) AC_REQUIRE([AM_PROG_INSTALL_SH])dnl AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl AC_REQUIRE([AC_PROG_MKDIR_P])dnl # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # AC_SUBST([mkdir_p], ['$(MKDIR_P)']) # We need awk for the "check" target. The system "awk" is bad on # some platforms. 
AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl _AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], [_AM_PROG_TAR([v7])])]) _AM_IF_OPTION([no-dependencies],, [AC_PROVIDE_IFELSE([AC_PROG_CC], [_AM_DEPENDENCIES([CC])], [m4_define([AC_PROG_CC], m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl AC_PROVIDE_IFELSE([AC_PROG_CXX], [_AM_DEPENDENCIES([CXX])], [m4_define([AC_PROG_CXX], m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJC], [_AM_DEPENDENCIES([OBJC])], [m4_define([AC_PROG_OBJC], m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], [_AM_DEPENDENCIES([OBJCXX])], [m4_define([AC_PROG_OBJCXX], m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl ]) AC_REQUIRE([AM_SILENT_RULES])dnl dnl The testsuite driver may need to know about EXEEXT, so add the dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. AC_CONFIG_COMMANDS_PRE(dnl [m4_provide_if([_AM_COMPILER_EXEEXT], [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl # POSIX will say in a future version that running "rm -f" with no argument # is OK; and we want to be able to make that assumption in our Makefile # recipes. So use an aggressive probe to check that the usage we want is # actually supported "in the wild" to an acceptable degree. # See automake bug#10828. # To make any issue more visible, cause the running configure to be aborted # by default if the 'rm' program in use doesn't match our expectations; the # user can still override this though. if rm -f && rm -fr && rm -rf; then : OK; else cat >&2 <<'END' Oops! Your 'rm' program seems unable to run without file operands specified on the command line, even when the '-f' option is present. 
This is contrary to the behaviour of most rm programs out there, and not conforming with the upcoming POSIX standard: Please tell bug-automake@gnu.org about your system, including the value of your $PATH and any error possibly output before this message. This can help us improve future automake versions. END if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then echo 'Configuration will proceed anyway, since you have set the' >&2 echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 echo >&2 else cat >&2 <<'END' Aborting the configuration process, to ensure you take notice of the issue. You can download and install GNU coreutils to get an 'rm' implementation that behaves properly: . If you want to complete the configuration process using your problematic 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM to "yes", and re-run configure. END AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) fi fi]) dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl mangled by Autoconf and run in a shell conditional statement. m4_define([_AC_COMPILER_EXEEXT], m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) # When config.status generates a header, we must update the stamp-h file. # This file resides in the same directory as the config header # that is generated. The stamp files are numbered to have different names. # Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the # loop where config.status creates the headers, so we can generate # our stamp files there. AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], [# Compute $1's index in $config_headers. 
_am_arg=$1 _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_PROG_INSTALL_SH # ------------------ # Define $install_sh. AC_DEFUN([AM_PROG_INSTALL_SH], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi AC_SUBST([install_sh])]) # Copyright (C) 2003-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # Check whether the underlying file-system supports filenames # with a leading dot. For instance MS-DOS doesn't. AC_DEFUN([AM_SET_LEADING_DOT], [rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_MAKE_INCLUDE() # ----------------- # Check to see how make treats includes. 
AC_DEFUN([AM_MAKE_INCLUDE], [am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. AC_MSG_CHECKING([for style of include used by $am_make]) am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from 'make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi AC_SUBST([am__include]) AC_SUBST([am__quote]) AC_MSG_RESULT([$_am_result]) rm -f confinc confmf ]) # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Copyright (C) 1997-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_MISSING_PROG(NAME, PROGRAM) # ------------------------------ AC_DEFUN([AM_MISSING_PROG], [AC_REQUIRE([AM_MISSING_HAS_RUN]) $1=${$1-"${am_missing_run}$2"} AC_SUBST($1)]) # AM_MISSING_HAS_RUN # ------------------ # Define MISSING if not defined so far and test if it is modern enough. # If it is, set am_missing_run to use it, otherwise, to nothing. 
AC_DEFUN([AM_MISSING_HAS_RUN], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([missing])dnl if test x"${MISSING+set}" != xset; then case $am_aux_dir in *\ * | *\ *) MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; *) MISSING="\${SHELL} $am_aux_dir/missing" ;; esac fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then am_missing_run="$MISSING " else am_missing_run= AC_MSG_WARN(['missing' script is too old or missing]) fi ]) # Helper functions for option handling. -*- Autoconf -*- # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_MANGLE_OPTION(NAME) # ----------------------- AC_DEFUN([_AM_MANGLE_OPTION], [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) # _AM_SET_OPTION(NAME) # -------------------- # Set option NAME. Presently that only means defining a flag for this option. AC_DEFUN([_AM_SET_OPTION], [m4_define(_AM_MANGLE_OPTION([$1]), [1])]) # _AM_SET_OPTIONS(OPTIONS) # ------------------------ # OPTIONS is a space-separated list of Automake options. AC_DEFUN([_AM_SET_OPTIONS], [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) # _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) # ------------------------------------------- # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. AC_DEFUN([_AM_IF_OPTION], [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) # Copyright (C) 1999-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_PROG_CC_C_O # --------------- # Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC # to automatically call this. 
AC_DEFUN([_AM_PROG_CC_C_O], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([compile])dnl AC_LANG_PUSH([C])dnl AC_CACHE_CHECK( [whether $CC understands -c and -o together], [am_cv_prog_cc_c_o], [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) # Make sure it works both with $CC and with simple cc. # Following AC_PROG_CC_C_O, we do the test twice because some # compilers refuse to overwrite an existing .o file with -o, # though they will create one. am_cv_prog_cc_c_o=yes for am_i in 1 2; do if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ && test -f conftest2.$ac_objext; then : OK else am_cv_prog_cc_c_o=no break fi done rm -f core conftest* unset am_i]) if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi AC_LANG_POP([C])]) # For backward compatibility. AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_RUN_LOG(COMMAND) # ------------------- # Run COMMAND, save the exit status in ac_status, and log it. # (This has been adapted from Autoconf's _AC_RUN_LOG macro.) AC_DEFUN([AM_RUN_LOG], [{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD (exit $ac_status); }]) # Check to make sure that the build environment is sane. -*- Autoconf -*- # Copyright (C) 1996-2013 Free Software Foundation, Inc. 
# NOTE(review): vendored aclocal output, spanning three macros: AM_SANITY_CHECK
# (rejects unsafe characters in srcdir/cwd and verifies the system clock by
# comparing a freshly created conftest.file against $srcdir/configure via
# 'ls -t'), AM_SILENT_RULES (the --enable-silent-rules machinery, probing make
# for nested-variable support and substituting AM_V/AM_DEFAULT_V), and
# AM_PROG_INSTALL_STRIP (routes "make install-strip" through install-sh so
# STRIPPROG is honored). No interior comments added: the AC_MSG_ERROR message
# and the if-branches below intentionally span the original line boundaries.
# Regenerate with aclocal rather than hand-editing.
# # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_SANITY_CHECK # --------------- AC_DEFUN([AM_SANITY_CHECK], [AC_MSG_CHECKING([whether build environment is sane]) # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[[\\\"\#\$\&\'\`$am_lf]]*) AC_MSG_ERROR([unsafe absolute working directory name]);; esac case $srcdir in *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$[*]" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$[*]" != "X $srcdir/configure conftest.file" \ && test "$[*]" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken alias in your environment]) fi if test "$[2]" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$[2]" = conftest.file ) then # Ok. : else AC_MSG_ERROR([newly created file is older than distributed files!
Check your system clock]) fi AC_MSG_RESULT([yes]) # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi AC_CONFIG_COMMANDS_PRE( [AC_MSG_CHECKING([that generated files are newer than configure]) if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi AC_MSG_RESULT([done])]) rm -f conftest.file ]) # Copyright (C) 2009-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_SILENT_RULES([DEFAULT]) # -------------------------- # Enable less verbose build rules; with the default set to DEFAULT # ("yes" being less verbose, "no" or empty being verbose). AC_DEFUN([AM_SILENT_RULES], [AC_ARG_ENABLE([silent-rules], [dnl AS_HELP_STRING( [--enable-silent-rules], [less verbose build output (undo: "make V=1")]) AS_HELP_STRING( [--disable-silent-rules], [verbose build output (undo: "make V=0")])dnl ]) case $enable_silent_rules in @%:@ ((( yes) AM_DEFAULT_VERBOSITY=0;; no) AM_DEFAULT_VERBOSITY=1;; *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; esac dnl dnl A few 'make' implementations (e.g., NonStop OS and NextStep) dnl do not support nested variable expansions. dnl See automake bug#9928 and bug#10237. am_make=${MAKE-make} AC_CACHE_CHECK([whether $am_make supports nested variables], [am_cv_make_support_nested_variables], [if AS_ECHO([['TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 am__doit: @$(TRUE) .PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no fi]) if test $am_cv_make_support_nested_variables = yes; then dnl Using '$V' instead of '$(V)' breaks IRIX make.
AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' else AM_V=$AM_DEFAULT_VERBOSITY AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY fi AC_SUBST([AM_V])dnl AM_SUBST_NOTMAKE([AM_V])dnl AC_SUBST([AM_DEFAULT_V])dnl AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl AC_SUBST([AM_DEFAULT_VERBOSITY])dnl AM_BACKSLASH='\' AC_SUBST([AM_BACKSLASH])dnl _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl ]) # Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_PROG_INSTALL_STRIP # --------------------- # One issue with vendor 'install' (even GNU) is that you can't # specify the program used to strip binaries. This is especially # annoying in cross-compiling environments, where the build's strip # is unlikely to handle the host's binaries. # Fortunately install-sh will honor a STRIPPROG variable, so we # always use install-sh in "make install-strip", and initialize # STRIPPROG with the value of the STRIP variable (set by the user). AC_DEFUN([AM_PROG_INSTALL_STRIP], [AC_REQUIRE([AM_PROG_INSTALL_SH])dnl # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. if test "$cross_compiling" != no; then AC_CHECK_TOOL([STRIP], [strip], :) fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" AC_SUBST([INSTALL_STRIP_PROGRAM])]) # Copyright (C) 2006-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved.
# NOTE(review): vendored aclocal output: _AM_SUBST_NOTMAKE/AM_SUBST_NOTMAKE
# (suppress VARIABLE = @VARIABLE@ lines in Makefile.in) and _AM_PROG_TAR
# (probes gnutar/plaintar/pax/cpio to find a working way to build a v7/ustar/
# pax tarball, substituting am__tar and am__untar). Regenerate with aclocal
# rather than hand-editing.
# NOTE(review): this span also contains raw tar-archive residue -- the header
# for stacks-1.35/autogen.sh, that script's text, and the head of the
# ChangeLog are fused into the last line because the archive was dumped as
# flat text. Left byte-identical; presumably the archive should be re-extracted
# properly rather than repaired here -- TODO confirm.
# _AM_SUBST_NOTMAKE(VARIABLE) # --------------------------- # Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. # This macro is traced by Automake. AC_DEFUN([_AM_SUBST_NOTMAKE]) # AM_SUBST_NOTMAKE(VARIABLE) # -------------------------- # Public sister of _AM_SUBST_NOTMAKE. AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) # Check how to create a tarball. -*- Autoconf -*- # Copyright (C) 2004-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_PROG_TAR(FORMAT) # -------------------- # Check how to create a tarball in format FORMAT. # FORMAT should be one of 'v7', 'ustar', or 'pax'. # # Substitute a variable $(am__tar) that is a command # writing to stdout a FORMAT-tarball containing the directory # $tardir. # tardir=directory && $(am__tar) > result.tar # # Substitute a variable $(am__untar) that extract such # a tarball read from stdin. # $(am__untar) < result.tar # AC_DEFUN([_AM_PROG_TAR], [# Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AC_SUBST([AMTAR], ['$${TAR-tar}']) # We'll loop over all known methods to create a tar archive until one works. _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' m4_if([$1], [v7], [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], [m4_case([$1], [ustar], [# The POSIX 1988 'ustar' format is defined with fixed-size fields. # There is notably a 21 bits limit for the UID and the GID. In fact, # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 # and bug#13588). am_max_uid=2097151 # 2^21 - 1 am_max_gid=$am_max_uid # The $UID and $GID variables are not portable, so we need to resort # to the POSIX-mandated id(1) utility. 
Errors in the 'id' calls # below are definitely unexpected, so allow the users to see them # (that is, avoid stderr redirection). am_uid=`id -u || echo unknown` am_gid=`id -g || echo unknown` AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) if test $am_uid -le $am_max_uid; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) _am_tools=none fi AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) if test $am_gid -le $am_max_gid; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) _am_tools=none fi], [pax], [], [m4_fatal([Unknown tar format])]) AC_MSG_CHECKING([how to create a $1 tar archive]) # Go ahead even if we have the value already cached. We do so because we # need to set the values for the 'am__tar' and 'am__untar' variables. _am_tools=${am_cv_prog_tar_$1-$_am_tools} for _am_tool in $_am_tools; do case $_am_tool in gnutar) for _am_tar in tar gnutar gtar; do AM_RUN_LOG([$_am_tar --version]) && break done am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' am__untar="$_am_tar -xf -" ;; plaintar) # Must skip GNU tar: if it does not support --format= it doesn't create # ustar tarball either. (tar --version) >/dev/null 2>&1 && continue am__tar='tar chf - "$$tardir"' am__tar_='tar chf - "$tardir"' am__untar='tar xf -' ;; pax) am__tar='pax -L -x $1 -w "$$tardir"' am__tar_='pax -L -x $1 -w "$tardir"' am__untar='pax -r' ;; cpio) am__tar='find "$$tardir" -print | cpio -o -H $1 -L' am__tar_='find "$tardir" -print | cpio -o -H $1 -L' am__untar='cpio -i -H $1 -d' ;; none) am__tar=false am__tar_=false am__untar=false ;; esac # If the value was cached, stop now. We just wanted to have am__tar # and am__untar set. test -n "${am_cv_prog_tar_$1}" && break # tar/untar a dummy directory, and stop if the command works. 
rm -rf conftest.dir mkdir conftest.dir echo GrepMe > conftest.dir/file AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) rm -rf conftest.dir if test -s conftest.tar; then AM_RUN_LOG([$am__untar /dev/null 2>&1 && break fi done rm -rf conftest.dir AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) AC_MSG_RESULT([$am_cv_prog_tar_$1])]) AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR m4_include([acinclude.m4]) stacks-1.35/autogen.sh000755 000765 000024 00000000272 12335173442 015510 0ustar00catchenstaff000000 000000 #!/bin/sh -e test -n "$srcdir" || srcdir=`dirname "$0"` test -n "$srcdir" || srcdir=. autoreconf --force --install --verbose "$srcdir" test -n "$NOCONFIGURE" || "$srcdir/configure" "$@"stacks-1.35/ChangeLog000644 000765 000024 00000175727 12574070543 015276 0ustar00catchenstaff000000 000000 Stacks 1.35 - Sept 09, 2015 --------------------------- Feature: Added --retain_header flag to process_radtags/process_shortreads which will keep the unmodified FASTQ header in the output. This allows clone_filter/process_radtags/ process_shortreads to be run in different sequences and more than one time. Feature: Added --treemix to the populations program, allowing SNPs to be output in TreeMix format. Feature: Added --phylip_var_all to the populations program. This option outputs the full sequence from each variable locus, encoding polymorphisms using IUPAC notation. -This option will also output a file containing the coordinates of each RAD locus so they can be input to phylogenetic software (such as RAxML) to partition each RAD locus out and then build the phylogenetic tree independently for each partitioned locus. Feature: Added the AgeI restriction enzyme. Feature: refactored clone_filter to handle random oligo sequences used as inline/indexed barcodes to identify and discard PCR duplicates.
Bugfix: added code to process_radtags/process_shortreads to handle cases when data writes fail due to a filled disk or other error conditions. Bugfix: kmer_filter was not handling gzipped FASTQ files properly when filtering rare kmers. Stacks 1.34 - July 26, 2015 --------------------------- Bugfix: fixed phylip output to again include nucleotides from subsets of the full set of populations. Bugfix: private alleles were being associated to the incorrect population at a particular locus (the counts and summary statistics of private alleles were not affected). Stacks 1.33 - July 22, 2015 --------------------------- Bugfix: Corrected the second-stage filtering of the populations program to properly respect the -p flag. Bugfix: Corrected the display of individual samples in the web interface (tags.php file). Stacks 1.32 - June 18, 2015 --------------------------- Bugfix: Updated the Phylip output to reflect the changed meaning of 'fixed' as determined in the PopSum::tally() function. Stacks 1.31 - June 17, 2015 --------------------------- Bugfix: site-level filtering in the populations program was not working correctly when dealing with sites that were fixed within populations but variable among populations. The code in the PopSum::tally() function was not correctly identifying sites as not fixed in these cases causing them to be incorrectly filtered out. Bugfix: --write_random_snp was causing a segfault in the populations program in some cases. Feature: changed the default setting for the -n option of cstacks (number of fixed differences allowed between loci) to 1 (at the request of Josie Paris ). Bugfix: made some tweaks to improve layout in the web interface. Bugfix: single-end reads, with paired barcodes (inline/index) were not being handled properly, resulting in a segfault. Bugfix: process_radtags was allowing a non-null barcode type to be specified without specifying a barcode file, which caused a segfault. 
Feature: exposed kmer length setting in ustacks and cstacks. This allows the kmer length used for sequence matching to be set manually. While this can result in some missed matches (there is a trade off between kmer length and sequence length when searching for matches between the two) it also allows the algorithm to run at faster speeds. Feature: Changed default database engine type to be explicitly MyISAM. Previously Stacks just used the default which at one time was MyISAM but has recently changed in many systems to be INNODB. Using MyISAM should provide much faster imports of data and ultimately use less disk space (as the space is reclaimed when databases are deleted). Stacks 1.30 - May 07, 2015 -------------------------- Feature: sstacks can now accept multiple sample files at a time, saving run time by only processing the catalog once. Feature: changed batch_X.sumstats.tsv file so the P/Q alleles are always presented in the same order in each local population (according to the overall frequency of the allele across all populations). This will sync results with the VCF exports but will sometimes cause the frequency of p in the local population to be less than 0.5 (up until now the frequency of p has always represented the most frequent allele in the local population). Feature: added a maximum observed heterozygosity filter to populations program. Bugfix: Fis values in batch_X.sumstats_summary.tsv were incorrect (although raw values in batch_X.sumstats.tsv were correct). Bugfix: corrected the allele depth output in the VCF export to follow de facto standards used by other programs. Bugfix: in some cases loci were sneaking past the --write_single_snp directive in populations (due to interactions with pruning out SNPs that fail the MAF filter). Feature: Updated the Stacks web interface.
The web app is now almost 100% dynamic (parts of the page are drawn on demand instead of fetching new, full pages from the server) using local javascript to draw the population view of genotypes, summary statistics, and the view of raw stacks. The web app uses asynchronous AJAX queries that trade data encoded in JSON to fetch the necessary data for dynamic display. Feature: added DdeI, RsaI, AluI restriction enzymes to process_radtags. Bugfix: sstacks could generate extra matching haplotypes in a very small number of cases. Stacks 1.29 - Mar 21, 2015 -------------------------- Feature: added the --ordered_export option to the populations program. For the VCF, GenePop, and Structure exports, if this option is specified, only one copy of each SNP is exported in the case where one nucleotide position is covered by more than one RAD locus. Most useful for ddRAD data. Feature: VCF export now includes individual allele depths for each SNP call. Feature: improved the filtering logging code in populations, if the --verbose flag is specified, a reason is provided for each pruned site, or each removed locus. Bugfix: PHASE output was broken in the populations program. SNP pruning/filtering code did not update the catalog copies of the alleles after pruning which are needed by the PHASE output code. Bugfix: adjusted the filtering code in populations to not exclude fixed loci. Bugfix: removed extra tab from ID line for Structure export. Bugfix: fixed issue in genepop output that may have overfiltered some loci. Bugfix: fixed small problems with --write_single_snp/--write_random_snp in the populations program. Some polymorphic loci were erroneously being omitted. Stacks 1.28 - Mar 06, 2015 -------------------------- Feature: added a second barcode distance to process_radtags/process_shortreads. This allows you to specify two distances for recovering barcodes if you are using combinatorial barcodes (e.g. a 12bp barcode inline on the single-end read plus a 6bp index).
I have changed the meaning of the parameter from "distance between barcodes" to "number of allowed mismatches when correcting barcodes." The --barcode_dist parameter is now --barcode_dist_1, and --barcode_dist_2 was added. Bugfix: the process_shortreads/process_radtags programs were trimming sequence as if an inline barcode was specified, even when it was an index barcode and no sequence should have been trimmed. Bugfix: the process_shortreads program was outputting FASTA even when FASTQ was requested due to not handling gzipped outputs properly. Bugfix: fixed segfault in populations that could occur when using a whitelist that contained loci that were being filtered out due to -p/-r constraints. Stacks 1.27 - Feb 25, 2015 -------------------------- Bugfix: the minor allele frequency filter and the proceny limit filter were not working properly in all cases with the other filters. Bugfix: barcode length (href->inline_bc_len) was not being correctly set for single-end, inline line barcodes of variable length. Stacks 1.26 - Feb 23, 2015 -------------------------- Bugfix: if you are running non-compressed data, then version 1.25 broke the parsing code. If your data were zipped (or a BAM file) when it went through pstacks/ustacks, then there was no bug. Feature: refactored the filtering code in the populations program to add a second filtering step. In previous versions the -r (sample limit) and -p (population limit) were applied on the basis of the entire RAD locus. This could lead to situations where a RAD locus remained in the data set while one or more of the individual SNPs on that locus were missing data and were below the -r or -p limits. Now, the filters are applied to individual SNPs after the filters are applied to the RAD loci. 
This greatly affects the -r (sample limit) filter with more SNPs being pruned out, as well as the -a (minor allele frequency filter) such that all SNPs below the MAF are pruned fully from the data set and will not appear in any statistical results or downstream exports. Feature: added restriction enzyme kpnI. Feature: added code to check for the existence of the loci and SNPs provided in a whitelist. Stacks 1.25 - Feb 17, 2015 -------------------------- Feature: added support for unaligned BAM files for process_radtags and process_shortreads. The two programs can now read paired data that is interleaved in a single file (which is required to support paired-end data in BAM format). Feature: Haplotypes can now be output in VCF format from the populations program using the --vcf_haplotypes option. Feature: added --fasta_strict option to populations program. Will output full sequence for each individual at each haplotype at each locus, but only for biologically plausible loci. It won't output loci with more than two haplotypes and will output single haplotypes twice, once per allele. Feature: Changed the sumstats/hapstats files to output a one-based genome base pair position so it matches other export formats. Bugfix: fixed problem with gzipped files where last line of file was not read properly causing the program to output an erroneous error message. Bugfix: The FASTA output from the populations program was reporting the internal value (zero-based index) for the basepair position of each read (the first nucleotide of the cutsite) causing an off-by-one error for all reads and reads on the negative strand had the coordinate for the cutsite end of the read (right-most end) reported instead of the standard left-most end. Bugfix: the log likelihood filter was not working properly in export_sql.pl, causing many genotypes to be excluded during export.
Bugfix: process_radtags was not looking for the paired-end RAD cutsite in the proper location when dealing with double-digest, inline/index barcoded reads. Feature: added initial, internal support for merging and phasing loci that overlap at a restriction enzyme cut site. Feature: code now prints program version and generation date to all internal Stacks files. Stacks 1.24 - Jan 07, 2015 -------------------------- Feature: added restriction enzyme ecoRV. Bugfix: fixed segmentation fault in process_radtags/process_shortreads when resizing sequence and phred internal buffer sizes. Stacks 1.23 - Dec 12, 2014 -------------------------- Bugfix: Fixed a segfault bug in process_radtags where the process_barcode function returned prematurely when one barcode was correct and one was incorrect in paired cases. Bugfix: fixed compiler warnings when building with CLANG. Stacks 1.22 - Dec 08, 2014 -------------------------- Feature: process_radtags and process_shortreads now support variable barcode lengths. In process_radtags sequences will automatically be trimmed to keep stacks a uniform length with the variable barcode lengths. Feature: a filename can now be specified in the barcodes file for process_radtags and process_shortreads. When a filename is specified, process_radtags will write data to this filename instead of a filename made up of the barcode. Feature: process_radtags and process_shortreads will now output gzipped files if provided gzipped inputs or if requested using the '-y' option. Feature: Added SacI and BgIII restriction enzymes. Bugfix: Tightened up parsing of FASTQ ID field to prevent a buffer overrun (and subsequent segfault) in FASTQ headers that look like the Illumina format but are malformed. Bugfix: Fixed GenePop output of populations program as last locus on second line was missing commas if more than one SNP was present at that locus. Bugfix: -R option to retain unused reads was not being recognized by ustacks. 
Bugfix: changed populations to record program run parameters and execution time to log file. Bugfix: corrected Makefile.am to include Sparsehash compile flags for process_radtags. Bugfix: corrected load_radtags.pl so as not to try and load the population ID as a number to the samples table (and instead as a string). Stacks 1.21 - Oct 02, 2014 -------------------------- Feature: Added the XbaI, BstYI, and XhoI restriction enzymes. Feature: Added ability to specify column position in whitelist along with locus ID in populations program. This allows for specific SNPs within specific loci to be processed. Feature: In populations program, changed implementation of --write_single_snp to create an internal whitelist from the first SNP in each catalog locus. Added a new command line option, --write_random_snp to select a single, random SNP per RAD locus using the same internal mechanism. Feature: Added HZAR, Hybrid Zone Analysis in R output to populations program. Bugfix: Added code in populations program to handle cases where a haplotype contains one or more uncalled bases (Ns). These haplotypes are now excluded from haplotype-based statistical calculations. Bugfix: In Phi_st/ct/sc calculations of populations program, total population count was not adjusted downward when one of the populations dropped out of the analysis at a particular locus in the all-populations, haplotype-based AMOVA calculation (batch_X.phistats.tsv). Bugfix: "All positions" Fis measure in batch_X.sumstats_summary.tsv file too negative due to internal logic error. Bugfix: updated queries in index_radtags.pl to account for new 'type' variable in SNPs tables. Stacks 1.20 - Jul 29, 2014 -------------------------- Synced corrections module branch with main Stacks branch. *** The internal formats of the *.tags.tsv, *.snps.tsv, and *.matches.tsv files have changed and therefore version 1.20 programs cannot be used on earlier generated data sets. 
However, the convert_stacks.pl script is included in this release to convert an older data set into the new formats. *** Feature: Implemented new haplotype trimming algorithm for rxstacks. Feature: new script, convert_stacks.pl, to convert previous Stacks files to new format. Feature: Modified VCF output to include likelihood values from heterozygous and homozygous SNP model calls. Feature: added log likelihood filter to genotypes and populations programs and to web interface. Feature: Added SpeI restriction enzyme to process_radtags. Feature: Modified Beagle output formats in populations program to be population-specific and not to include monomorphic nucleotide positions. Stacks 1.19 - Apr 23, 2014 -------------------------- Feature: the populations program now calculates Fst' and D_est on haplotypes between all pairwise populations. Our implementations are based on: Bird, Karl, Smouse & Toonen. (2011) Detecting and measuring genetic differentiation. D_est: Jost. (2008) Gst and its relatives do not measure differentiation. Fst': based on modifying the AMOVA implementation from Excoffier, Smouse, & Quattro (1992). Feature: we have refactored the populations program to use a common framework for kernel smoothing and bootstrapping. This has allowed us to add smoothing and bootstrapping to all statistics calculated by the populations program: pi, Fis, Fst, Fst', D_est, Phi_st, Phi_ct, Phi_sc, Haplotype diversity, gene diversity. Feature: we have implemented fine-grained control of bootstrapping by providing flags to turn on bootstrapping for each group of population statistics, as well as providing a bootstrapping whitelist allowing only certain loci to be included in the bootstrapping calculations. Stacks 1.18 - Apr 04, 2014 -------------------------- Feature: we now use chi squared segregation ratios to detect missing alleles in parental mapping markers in F1 crosses (CP map type).
We can now map ab/a- and -a/ab as ab/--, and --/ab markers; we can map ab/c- and -c/ab markers as ab/cd markers; we can map aa/b- and -a/bb markers as ab/-- and --/ab markers. Feature: in F1 crosses we are now mapping ab/cc and cc/ab markers as ab/-- and --/ab markers. Feature: reworked genetic map display of web interface. Included chisq p-value from segregation distortion test as a filter. Feature: implemented measure of segregation distortion in genotypes program based on chi square test of genotype counts. Removed deprecated measure of F, inbreeding coefficient, replaced it with segregation distortion. Bugfix: corrected calling of markers in genotypes program. When a whitelist with a small number of markers is specified, some of the parental IDs could be missed, causing markers not to be called and hence dropped from the analysis. Bugfix: changed genotype mappings for generic map types to make certain non-biologically plausible genotype combinations illegal. Bugfix: fixed compilation issues when using Google's SparseHash (thanks to khuck@cs.uoregon.edu for the patch). Stacks 1.17 - Mar 26, 2014 -------------------------- Bugfix: Added #ifdefs to deal with missing functions in older versions of zlib. Stacks 1.16 - Mar 25, 2014 -------------------------- Feature: added haplotype counts for each population and locus to the batch_X.hapstats.tsv file. Feature: haplotype F statistics are now calculated for the whole set of populations (one analysis of variance calculation for all populations), and also as a set of pairwise calculations to mirror the existing Fst calculations. Bugfix: fixed small bug in calculation of MSD(Total) component of Phi_st (haplotype F statistics). Bugfix: fixed bug in parsing of populations maps when using strings for population identifiers. Bugfix: kernel-smoothing not correct for haplotype/gene diversity. Stacks 1.15 - Mar 15, 2014 -------------------------- Bugfix: fix various bugs related to gzip support. 
Stacks 1.14 - Mar 14, 2014 -------------------------- Feature: Stacks files are now kept in gzipped format if FASTQ data is fed into pipeline gzipped or as a BAM. Bugfix: fixed some compile bugs on OSX Mavericks. Stacks 1.13 - Feb 24, 2014 -------------------------- Feature: We have implemented the first set of haplotype-level population genetics statistics. Specifically, we are now calculating gene diversity and haplotype diversity (pi) for each locus, as well as F statistics for haplotypes including, Phi_st, Phi_ct, and Phi_sc, which are calculated using Analysis of Molecular Variance (AMOVA): Excoffier, Smouse, & Quattro, (1992). Analysis of molecular variance inferred from metric distances among DNA haplotypes: application to human mitochondrial DNA restriction data. Genetics. Data can be analyzed as populations of individuals (the previous default) and now using populations of individuals, and groups of populations. Feature: If a reference genome is available, haplotype F statistics can also be kernel-smoothed. Feature: populations in the population map can now be specified as text strings or numbers. Groups of populations can now be specified by adding a third column to the population map for each individual and listing the group they belong to (again as a text string or number). Bugfix: allow batch IDs of 0 in populations and genotypes. Bugfix: in populations, changed VCF output to be ordered by basepair. Bugfix: in populations, change value of expected homozygosity to be set to 1 - expected heterozygosity instead of 1 - Pi. Pi (computed as [1 - (((p choose 2) + (q choose 2)) / (n choose 2))]) and expected heterozygosity (2pq) can produce slightly different estimates resulting in exp het + exp hom != 1. Stacks 1.12 - Jan 21, 2014 -------------------------- Bugfix: accidentally broke gzipped FASTQ support through a typo in gzFastq.h.
Stacks 1.11 - Jan 09, 2014 -------------------------- Feature: changed build to work properly with g++ and clang, which is the native compiler on Apple's OS X. Feature: Added NheI restriction enzyme. Bugfix: changed logging in denovo_map.pl/ref_map.pl to write outputs from Stacks programs continuously instead of waiting until the program completed to write output to log file. Bugfix: corrected parsing of population map for gzipped input files for denovo_map.pl. Stacks 1.10 - Dec 10, 2013 -------------------------- Feature: Added phased output for PHASE and Beagle. The phased output writes multiple SNPs in a single RAD locus as an already phased haplotype, leaving PHASE and Beagle to only phase between these haplotypes, instead of having to re-phase SNPs from within the same RAD site. Bugfix: corrected the SNP genotype output for Beagle. Bugfix: Corrected PHP warnings; enabled scrolling in catalog.php for iframes. Bugfix: allele percentages from ustacks were off since ustacks was changed to load/unload read IDs from disk (Stacks 0.99995). Only the calculation of the percentages was affected, not the underlying algorithms. Stacks 1.09 - Oct 30, 2013 -------------------------- Feature: added export support for F2 and backcross map types for Onemap to genotypes. Feature: added EaeI, ClaI, and TaqI restriction enzymes to process_radtags. Feature: changed populations bootstrap to use AMOVA Fst. Feature: added bootstrap whitelist to populations, so users can restrict the loci that are bootstrapped to a particular set (e.g. on a single chromosome). Bugfix: modified PHASE output so that SNPs are ordered properly. Previously, although RAD loci are ordered properly, some individual SNPs between RAD loci could still be output out of order. Bugfix: corrected onemap CP output so that B3.7 markers are output as "ab", not "2ab". Stacks 1.10.Beta1 - Sept 30, 2013 --------------------------------- Feature: completed implementation of rxstacks. 
Bugfix: when merging a homozygous locus into the catalog, if homozygous allele conflicted with existing catalog SNP alleles, new allele was not added to SNP object (but was added to the allele list). Bugfix: found small memory leak in cstacks - old SNP objects were not being freed when new SNPs were merged into the catalog. Bugfix: empty alleles were being output to the batch_X.catalog.alleles file by cstacks. Did not affect the function of the program. Stacks 1.08 - Sept 24, 2013 --------------------------- Feature: added a FASTA output to populations to output the full locus sequence for each allele at each sample locus, applying any filters or whitelists supplied to populations. Stacks 1.07 - Sept 23, 2013 --------------------------- Bugfix: updated process_radtags to drop reads shorter than length limit when read trimming turned on. Bugfix: corrected build failures on Mac OS X due to Samtools' bam.h header conflicting with Stacks' Bam.h header when building on OS X's case insensitive file system. Feature: changed process_radtags to drop reads already shorter than limit if sequence truncation turned on. You can also specify the read length limit to drop reads if your data have already been trimmed. Bugfix: Updated VCF output, missing genotypes now reported as "./." instead of "." Bugfix: Updated VCF output, alleles reported on the negative strand are now complemented so their positive strand counterparts are reported and will align against a reference genome. Bugfix: Updated VCF output, "reference allele" is now always reported as most frequent allele. Stacks 1.06 - August 28, 2013 ----------------------------- Bugfix: Illumina FASTQ header specifying read pair could override internal enumeration of read pair if paired-end data was fed in as a single-end file. Bugfix: corrected locus starting base in reference-aligned data. Feature: refactored sort_read_pairs.pl to process input files one at a time, without retaining them in memory. 
The program should now be able to handle an arbitrary number of samples. Feature: sort_read_pairs.pl can now read gzipped files directly. Stacks 1.05 - August 17, 2013 ----------------------------- Bugfix: adapter filtering code in process_radtags/process_shortreads bit rotted and was not properly functioning. Switching from deprecated hash function to TR1 hash broke the expected hashing behavior for char *. Bugfix: modified process_radtags/process_shortreads to handle single adapters when processing paired-end data (previously you had to specify two adapters for paired data). Bugfix: corrected barcode-specific counters in process_radtags/process_shortreads. Overall counts were correct but counts for barcodes were off due to shuffling of code that happened with support of combinatorial barcodes. Stacks 1.04 - July 25, 2013 --------------------------- Bugfix: process_radtags was not properly handling index_index and inline_inline barcode types. Bugfix: the hindIII restriction enzyme sequence was incorrectly specified in renz.h. Bugfix: ustacks wasn't properly removing file suffix when gzip files are processed. Stacks 1.03 - June 28, 2013 --------------------------- Bugfix: non-barcoded data were not being handled properly by process_radtags/process_shortreads. Stacks 1.02 - June 24, 2013 --------------------------- Bugfix: single-end barcode, double-digested data were not being handled properly by process_radtags causing a crash. Feature: added support for PLINK and Beagle output files from the populations program. Feature: Modified the minor allele frequency (MAF) filter to remove polymorphic nucleotide SNPs from Stacks output on a per-population basis. So, if a second allele is present at a frequency below the MAF, that nucleotide site is not output (although other sites at the same RAD locus could still be output). Bugfix: Tri-allelic loci were being output into the STRUCTURE, GENEPOP and PHASE output (but not in sumstats or Fst). 
Stacks 1.01 - June 07, 2013 --------------------------- Bugfix: an off-by-one error was preventing haplotypes from being verified by sstacks if a SNP occurred in the last position of the read. This could cause tags to fail to match to the catalog if there is a SNP in the final position. Stacks 1.0 - June 06, 2013 -------------------------- Feature: added XbaI and BamHI restriction enzymes to process_radtags. Feature: added code to output genotypes in PHASE/fastPHASE format. Feature: extended combinatorial barcodes support so one can process single-end data that contains both an inline and indexed barcode. Feature: added command line option and supporting code to cstacks to allow samples to be added to an existing catalog. Feature: refactored command line handling in denovo_map.pl and ref_map.pl to be much more flexible. Arbitrary command line options can now be passed to particular pipeline programs using the -X flag. Feature: for genetic maps, catalog may now be constructed out of multiple parents, genotypes is smart enough to cross check the parents used to construct the catalog against those submitted to genotypes for producing a map. Will allow for a single catalog to be used across a series of crosses so all maps share the same catalog IDs. Feature: added option to genotypes to import manual corrections exported from Stacks SQL database. Feature: added --log_fst_comp option to populations to log components of the Fst calculations to a file for debugging / testing purposes. Bugfix: corrected handling of files in kmer_filter. Adding support for gzipped files broke file handling in some cases. Stacks 0.999991 - May 14, 2013 ------------------------------ Feature: changed populations to use AMOVA Fst for batch_1.fst_summary.tsv file. Previously it used the Binomial Fst. 
Bugfix: If --write_single_snp not specified, Structure output was not naming loci properly (it was naming each SNP from the same RAD locus using the same ID, instead of differentiating each SNP in each RAD locus). Feature: Added Sau3AI and SexAI restriction enzymes. Fixed bug in specification of MseI, MspI enzymes. Bugfix: changed VCF and Fst code in populations to output SNPs from reads aligned to the negative strand on a reference genome correctly. Stacks 0.99999 - May 06, 2013 ----------------------------- Bugfix: process_shortreads/process_radtags not working with non-barcoded data. Stacks 0.99998 - May 01, 2013 ----------------------------- Feature: Added option to sort_read_pairs.pl to output FASTQ if desired. Bugfix: make sort_read_pairs.pl understand new file naming scheme. Feature: added mseI, mspI restriction enzymes to process_radtags. Bugfix: corrected sphI cutsite sequence in process_radtags. Bugfix: stopped "uninitialized value" errors in export_sql.pl when marker type is undefined for a particular map. Stacks 0.99997 - April 01, 2013 ------------------------------- Bugfix: paired barcode could become uninitialized on second pair of files in process_radtags/process_shortreads causing all barcodes to mismatch. Made Read class explicitly initialize everything. Stacks 0.99996 - March 24, 2013 ------------------------------- Feature: major overhaul of the process_radtags / process_shortreads programs to support combinatorial barcodes and double-digested data. Programs now support a mixture of barcodes from single-end inline or index barcodes, to mixtures of inline/index barcodes. 1) changed naming scheme for process_radtags/process_shortreads output files for paired reads. Changed file suffix to properly be ".fq" or ".fa", with paired-reads named sample_XXX.1.fq and sample_XXX.2.fq instead of the previous ".fq_1" and ".fq_2". 2) Paired-reads remain synced in output files, with singletons written to sample_XXX.rem.1.fq and sample_XXX.rem.2.fq. 
2) changed Phred+33 to be the default encoding scheme (previously was the now deprecated Phred+64) 3) Combinatorial barcodes are specified as --inline_index or --inline_inline among a number of other supported possibilities. Barcodes are listed in the barcode file as either a single column or two, tab-separated columns. 4) Two restriction enzymes can now be specified via --renz_1 and --renz_2 to have the program check (and correct) the restriction enzyme cut site on the first and second read respectively. 5) programs now properly ignore files starting with "." which is required for Mac OS X's ".DS_Store" files and for "." and ".." on Linux. Bugfix: processing paired-end data with process_radtags could incorrectly alter the first few nucleotides of the paired-read when correcting barcodes. Bugfix: two regressions were fixed in process_shortreads causing all reads to be improperly trimmed. Bugfix: VCF output did not include sites fixed within and variable among populations. Bugfix: changed the parsing code to accept a wider range of Illumina named, paired-end files in process_radtags/shortreads. Bugfix: gzipped files were not read properly in process_radtags/shortreads when a directory was specified with -P. Bugfix: setting secondary read distance to 0 in ustacks (-N) was ineffective. Bugfix: changed the PHP code to remove 'Strict Standards' warnings and a few other warnings. Thanks to Yue Yu for tracking down the proper changes to avoid the warnings. Stacks 0.99995 - February 19, 2013 ---------------------------------- Feature: added support for using Google's Sparsehash Object: http://code.google.com/p/sparsehash/ If enabled at compile time, this object will replace all the hash maps with Google's sparsehash saving significant memory. Feature: removed the -S command line option from cstacks and sstacks. These programs now read this ID directly from the Stacks input files. 
Feature: altered ustacks to no longer store FASTQ/FASTA IDs from input files in memory to lower memory usage. Instead, an integer representing the read is stored and the IDs are read back in from disk just before results are written. Feature: added the '--write_single_snp' option to populations. When writing Genepop or Structure files this option will cause populations to write just the first SNP per locus to the file, avoiding potential problems with linked SNPs originating from the same locus. Feature: compressed the Hval/Stack/Rem objects to remove convenience integer variables to save memory. Feature: updated Stacks programs to use the newer TR1 unordered_map hash object instead of the deprecated SGI hash_map object. Bugfix: fixed a memory leak in cstacks in which not all of the Locus Class elements were being properly freed (only the SNP objects were being freed). Bugfix: Added code to denovo_map.pl/ref_map.pl to remove from the logfile the 'counter' lines that printed when initially loading radtags data. Stacks 0.99994 - February 12, 2013 ---------------------------------- Bugfix: process_radtags/process_shortreads, when adding support for reads of different length, I clobbered the sequence truncation option. Fixed this regression. Bugfix: the kernel smoothing algorithms for calculating Fst, Pi, and Fis could sometimes segfault as some RAD sites can overlap. Added code to find and describe overlapping RAD sites and report these to the user. Stacks 0.99993 - January 30, 2013 --------------------------------- Feature: process_radtags/process_shortreads/ustacks can now read gzipped Fasta/Fastq input files. Feature: ref_map.pl/pstacks now supports the use of BAM alignment files. This feature is optional and must be enabled during compilation. It requires the Samtools library to be installed. 
Bugfix: When using reference-aligned data, soft-masked alignments (Ns) were getting improperly injected into the SNP models, which would call them as Homozygous Ns, and this data would eventually be passed to the summary statistics in populations, which would make errant Fst calculations. Bugfix: In rare cases, sequences aligned to the negative strand had their base pair positions slightly off, this could cause a segfault during populations' kernel-smoothed Fst calculations. Bugfix: In populations, fixed a rare, infinite loop condition in Fisher's exact test for Fst calculations. Could occur due to a floating point rounding error when calculating allele frequencies for Fst calculation. Stacks 0.99992 - January 8, 2013 -------------------------------- Bugfix: floating point command line options were not being processed correctly and may have been truncated. Stacks 0.99991 - December 17, 2012 ---------------------------------- Feature: process_shortreads and process_radtags can now filter for adapter sequence in raw data, trimming (process_shortreads) or discarding (process_shortreads/process_radtags) it. Mismatches to the adapter sequence are allowed to accommodate for sequencing error. Bugfix: added --merge flag to process_shortreads/process_radtags to handle regression where unbarcoded data should be merged together into single output files. Bugfix: code in cstacks to characterize differentially fixed SNPs was only running with -n > 0, but should also run by default if -g is specified. Feature: made automated correction thresholds for the genotypes program accessible from the command line, including --min_hom_seqs, --min_het_seqs, and --max_het_seqs options. Feature: refactored clone_filter to be more functional. Now can output sequences in FASTA or FASTQ (FASTA will save memory). Keeps sequence headers intact, can capture discarded reads, and prints a distribution of the number of cloned read pairs. 
Bugfix: Remainder reads weren't being written properly as the file handles weren't properly closed. Bugfix: Processing paired reads with process_radtags/process_shortreads was not functioning correctly, barcode was not being transferred properly from P1 to P2 read. Regression introduced Aug 21, 2012. Feature: added support for OneMap CP map export in genotypes. Bugfix: Fixed some bugs in pstacks/ustacks command line processing involving --alpha and --model_type. Bugfix: several bugs in the exact and approximate bootstrap algorithms were corrected. These algorithms are now robust. Bugfix: Added code to ensure command line IDs are in fact integers. Bugfix: fixed nucleotide positions were not being tallied across populations properly resulting in an incorrect value for number of sites and percent polymorphic sites in the sumstats_summary file. Bugfix: pstacks could identify a locus that despite having SNPs would have no haplotypes generated. This would later cause sstacks to segfault. Added code in pstacks to blacklist these loci and code in sstacks to catch this case and not segfault, now will print a warning. Stacks 0.9999 - October 03, 2012 -------------------------------- Feature: two bootstrapping procedures have been introduced into the populations program to determine the statistical significance of kernel smoothed windows. These algorithms are controlled by the --bootstrap and --bootstrap_reps command line options. Feature: summary summary statistics are now written for all populations, giving the mean, variance, and standard error for each of the population-specific summary stats. In addition, private alleles are identified and marked in the sumstats file, and summarized across populations. Number and percent of polymorphic loci are also reported. The actual variable nucleotides at each site are now reported in the sumstats file. Feature: the populations program can now generate kernel-smoothed values for Fis and Pi, in addition to the current support for Fst. 
Feature: the populations program can now output SNP data for use in the program Structure. Feature: various sections of the populations program have been parallelized. Feature: the populations program can now output SNP data in the Phylip file format. If --phylip is specified, the populations program will identify SNPs that are fixed within populations, but variable between populations and output these in a Phylip file. This file can then be fed into any phylogenetics program, such as PhyML. This feature is equivalent to the analysis done in Emerson, et al., 2010. In addition, if the --phylip_var flag is specified as well, variable sites within populations are encoded into the Phylip file using standard alternative nucleotide encodings. Feature: for ustacks/pstacks, the alpha significance level can now be specified on the command line. Specifying --alpha to ustacks or pstacks will set the chi square significance level to determine whether a heterozygous or homozygous model call is statistically significant. Legal values of alpha are 0.1, 0.05 (the previous default), 0.01, or 0.001. Feature: for ustacks/pstacks, a new bounded SNP calling model has been introduced, allowing limits to be set on the error rate. This model allows the calling of SNPs to be affected by prior knowledge as to how likely polymorphism is in the data set. This behavior is controlled by the --bound_low and --bound_high parameters to ustacks and pstacks. Feature: additional sections of ustacks has been parallelized. In addition, stack merging has been changed to occur in a single step (instead of in rounds as done previously). Feature: the deleveraging algorithm in ustacks has been replaced with a simple algorithm based on a minimum spanning tree. A new parameter has been introduced, --max_locus_stacks, which controls the number of stacks allowed to be merged together into a single locus. Loci that contain more than --max_locus_stacks stacks are set aside and not added to the catalog later on. 
Feature: export_sql.pl now has two depth parameters, allele and locus depth, allowing for the filtering of loci based on either one. Feature: added a 'dry run' flag (-d) to denovo_map.pl and ref_map.pl to allow the pipeline to be tested to see what it would execute, before actually executing any programs. Bugfix: problem with the FASTA parser fixed (it was introduced with fixes to handle windows-style files). Bugfix: sample counts were off in batch_*.haplotypes.tsv file generated by populations program. Stacks 0.9996 - August 24, 2012 ------------------------------- Bugfix: fixed significant memory leak in Kmer hashing for both ustacks and cstacks. Results in an approximately 3.4x reduction in memory use for cstacks, and an approximately 1.6x reduction in ustacks. Feature: process_radtags and process_shortreads can handle non-Illumina FASTQ headers (any generic FASTQ type). Feature: process_radtags can process data without barcodes. Feature: process_radtags and process_shortreads can handle Illumina barcodes, when the barcode is not inline but is instead provided in the FASTQ header. Bugfix: Corrected the behavior of the '-m' parameter to populations and genotypes. It is meant to apply to the total depth of a stack at a locus, but was instead being applied to the depth of each allele at each locus. Feature: process_radtags and process_shortreads can now automatically discard reads marked as 'failed' by Illumina's chastity/purity filter. Feature: added ecoT22I, mluCI, nlaIII, and sphI restriction enzymes to process_radtags Bugfix: modified Stacks programs to handle Windows-style line endings ('\r\n') from FASTQ, FASTA, and SAM files as well as population maps. Bugfix: changed populations' genepop output to only include loci that are variable in the populations specified. Previously, in some cases, additional fixed loci were included, which are not included in the VCF output, causing the two files to have different loci present. 
Bugfix: expected homozygosity and observed homozygosity were not being reported correctly in the sumstats files. The other population statistics were not affected by the bug. Feature: process_radtags and process_shortreads now print command and time executed to log file. Stacks 0.9995 - July 05, 2012 ----------------------------- Bugfix: Fst summary matrix was being incorrectly written. Stacks 0.9994 - July 01, 2012 ----------------------------- Feature: the populations program can now write a file in the GenePop format. GenePop files can be read by the GenePop program and converted for other population genetics programs such as Arlequin. Caution: you may not be able to include all loci from a Stacks run in the output as these programs aren't necessarily capable of handling such a volume of data. However, you can use populations' whitelist feature to only include certain loci in the output. Feature: the populations program now writes an Fst summary file providing a matrix of mean Fst measures for each pair of populations in the analysis. Feature: added two filters to populations to require a locus to be present in a certain percentage of individuals in a population, and requiring a locus to be present in a certain number of populations. If the former criteria is not reached, the locus is zeroed out only in the specific population, if the latter criteria is not met, the locus is discarded from the analysis. Feature: three Fst corrections are now provided by the populations program: requiring a locus to have a significant p-value (smaller than 0.05, although it's configurable), applying a Bonferroni correction according to the number of data points in the sliding window, and applying a Bonferroni correction according to the number of data points in the genome. Loci that fail to reach statistical significance in each case are considered not different from zero and are set to zero. 
Feature: a filter can be specified to the populations program requiring a minimum allele frequency (MAF) at a locus to consider the locus variable. If an allele at a locus is below the MAF, the locus is considered fixed. Feature: when using a reference genome, Stacks can now work with samples of different sequence lengths. This means one can combine samples generated from different Illumina runs of different length. Each individual sample must be of the same length internally, however. Feature: pstacks can now handle gapped alignments properly. It parses the CIGAR string in the SAM file and inserts/removes Ns to accommodate indels and soft-masked alignment fragments. This prevents the SNP model from mistakenly calling polymorphisms due to indel frameshifts. Bugfix: Removed O(n^2) algorithm from Sliding window Fst calculation in populations program, significant speedup achieved. Bugfix: Updated load_radtags.pl to support population types and to import sumstats, fst, and genotypes files. Bugfix: fixed a small memory leak in DNANSeq. Stacks 0.9993 - June 07, 2012 -------------------------------- Feature: Added Fisher's Exact Test statistics to Fst estimates. This provides a p-value, an odds ratio along with a 95% confidence interval and a Log of Odds (LOD) score for each Fst estimate. These statistics allow one to decide if a particular Fst measurement is significant. Feature: denovo_map.pl and ref_map.pl now import population statistics files into the database (fst and sumstats files). Feature: Web interface now displays summary statistics and Fst values for every locus. Feature: population names can now be directly added through the web interface and they will be stored in the database and propagated. Stacks 0.9992 - May 22, 2012 -------------------------------- Bugfix: fixed massive memory leak in Fst calculations in populations program. 
Bugfix: if using a population map to calculate Fst in the populations program, some individuals could be inadvertently attributed to the wrong populations, due to a mismatch between the indices of the population map (PopMap.h) and the indexes recorded for making the population summary (PopSum.h). Feature: population map can now be specified to denovo_map.pl and ref_map.pl. This data is populated into the database and samples are displayed according to their population in the web interface. Feature: improved denovo_map.pl and ref_map.pl to check for existence of input files. Bugfix: export_sql.pl wasn't properly using the new filters that use a lower and upper bound (snps, alle, pare). Feature: improved how values are generated for web-based filters, allowing for larger populations/maps. Improved HTML rendering for extremely long haplotype strings. Bugfix: corrected alleles to be output as "unphased" in VCF file; corrected homozygotes to be printed as diploid values, e.g. '0/0' or '1/1' instead of just '0'. Bugfix: changed reporting of SNPs on samples.php page to specify total number of SNPs and the number of polymorphic loci (containing one or more SNPs). Bugfix: an extra tab was being placed in the VCF output file. Feature: added flag to process_radtags to disable checking the integrity of the RAD site in each raw read. Added a flag to allow more nucleotide mismatches in the barcode when rescuing barcodes. Stacks 0.9991 - April 17, 2012 -------------------------------- Bugfix: replaced bit-rotted code causing all nucleotides to be masked as 'N' when fixed model engaged on ustacks. Stacks 0.999 - April 11, 2012 -------------------------------- Feature: Added support for the 1000 Genomes Project, Variant Call Format (VCF) in the populations program. (http://www.1000genomes.org/node/101). This file output includes the genotype calls for every individual for each locus, allele depth, and likelihood values for heterozygous SNP calls. 
Feature: implemented a three-bit compression scheme so that uncalled bases ('N's) can be stored in compressed format in pstacks. Other stacks programs currently use two-bit compression which is more compact, but can only store plain nucleotides ('A', 'C', 'G', 'T'). This restores earlier behavior that allowed Ns in pstacks prior to the implementation of the two-bit compression scheme. Bugfix: the populations program was only outputting sites to the summary statistics file (*.sumstats.tsv) if they were heterozygous in a population. This could give the impression that the same site may be absent in other populations when in reality it was simply fixed in the other populations. Now, if a site is heterozygous in any of the populations, it will be output for all populations. Bugfix: added lots of error checking code to populations so it properly handles poorly formatted population maps, missing files, and similar errors. Bugfix: added uncalled bases ('n', 'N', and '.') to the reverse complement function (reads aligned on the negative strand and processed by pstacks will be stored reverse complement). Bugfix: updated the PHP code as well as export_sql.pl to properly use the new filters for chromosome, basepair, as well as lower and upper ranges to various filters. Other: Removed the deprecated markers.pl, genotypes.pl, and process_radtags.pl programs from the distribution. Stacks 0.998 - January 06, 2012 -------------------------------- Feature: Pipeline is now aware if samples are submitted as a 'population' or a 'mapping cross'. A new command line option, -s, has been added to denovo_map.pl and ref_map.pl that will label the dataset as a population. The -p/-r flags continue to keep the samples as a mapping cross. Feature: The web interface has been updated to display more information specific to populations. The filtering code has been changed to include lower and upper limits for filter fields such as SNPs, alleles, and number of parents/samples. 
Feature: A new program, populations, has been written to be executed in place of the existing genotypes program when a population is being processed through the pipeline. A map specifying which individuals belong to which population is submitted to the program and the program will then calculate population genetics statistics, expected/observed heterozygosity, Pi, and Fis at each nucleotide position. Feature: the populations program will compare all populations pairwise to compute Fst. If a set of data is reference aligned, then a kernel-smoothed Fst will also be calculated. These statistics were originally designed by Paul Hohenlohe and Bill Cresko, and are described in the paper: Population Genomics of Parallel Adaptation in Threespine Stickleback using Sequenced RAD Tags, http://www.plosgenetics.org/article/info%3Adoi%2F10.1371%2Fjournal.pgen.1000862 They have been implemented independently in Stacks. Feature: added the DpnII enzyme to the process_radtags program. Feature: Added new 'model' line to *.tags.tsv files. This line records the output of the SNP model at every position in the read as either Homozygous (O), Heterozygous (E), or unknown (U). Previously only polymorphic loci were recorded in the SNPs file (and this remains unchanged). The model output line is now also available in the web interface. Bugfix: fixed crasher bug in cstacks when parallel processing was enabled for genomic-aligned data. Bugfix: allele depths are now properly reported in reference-aligned data. Stacks 0.997 - November 22, 2011 -------------------------------- Feature: new program, called clone_filter, that will take a set of paired-end reads and reduce them according to PCR clones (a PCR clone is a pair of reads that match exactly, while paired-end reads from two different DNA molecules will nearly always be slightly different lengths). 
Feature: new program, called kmer_filter, that allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data. Feature: new program, called process_shortreads, performs the same task as process_radtags for fast cleaning of randomly sheared genomic or transcriptomic data (a 'beta' version of this program has actually been distributed in the last few Stacks releases). Feature: the Stacks tags.tsv file format has a new column to record the DNA strand that a particular read is aligned to, currently only used in datasets aligned to a reference genome. Feature: pstacks now reverse complements all stacks aligned to the negative strand and stores them in this orientation in the output files and database. (All aligners always present these reads in the positive orientation.) This change allows one to align reads to a reference genome using a gapped aligner, such as Tophat or GSNAP and have the RAD site still align with genomic data. (One can then compare genomic RAD tags along with cDNA RAD tags.) Feature: added the '-d' flag to export_sql.pl to export allele depths from the database. Feature: altered process_radtags to store orphaned, paired-end reads in a remainder file, keeping paired-reads in frame. Bugfix: fixed the handling of the paired-end barcode in process_shortreads, added a check to make sure the barcodes from both pairs of a read match. Bugfix: genotypes was not capitalizing auto-corrected genotypes in the generic format (it was in joinmap/rqtl specific formats). Bugfix: corrected cut site sequence for ApeKI in process_radtags. Bugfix: process_radtags inadvertently used newly initialized memory that had not been cleared, causing rare parsing errors when uncleared memory resembled portions of a FASTQ record. Bugfix: the default MySQL permissions were not being properly passed to index_radtags.pl. 
Bugfix: changed load_radtags.pl to extract parental IDs directly from catalog files, instead of relying on file names. Feature: added a 'dry run' option to load_radtags.pl so it will print what it intends to do without actually doing it. Stacks 0.996 - October 5, 2011 --------------------------------- Web interface updates: * If the RAD tags are aligned to a reference genome, a filter is now available to view markers from a particular genomic region. * The individual RAD tag viewer now scrolls while keeping the scale view and consensus sequence always visible. * The RAD tag viewer now highlights columns for which the catalog locus shows a SNP, but the RAD tag does not. * In the genotype viewer, the map between the haplotype and genotype is now available. * The depth of each RAD tag is now visible in the genotype viewer. * The genotype viewer has now been integrated with the observed haplotype viewer. You can make changes/corrections to genotypes directly now, no need to submit a form and wait for the page to reload. Bugfix: process_radtags wasn't properly parsing the names of v1 Illumina BUSTARD files. Bugfix: process_radtags wasn't counting the total number of barcoded paired-end reads correctly. Bugfix: sstacks' impute_haplotype() was causing spurious matching in some, error-based cases. Bugfix: build system was not properly replacing the _PKGDATADIR_ variable in denovo/ref_map.pl programs. Stacks 0.995 - September 23, 2011 --------------------------------- Feature: sstacks can now handle samples and catalogs that have different length reads. Each individual sample must be constructed from the same length reads (by ustacks and cstacks), but between samples there can be different lengths, e.g. a catalog of length 50bp and samples of length 100bp, or vice versa. Feature: Added the ApeKI restriction enzyme to process_radtags Feature: process_radtags can now capture discarded reads to a file. 
Bugfix: a coding limitation was removed that required polymorphic sites in the catalog to contain only two alleles. Now, all four alleles can be recorded at a single site in a locus in the catalog. Bugfix: Exporting results from the web interface was not including manual genotype corrections when requested. Stacks 0.994 - August 08, 2011 ------------------------------ Feature: added catalog index structure to cstacks to speed construction of catalog when using reference-aligned sequences. Feature: added a new output type, 'genomic' to genotypes. Outputs SNPs individually, encoded as a set of integers, for reference-aligned reads. Bugfix: pstacks was not writing individual stack sequences properly. Bugfix: process_radtags was still checking the quality of sequence that was destined to be truncated off the read. Bugfix: process_radtags segfault fixed, parsing stop position mis-specified in parse_input_record(). Stacks 0.993 - August 05, 2011 ------------------------------ Memory usage optimization: Individual sequence reads are now stored internally using a 2-bit encoding of DNA nucleotides. Some simple benchmarking of ustacks (previous version / new version): Sample size Elapsed Time Used Memory ------------- ----------------- ------------- 3.78m reads 3:16 / 3:23 4.64G / 1.86G 17.62m reads 1:31:21 / 1:43:54 55.55G / 45.42G Feature: Added the programs sort_read_pairs.pl, exec_velvet.pl, load_sequences.pl to facilitate the assembly of paired-end RAD-Tags into mini-contigs and allow them to be uploaded into and viewed from the web interface. Bugfix: made process_radtags emit an error when an unrecognized restriction enzyme is specified. Bugfix: made process_radtags accept barcodes with trailing whitespace, such as would be seen in a DOS text file or if errant tabs are specified. 
Stacks 0.992 - July 04, 2011 ---------------------------- Feature: process_radtags can now handle Phred+33 or Phred+64 encodings, Phred+33 is the new default encoding in Illumina's CASAVA software (v1.8). Bugfix: Changed the sql input parser to handle variable length input lines. Necessary if loading tens of individuals into a catalog. Bugfix: Added command line options to ustacks to better control the use of secondary reads in the stack-building procedure. Stacks 0.991 - June 06, 2011 ---------------------------- Bugfix: genotypes was failing to parse Stacks output files with unanticipated names. Bugfix: when using ref_map.pl, tags without SNPs were failing to match against the catalog. Stacks 0.99 - May 20, 2011 -------------------------- *A new C++ genotypes program has been added. This program works independently from the database allowing the pipeline to fully function without installing the database. The new program performs the tasks once completed by markers.pl and genotypes.pl. - The pipeline has been modified to now automatically execute the genotypes program as the last stage in an analysis. It will generate a file containing the observed haplotypes and an additional file containing a map-agnostic set of genotype calls. - If SQL interaction is enabled, the genotypes will be imported to the database and serve as a base to export genotypes directly from the web interface for a particular map and using the set of filters available online. - If a population is being examinined, the observed haplotypes file can be imported into Microsoft Excel or another tab-separated file viewer to immediately see the results. - By replacing the Perl version of genotypes.pl we also no longer need to install or worry about the caching mechanism for auto-correcting stacks, the C++ version can do this by directly reading the Stacks output files. *markers.pl and genotypes.pl are now deprecated and will no longer be supported. 
*Feature: When exporting observed haplotypes, you can now specify a minimum stack depth to include a particular individual at a locus. *Feature: map-specific genotypes can now be exported directly from the database/web server. *Bugfix: genotypes.pl: make script ignore parental genotypes based on the sample type from the MySQL table, not based on the file name. *Bugfix: genotypes.pl: some loci were sneaking in despite being under the progeny limit. *Bugfix: made process_radtags Bustard file parser check number of fields to prevent attempting to parse FASTQ (and segfaulting). Thanks to Maureen.Liu -at- nottingham.ac.uk for reporting it. *Bugfix: in sstacks, when matching to the catalog using reads aligned to a reference genome (-g), sstacks did not verify that haplotypes matched exactly, causing some spurious matching, which later translated into dropped genotypes. *Bugfix: in markers.pl, the ratio observed alleles in the progeny was not being tallied correctly for ab/ac markers. Stacks 0.984 - May 04, 2011 --------------------------- *Bugfix: renamed constants.php to constants.php.dist to avoid overwriting an existing file on reinstallation. *Feature: process_radtags has been converted to a C++ program increasing its speed by approximately 25x. The parameters were modified to be a little more intuitive and parameters were added to control the size and score limit of the sliding window. The program can process a GAII lane in about 5 minutes, a HiSeq lane in about 12 minutes, depending on the hardware used. Stacks 0.983 - Apr 30, 2011 --------------------------- *Bugfix: sstacks segfault when running parallelized. Improper insertion into map object when it should have only been checking for element presence/absence. Thanks to for first reporting it. *Feature: added code to impute the genotype of a missing, second parent for some map types. 
This code adds up all the observed haplotypes in the progeny and then compares their frequencies against those that would be expected for the marker under Hardy-Weinberg equilibrium, choosing the marker type that best fits the Hardy-Weinberg expectation. Stacks 0.982 - Mar 29, 2011 --------------------------- *Bugfix: process_radtags.pl was not properly parsing FASTQ formated, paired-end file names. *Bugfix: counts of matching parents/progeny were sometimes incorrect due to a slightly promiscuous SQL query in index_radtags.pl. Stacks 0.98 - Feb 25, 2011 --------------------------- Note: if you have pre-existing databases, you must rebuild the catalog index (index_radtags.pl -D db -c) so that they are compatible with the new elements of the web interface. *Added option to pstacks to require a minimum depth of coverage for a stack aligned to the refernce genome before reporting it. *Added double haploid (DH) and F2 export types to the genotypes.pl script. *Added option to output any map in R/QTL output in genotypes.pl *Added feature to filter by number of available genotypes in progeny *Added command line option to ustacks to capture and output unused reads. *Added display of chromosome/base pair to web interface for stacks aligned to a reference genome. *Bugfix: FASTA parser was missing records due to a bug introduced from a FASTQ parser fix. *Bugfix: process_radtags.pl was not properly checking the integrity of the RAD site after adding restriction enzymes with alternate nucleotides. *Bugfix: when constructing the catalog, some tags being added to the did not have their genomic location transferred over to a new catalog tag. *Modified sstacks to include an option to match stacks against the catalog based on the genomic location (assuming individuals were processed with pstacks). *Bugfix: Lots of clean-ups and command line option fixes, thanks to . 
Stacks 0.971 - Jan 30, 2011 --------------------------- *Illumina software version 1.3 produces Phred scores that can begin with a '@' character, throwing off the FASTQ parser. Added code to clear the read buffer in between records to solve the problem. Thanks to Aarti for finding the bug. Stacks 0.97 --------------------------- *ustacks now detects when there are uncalled nucleotides in FASTA or FASTQ input files, replaces those bases with 'A'. *process_radtags.pl now detects barcode length automatically. Removed spurious error messages when no data is processed. Stacks 0.96 - Jan 7, 2011 --------------------------- *Fixed typo in README giving the wrong file path for the Apache configuration file. *Fixed several hard-coded paths in PHP files that referred to our local system. stacks-1.35/config/000755 000765 000024 00000000000 12574070564 014761 5ustar00catchenstaff000000 000000 stacks-1.35/config.h.in000644 000765 000024 00000006624 12571641562 015546 0ustar00catchenstaff000000 000000 /* config.h.in. Generated from configure.ac by autoheader. */ /* Define to 1 if the `closedir' function returns void instead of `int'. */ #undef CLOSEDIR_VOID /* Enable compilation with Samtools BAM library */ #undef HAVE_BAM /* define if the compiler supports basic C++11 syntax */ #undef HAVE_CXX11 /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_DIRENT_H /* Define to 1 if you have the header file. */ #undef HAVE_FLOAT_H /* Define to 1 if you have the `floor' function. */ #undef HAVE_FLOOR /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if you have the `gomp' library (-lgomp). */ #undef HAVE_LIBGOMP /* Define to 1 if you have the `z' library (-lz). */ #undef HAVE_LIBZ /* Define to 1 if you have the header file. */ #undef HAVE_LIMITS_H /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #undef HAVE_MALLOC /* Define to 1 if you have the header file. 
*/ #undef HAVE_MEMORY_H /* Define to 1 if you have the `memset' function. */ #undef HAVE_MEMSET /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_NDIR_H /* Define to 1 if you have the `pow' function. */ #undef HAVE_POW /* Define to 1 if your system has a GNU libc compatible `realloc' function, and to 0 otherwise. */ #undef HAVE_REALLOC /* Enable compilation with Google Sparsehash */ #undef HAVE_SPARSEHASH /* Define to 1 if you have the `sqrt' function. */ #undef HAVE_SQRT /* Define to 1 if stdbool.h conforms to C99. */ #undef HAVE_STDBOOL_H /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_SYS_DIR_H /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_SYS_NDIR_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H /* Define to 1 if the system has the type `_Bool'. */ #undef HAVE__BOOL /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if you have the ANSI C header files. 
*/ #undef STDC_HEADERS /* Version number of package */ #undef VERSION /* Define to empty if `const' does not conform to ANSI C. */ #undef const /* Define to rpl_malloc if the replacement function should be used. */ #undef malloc /* Define to rpl_realloc if the replacement function should be used. */ #undef realloc /* Define to `unsigned int' if does not define. */ #undef size_t stacks-1.35/configure000755 000765 000024 00000626027 12571641547 015442 0ustar00catchenstaff000000 000000 #! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69 for Stacks 1.35. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. 
if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. 
in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 as_fn_exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. 
alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. fi test x\$exitcode = x0 || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. 
CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. 
as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. 
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME='Stacks' PACKAGE_TARNAME='stacks' PACKAGE_VERSION='1.35' PACKAGE_STRING='Stacks 1.35' PACKAGE_BUGREPORT='' PACKAGE_URL='' ac_unique_file="src/ustacks.cc" # Factoring default headers for most tests. 
ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS OPENMP_CFLAGS EGREP GREP CPP HAVE_CXX11 am__fastdepCC_FALSE am__fastdepCC_TRUE CCDEPMODE ac_ct_CC CFLAGS CC am__fastdepCXX_FALSE am__fastdepCXX_TRUE CXXDEPMODE am__nodep AMDEPBACKSLASH AMDEP_FALSE AMDEP_TRUE am__quote am__include DEPDIR OBJEXT EXEEXT ac_ct_CXX CPPFLAGS LDFLAGS CXXFLAGS CXX SPARSEHASH_CFLAGS BAM_LIBS BAM_CFLAGS AM_BACKSLASH AM_DEFAULT_VERBOSITY AM_DEFAULT_V AM_V am__untar am__tar AMTAR am__leading_dot SET_MAKE AWK mkdir_p MKDIR_P INSTALL_STRIP_PROGRAM STRIP install_sh MAKEINFO AUTOHEADER AUTOMAKE AUTOCONF ACLOCAL VERSION PACKAGE CYGPATH_W am__isrc INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking enable_silent_rules enable_bam with_bam_include_path with_bam_lib_path enable_sparsehash with_sparsehash_include_path enable_dependency_tracking enable_openmp ' ac_precious_vars='build_alias host_alias target_alias CXX CXXFLAGS LDFLAGS LIBS CPPFLAGS CCC CC CFLAGS CPP' # Initialize some variables set by options. 
ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. 
case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? 
"invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. 
case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? 
"cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures Stacks 1.35 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' 
messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/stacks] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF Program names: --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on 
installed program names _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of Stacks 1.35:";; esac cat <<\_ACEOF Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-silent-rules less verbose build output (undo: "make V=1") --disable-silent-rules verbose build output (undo: "make V=0") --enable-bam Enable Samtools' use of BAM files (requires BAM library to be installed). --enable-sparsehash Enable the use of Google Sparsehash (must be installed). --enable-dependency-tracking do not reject slow dependency extractors --disable-dependency-tracking speeds up one-time build --disable-openmp do not use OpenMP Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-bam-include-path location of Samtools BAM headers, defaults to /usr/include/bam --with-bam-lib-path location of Samtools BAM library --with-sparsehash-include-path location of Google Sparsehash headers Some influential environment variables: CXX C++ compiler command CXXFLAGS C++ compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CC C compiler command CFLAGS C compiler flags CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to the package provider. _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. 
&& test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF Stacks configure 1.35 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. 
## ## ------------------------ ## # ac_fn_cxx_try_compile LINENO # ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_compile # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link # ac_fn_c_try_cpp LINENO # ---------------------- # Try to preprocess conftest.$ac_ext, and return whether this succeeded. 
ac_fn_c_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_cpp # ac_fn_c_try_run LINENO # ---------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes # that executables *can* be run. ac_fn_c_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; }; then : ac_retval=0 else $as_echo "$as_me: program exited with status $ac_status" >&5 $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status fi rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_run # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists, giving a warning if it cannot be compiled using # the include files in INCLUDES and setting the cache variable VAR # accordingly. ac_fn_c_check_header_mongrel () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if eval \${$3+:} false; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } else # Is the header compilable? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 $as_echo_n "checking $2 usability... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_header_compiler=yes else ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 $as_echo "$ac_header_compiler" >&6; } # Is the header present? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 $as_echo_n "checking $2 presence... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include <$2> _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : ac_header_preproc=yes else ac_header_preproc=no fi rm -f conftest.err conftest.i conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( yes:no: ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 $as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ;; no:yes:* ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 $as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 $as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 $as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... 
" >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=\$ac_header_compiler" fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_mongrel # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : eval "$3=yes" else eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile # ac_fn_c_check_type LINENO TYPE VAR INCLUDES # ------------------------------------------- # Tests whether TYPE exists after having included INCLUDES, setting cache # variable VAR accordingly. ac_fn_c_check_type () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main () { if (sizeof ($2)) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ $4 int main () { if (sizeof (($2))) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else eval "$3=yes" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_type # ac_fn_c_check_func LINENO FUNC VAR # ---------------------------------- # Tests whether FUNC exists, setting the cache variable VAR accordingly ac_fn_c_check_func () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case declares $2. For example, HP-UX 11i declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $2 (); below. Prefer to if __STDC__ is defined, since exists even on freestanding compilers. */ #ifdef __STDC__ # include #else # include #endif #undef $2 /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $2 (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ #if defined __stub_$2 || defined __stub___$2 choke me #endif int main () { return $2 (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : eval "$3=yes" else eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_func cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by Stacks $as_me 1.35, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. ## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. $as_echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. 
# Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. 
## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. 
## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . 
"$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. 
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_aux_dir= for ac_dir in config "$srcdir"/config; do if test -f "$ac_dir/install-sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f "$ac_dir/install.sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break elif test -f "$ac_dir/shtool"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/shtool install -c" break fi done if test -z "$ac_aux_dir"; then as_fn_error $? "cannot find install-sh, install.sh, or shtool in config \"$srcdir\"/config" "$LINENO" 5 fi # These three variables are undocumented and unsupported, # and are intended to be withdrawn in a future Autoconf release. # They can cause serious problems if a builder's source tree is in a directory # whose full name contains unusual characters. ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. am__api_version='1.14' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AmigaOS /C/install, which installs bootblocks on floppy discs # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # OS/2's system install, which has a completely different semantic # ./install, which can be erroneously created by make from ./install.sh. # Reject install programs that cannot install multiple files. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 $as_echo_n "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then if ${ac_cv_path_install+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. # Account for people who put trailing slashes in PATH elements. case $as_dir/ in #(( ./ | .// | /[cC]/* | \ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ /usr/ucb/* ) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then if test $ac_prog = install && grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : elif test $ac_prog = install && grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # program-specific install script used by HP pwplus--don't use. : else rm -rf conftest.one conftest.two conftest.dir echo one > conftest.one echo two > conftest.two mkdir conftest.dir if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && test -s conftest.one && test -s conftest.two && test -s conftest.dir/conftest.one && test -s conftest.dir/conftest.two then ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" break 3 fi fi fi done done ;; esac done IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir fi if test "${ac_cv_path_install+set}" = set; then INSTALL=$ac_cv_path_install else # As a last resort, use the slow shell script. 
Don't cache a # value for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. INSTALL=$ac_install_sh fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 $as_echo "$INSTALL" >&6; } # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 $as_echo_n "checking whether build environment is sane... " >&6; } # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[\\\"\#\$\&\'\`$am_lf]*) as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; esac case $srcdir in *[\\\"\#\$\&\'\`$am_lf\ \ ]*) as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$*" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$*" != "X $srcdir/configure conftest.file" \ && test "$*" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. 
Such a system could not be considered "sane". as_fn_error $? "ls -t appears to fail. Make sure there is not a broken alias in your environment" "$LINENO" 5 fi if test "$2" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$2" = conftest.file ) then # Ok. : else as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi rm -f conftest.file test "$program_prefix" != NONE && program_transform_name="s&^&$program_prefix&;$program_transform_name" # Use a double $ so make ignores it. test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. # By default was `s,x,x', remove it if useless. ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` if test x"${MISSING+set}" != xset; then case $am_aux_dir in *\ * | *\ *) MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; *) MISSING="\${SHELL} $am_aux_dir/missing" ;; esac fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then am_missing_run="$MISSING " else am_missing_run= { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". 
However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. if test "$cross_compiling" != no; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 $as_echo "$STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 $as_echo "$ac_ct_STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 $as_echo_n "checking for a thread-safe mkdir -p... " >&6; } if test -z "$MKDIR_P"; then if ${ac_cv_path_mkdir+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in mkdir gmkdir; do for ac_exec_ext in '' $ac_executable_extensions; do as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( 'mkdir (GNU coreutils) '* | \ 'mkdir (coreutils) '* | \ 'mkdir (fileutils) '4.1*) ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext break 3;; esac done done done IFS=$as_save_IFS fi test -d ./--version && rmdir ./--version if test "${ac_cv_path_mkdir+set}" = set; then MKDIR_P="$ac_cv_path_mkdir -p" else # As a last resort, use the slow shell script. 
Don't cache a # value for MKDIR_P within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. MKDIR_P="$ac_install_sh -d" fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 $as_echo "$MKDIR_P" >&6; } for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_AWK+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 $as_echo "$AWK" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$AWK" && break done { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 $as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } set x ${MAKE-make} ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : $as_echo_n "(cached) " >&6 else cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @echo '@@@%%%=$(MAKE)=@@@%%%' _ACEOF # GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
case `${MAKE-make} -f conftest.make 2>/dev/null` in *@@@%%%=?*=@@@%%%*) eval ac_cv_prog_make_${ac_make}_set=yes;; *) eval ac_cv_prog_make_${ac_make}_set=no;; esac rm -f conftest.make fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } SET_MAKE= else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } SET_MAKE="MAKE=${MAKE-make}" fi rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null # Check whether --enable-silent-rules was given. if test "${enable_silent_rules+set}" = set; then : enableval=$enable_silent_rules; fi case $enable_silent_rules in # ((( yes) AM_DEFAULT_VERBOSITY=0;; no) AM_DEFAULT_VERBOSITY=1;; *) AM_DEFAULT_VERBOSITY=1;; esac am_make=${MAKE-make} { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 $as_echo_n "checking whether $am_make supports nested variables... " >&6; } if ${am_cv_make_support_nested_variables+:} false; then : $as_echo_n "(cached) " >&6 else if $as_echo 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 am__doit: @$(TRUE) .PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 $as_echo "$am_cv_make_support_nested_variables" >&6; } if test $am_cv_make_support_nested_variables = yes; then AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' else AM_V=$AM_DEFAULT_VERBOSITY AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY fi AM_BACKSLASH='\' if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." am__isrc=' -I$(srcdir)' # test to see if srcdir already configured if test -f $srcdir/config.status; then as_fn_error $? 
"source directory already configured; run \"make distclean\" there first" "$LINENO" 5 fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi # Define the identity of the package. PACKAGE='stacks' VERSION='1.35' cat >>confdefs.h <<_ACEOF #define PACKAGE "$PACKAGE" _ACEOF cat >>confdefs.h <<_ACEOF #define VERSION "$VERSION" _ACEOF # Some tools Automake needs. ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # mkdir_p='$(MKDIR_P)' # We need awk for the "check" target. The system "awk" is bad on # some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' # We'll loop over all known methods to create a tar archive until one works. _am_tools='gnutar pax cpio none' am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' # POSIX will say in a future version that running "rm -f" with no argument # is OK; and we want to be able to make that assumption in our Makefile # recipes. So use an aggressive probe to check that the usage we want is # actually supported "in the wild" to an acceptable degree. # See automake bug#10828. # To make any issue more visible, cause the running configure to be aborted # by default if the 'rm' program in use doesn't match our expectations; the # user can still override this though. if rm -f && rm -fr && rm -rf; then : OK; else cat >&2 <<'END' Oops! 
Your 'rm' program seems unable to run without file operands specified on the command line, even when the '-f' option is present. This is contrary to the behaviour of most rm programs out there, and not conforming with the upcoming POSIX standard: Please tell bug-automake@gnu.org about your system, including the value of your $PATH and any error possibly output before this message. This can help us improve future automake versions. END if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then echo 'Configuration will proceed anyway, since you have set the' >&2 echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 echo >&2 else cat >&2 <<'END' Aborting the configuration process, to ensure you take notice of the issue. You can download and install GNU coreutils to get an 'rm' implementation that behaves properly: . If you want to complete the configuration process using your problematic 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM to "yes", and re-run configure. END as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 fi fi ac_config_headers="$ac_config_headers config.h" # Get BAM library and include locations if requested # Check whether --enable-bam was given. if test "${enable_bam+set}" = set; then : enableval=$enable_bam; fi if test "x$enable_bam" = "xyes"; then : $as_echo "#define HAVE_BAM 1" >>confdefs.h # Check whether --with-bam-include-path was given. if test "${with_bam_include_path+set}" = set; then : withval=$with_bam_include_path; BAM_CFLAGS="-I$withval" else BAM_CFLAGS='-I/usr/include/bam' fi # Check whether --with-bam-lib-path was given. if test "${with_bam_lib_path+set}" = set; then : withval=$with_bam_lib_path; BAM_LIBS="$withval/libbam.a" else BAM_LIBS='/usr/lib/libbam.a' fi fi # Enable use of Google Sparsehash and get include location if requested. # Check whether --enable-sparsehash was given. 
if test "${enable_sparsehash+set}" = set; then : enableval=$enable_sparsehash; fi if test "x$enable_sparsehash" = "xyes"; then : $as_echo "#define HAVE_SPARSEHASH 1" >>confdefs.h # Check whether --with-sparsehash-include-path was given. if test "${with_sparsehash_include_path+set}" = set; then : withval=$with_sparsehash_include_path; SPARSEHASH_CFLAGS="-I$withval" else SPARSEHASH_CFLAGS="" fi fi # Checks for programs. ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test -z "$CXX"; then if test -n "$CCC"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 $as_echo "$CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CXX" && break done fi if test -z "$CXX"; then ac_ct_CXX=$CXX for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CXX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 $as_echo "$ac_ct_CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CXX" && break done if test "x$ac_ct_CXX" = x; then CXX="g++" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CXX=$ac_ct_CXX fi fi fi fi # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler works" >&5 $as_echo_n "checking whether the C++ compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. 
break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C++ compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler default output file name" >&5 $as_echo_n "checking for C++ compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. 
for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C++ compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? 
"cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 $as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; } if ${ac_cv_cxx_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 $as_echo "$ac_cv_cxx_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GXX=yes else GXX= fi ac_test_CXXFLAGS=${CXXFLAGS+set} ac_save_CXXFLAGS=$CXXFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 $as_echo_n "checking whether $CXX accepts -g... " >&6; } if ${ac_cv_prog_cxx_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_cv_prog_cxx_g=yes else CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : else ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_cv_prog_cxx_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 $as_echo "$ac_cv_prog_cxx_g" >&6; } if test "$ac_test_CXXFLAGS" = set; then CXXFLAGS=$ac_save_CXXFLAGS elif test $ac_cv_prog_cxx_g = yes; then if test "$GXX" = yes; then CXXFLAGS="-g -O2" else CXXFLAGS="-g" fi else if test "$GXX" = yes; then CXXFLAGS="-O2" else CXXFLAGS= fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 $as_echo_n "checking for style of include used by $am_make... " >&6; } am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from 'make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. 
if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 $as_echo "$_am_result" >&6; } rm -f confinc confmf # Check whether --enable-dependency-tracking was given. if test "${enable_dependency_tracking+set}" = set; then : enableval=$enable_dependency_tracking; fi if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi if test "x$enable_dependency_tracking" != xno; then AMDEP_TRUE= AMDEP_FALSE='#' else AMDEP_TRUE='#' AMDEP_FALSE= fi depcc="$CXX" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CXX_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. 
mkdir sub am_cv_CXX_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. 
am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CXX_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CXX_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 $as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then am__fastdepCXX_TRUE= am__fastdepCXX_FALSE='#' else am__fastdepCXX_TRUE='#' am__fastdepCXX_FALSE= fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. 
set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } done { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. 
*/ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 $as_echo_n "checking whether $CC understands -c and -o together... " >&6; } if ${am_cv_prog_cc_c_o+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF # Make sure it works both with $CC and with simple cc. 
# Following AC_PROG_CC_C_O, we do the test twice because some # compilers refuse to overwrite an existing .o file with -o, # though they will create one. am_cv_prog_cc_c_o=yes for am_i in 1 2; do if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } \ && test -f conftest2.$ac_objext; then : OK else am_cv_prog_cc_c_o=no break fi done rm -f core conftest* unset am_i fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 $as_echo "$am_cv_prog_cc_c_o" >&6; } if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CC" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CC_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. 
cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. 
if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. 
rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 $as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi ax_cxx_compile_cxx11_required=truednl ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu ac_success=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 $as_echo_n "checking whether $CXX supports C++11 features by default... " >&6; } if ${ax_cv_cxx_compile_cxx11+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; typedef check> right_angle_brackets; int a; decltype(a) b; typedef check check_type; check_type c; check_type&& cr = static_cast(c); auto d = a; _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ax_cv_cxx_compile_cxx11=yes else ax_cv_cxx_compile_cxx11=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5 $as_echo "$ax_cv_cxx_compile_cxx11" >&6; } if test x$ax_cv_cxx_compile_cxx11 = xyes; then ac_success=yes fi if test x$ac_success = xno; then for switch in -std=gnu++11 -std=gnu++0x; do cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 $as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } if eval \${$cachevar+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; typedef check> right_angle_brackets; int a; decltype(a) b; typedef check check_type; check_type c; check_type&& cr = static_cast(c); auto d = a; _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : eval $cachevar=yes else eval $cachevar=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS="$ac_save_CXXFLAGS" fi eval ac_res=\$$cachevar { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi if test x$ac_success = xno; then for switch in -std=c++11 -std=c++0x; do cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 $as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } if eval \${$cachevar+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; typedef check> right_angle_brackets; int a; decltype(a) b; typedef check check_type; check_type c; check_type&& cr = static_cast(c); auto d = a; _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : eval $cachevar=yes else eval $cachevar=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS="$ac_save_CXXFLAGS" fi eval ac_res=\$$cachevar { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test x$ax_cxx_compile_cxx11_required = xtrue; then if test x$ac_success = xno; then as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5 fi else if test x$ac_success = xno; then HAVE_CXX11=0 { $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 $as_echo "$as_me: No compiler with C++11 support was found" >&6;} else HAVE_CXX11=1 $as_echo "#define HAVE_CXX11 1" >>confdefs.h fi fi # Checks for libraries. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for omp_set_num_threads in -lgomp" >&5 $as_echo_n "checking for omp_set_num_threads in -lgomp... " >&6; } if ${ac_cv_lib_gomp_omp_set_num_threads+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lgomp $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char omp_set_num_threads (); int main () { return omp_set_num_threads (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_gomp_omp_set_num_threads=yes else ac_cv_lib_gomp_omp_set_num_threads=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gomp_omp_set_num_threads" >&5 $as_echo "$ac_cv_lib_gomp_omp_set_num_threads" >&6; } if test "x$ac_cv_lib_gomp_omp_set_num_threads" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBGOMP 1 _ACEOF LIBS="-lgomp $LIBS" else { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to locate OpenMP library, you should probably specify '--disable-openmp'." >&5 $as_echo "$as_me: WARNING: Unable to locate OpenMP library, you should probably specify '--disable-openmp'." >&2;} fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gzread in -lz" >&5 $as_echo_n "checking for gzread in -lz... " >&6; } if ${ac_cv_lib_z_gzread+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lz $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char gzread (); int main () { return gzread (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_z_gzread=yes else ac_cv_lib_z_gzread=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzread" >&5 $as_echo "$ac_cv_lib_z_gzread" >&6; } if test "x$ac_cv_lib_z_gzread" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBZ 1 _ACEOF LIBS="-lz $LIBS" else as_fn_error $? 
"Zlib not found, reading gzipped files will not be possible." "$LINENO" 5 fi # Checks for header files. ac_header_dirent=no for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do as_ac_Header=`$as_echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_hdr that defines DIR" >&5 $as_echo_n "checking for $ac_hdr that defines DIR... " >&6; } if eval \${$as_ac_Header+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include <$ac_hdr> int main () { if ((DIR *) 0) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : eval "$as_ac_Header=yes" else eval "$as_ac_Header=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$as_ac_Header { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_hdr" | $as_tr_cpp` 1 _ACEOF ac_header_dirent=$ac_hdr; break fi done # Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. if test $ac_header_dirent = dirent.h; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 $as_echo_n "checking for library containing opendir... " >&6; } if ${ac_cv_search_opendir+:} false; then : $as_echo_n "(cached) " >&6 else ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char opendir (); int main () { return opendir (); ; return 0; } _ACEOF for ac_lib in '' dir; do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO"; then : ac_cv_search_opendir=$ac_res fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext if ${ac_cv_search_opendir+:} false; then : break fi done if ${ac_cv_search_opendir+:} false; then : else ac_cv_search_opendir=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 $as_echo "$ac_cv_search_opendir" >&6; } ac_res=$ac_cv_search_opendir if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" fi else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 $as_echo_n "checking for library containing opendir... " >&6; } if ${ac_cv_search_opendir+:} false; then : $as_echo_n "(cached) " >&6 else ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char opendir (); int main () { return opendir (); ; return 0; } _ACEOF for ac_lib in '' x; do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO"; then : ac_cv_search_opendir=$ac_res fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext if ${ac_cv_search_opendir+:} false; then : break fi done if ${ac_cv_search_opendir+:} false; then : else ac_cv_search_opendir=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 $as_echo "$ac_cv_search_opendir" >&6; } ac_res=$ac_cv_search_opendir if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if ${ac_cv_prog_CPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } if ${ac_cv_path_EGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. 
# Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. 
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default " if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in float.h limits.h stdlib.h string.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done # Check for OpenMP parallel execution support OPENMP_CFLAGS= # Check whether --enable-openmp was given. if test "${enable_openmp+set}" = set; then : enableval=$enable_openmp; fi if test "$enable_openmp" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 $as_echo_n "checking for $CC option to support OpenMP... " >&6; } if ${ac_cv_prog_c_openmp+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP choke me #endif #include int main () { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_prog_c_openmp='none needed' else ac_cv_prog_c_openmp='unsupported' for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do ac_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS $ac_option" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #ifndef _OPENMP choke me #endif #include int main () { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_prog_c_openmp=$ac_option fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$ac_save_CFLAGS if test "$ac_cv_prog_c_openmp" != unsupported; then break fi done fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 $as_echo "$ac_cv_prog_c_openmp" >&6; } case $ac_cv_prog_c_openmp in #( "none needed" | unsupported) ;; #( *) OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; esac fi # Checks for typedefs, structures, and compiler characteristics. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5 $as_echo_n "checking for stdbool.h that conforms to C99... " >&6; } if ${ac_cv_header_stdbool_h+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #ifndef bool "error: bool is not defined" #endif #ifndef false "error: false is not defined" #endif #if false "error: false is not 0" #endif #ifndef true "error: true is not defined" #endif #if true != 1 "error: true is not 1" #endif #ifndef __bool_true_false_are_defined "error: __bool_true_false_are_defined is not defined" #endif struct s { _Bool s: 1; _Bool t; } s; char a[true == 1 ? 1 : -1]; char b[false == 0 ? 1 : -1]; char c[__bool_true_false_are_defined == 1 ? 1 : -1]; char d[(bool) 0.5 == true ? 1 : -1]; /* See body of main program for 'e'. */ char f[(_Bool) 0.0 == false ? 1 : -1]; char g[true]; char h[sizeof (_Bool)]; char i[sizeof s.t]; enum { j = false, k = true, l = false * true, m = true * 256 }; /* The following fails for HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]. */ _Bool n[m]; char o[sizeof n == m * sizeof n[0] ? 1 : -1]; char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1]; /* Catch a bug in an HP-UX C compiler. 
See http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html */ _Bool q = true; _Bool *pq = &q; int main () { bool e = &s; *pq |= q; *pq |= ! q; /* Refer to every declared value, to avoid compiler optimizations. */ return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l + !m + !n + !o + !p + !q + !pq); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdbool_h=yes else ac_cv_header_stdbool_h=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5 $as_echo "$ac_cv_header_stdbool_h" >&6; } ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default" if test "x$ac_cv_type__Bool" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE__BOOL 1 _ACEOF fi if test $ac_cv_header_stdbool_h = yes; then $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 $as_echo_n "checking for an ANSI C-conforming const... " >&6; } if ${ac_cv_c_const+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __cplusplus /* Ultrix mips cc rejects this sort of thing. */ typedef int charset[2]; const charset cs = { 0, 0 }; /* SunOS 4.1.1 cc rejects this. */ char const *const *pcpcc; char **ppc; /* NEC SVR4.0.2 mips cc rejects this. */ struct point {int x, y;}; static struct point const zero = {0,0}; /* AIX XL C 1.02.0.0 rejects this. It does not let you subtract one const X* pointer from another in an arm of an if-expression whose if-part is not a constant expression */ const char *g = "string"; pcpcc = &g + (g ? g-g : 0); /* HPUX 7.0 cc rejects these. */ ++pcpcc; ppc = (char**) pcpcc; pcpcc = (char const *const *) ppc; { /* SCO 3.2v4 cc rejects this sort of thing. */ char tx; char *t = &tx; char const *s = 0 ? 
(char *) 0 : (char const *) 0; *t++ = 0; if (s) return 0; } { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ int x[] = {25, 17}; const int *foo = &x[0]; ++foo; } { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ typedef const int *iptr; iptr p = 0; ++p; } { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ struct s { int j; const int *ap[3]; } bx; struct s *b = &bx; b->j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; if (!foo) return 0; } return !cs[0] && !zero.x; #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_const=yes else ac_cv_c_const=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 $as_echo "$ac_cv_c_const" >&6; } if test $ac_cv_c_const = no; then $as_echo "#define const /**/" >>confdefs.h fi ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" if test "x$ac_cv_type_size_t" = xyes; then : else cat >>confdefs.h <<_ACEOF #define size_t unsigned int _ACEOF fi # Checks for library functions. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether closedir returns void" >&5 $as_echo_n "checking whether closedir returns void... " >&6; } if ${ac_cv_func_closedir_void+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_closedir_void=yes else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ $ac_includes_default #include <$ac_header_dirent> #ifndef __cplusplus int closedir (); #endif int main () { return closedir (opendir (".")) != 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_closedir_void=no else ac_cv_func_closedir_void=yes fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_closedir_void" >&5 $as_echo "$ac_cv_func_closedir_void" >&6; } if test $ac_cv_func_closedir_void = yes; then $as_echo "#define CLOSEDIR_VOID 1" >>confdefs.h fi for ac_header in stdlib.h do : ac_fn_c_check_header_mongrel "$LINENO" "stdlib.h" "ac_cv_header_stdlib_h" "$ac_includes_default" if test "x$ac_cv_header_stdlib_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_STDLIB_H 1 _ACEOF fi done { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU libc compatible malloc" >&5 $as_echo_n "checking for GNU libc compatible malloc... " >&6; } if ${ac_cv_func_malloc_0_nonnull+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_malloc_0_nonnull=no else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if defined STDC_HEADERS || defined HAVE_STDLIB_H # include #else char *malloc (); #endif int main () { return ! 
malloc (0); ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_malloc_0_nonnull=yes else ac_cv_func_malloc_0_nonnull=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_malloc_0_nonnull" >&5 $as_echo "$ac_cv_func_malloc_0_nonnull" >&6; } if test $ac_cv_func_malloc_0_nonnull = yes; then : $as_echo "#define HAVE_MALLOC 1" >>confdefs.h else $as_echo "#define HAVE_MALLOC 0" >>confdefs.h case " $LIBOBJS " in *" malloc.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS malloc.$ac_objext" ;; esac $as_echo "#define malloc rpl_malloc" >>confdefs.h fi for ac_header in stdlib.h do : ac_fn_c_check_header_mongrel "$LINENO" "stdlib.h" "ac_cv_header_stdlib_h" "$ac_includes_default" if test "x$ac_cv_header_stdlib_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_STDLIB_H 1 _ACEOF fi done { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU libc compatible realloc" >&5 $as_echo_n "checking for GNU libc compatible realloc... " >&6; } if ${ac_cv_func_realloc_0_nonnull+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_realloc_0_nonnull=no else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if defined STDC_HEADERS || defined HAVE_STDLIB_H # include #else char *realloc (); #endif int main () { return ! 
realloc (0, 0); ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_realloc_0_nonnull=yes else ac_cv_func_realloc_0_nonnull=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_realloc_0_nonnull" >&5 $as_echo "$ac_cv_func_realloc_0_nonnull" >&6; } if test $ac_cv_func_realloc_0_nonnull = yes; then : $as_echo "#define HAVE_REALLOC 1" >>confdefs.h else $as_echo "#define HAVE_REALLOC 0" >>confdefs.h case " $LIBOBJS " in *" realloc.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS realloc.$ac_objext" ;; esac $as_echo "#define realloc rpl_realloc" >>confdefs.h fi for ac_func in floor memset pow sqrt do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done # For test harness for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_AWK+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 $as_echo "$AWK" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$AWK" && break done ac_config_files="$ac_config_files Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. 
test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs { $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 $as_echo_n "checking that generated files are newer than configure... " >&6; } if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 $as_echo "done" >&6; } if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' else am__EXEEXT_TRUE='#' am__EXEEXT_FALSE= fi if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. 
if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. 
in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... 
# ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. 
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. 
## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by Stacks $as_me 1.35, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac case $ac_config_headers in *" "*) set x $ac_config_headers; shift; ac_config_headers=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" config_headers="$ac_config_headers" config_commands="$ac_config_commands" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Configuration commands: $config_commands Report bugs to the package provider." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ Stacks config.status 1.35 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Copyright (C) 2012 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' INSTALL='$INSTALL' MKDIR_P='$MKDIR_P' AWK='$AWK' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; --he | --h) # Conflict between --help and --header as_fn_error $? 
"ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # # INIT-COMMANDS # AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. 
if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" # Set up the scripts for CONFIG_HEADERS section. # No need to generate them if there are no CONFIG_HEADERS. # This happens for instance with `./config.status Makefile'. if test -n "$CONFIG_HEADERS"; then cat >"$ac_tmp/defines.awk" <<\_ACAWK || BEGIN { _ACEOF # Transform confdefs.h into an awk script `defines.awk', embedded as # here-document in config.status, that substitutes the proper values into # config.h.in to produce config.h. # Create a delimiter string that does not exist in confdefs.h, to ease # handling of long lines. ac_delim='%!_!# ' for ac_last_try in false false :; do ac_tt=`sed -n "/$ac_delim/p" confdefs.h` if test -z "$ac_tt"; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done # For the awk script, D is an array of macro values keyed by name, # likewise P contains macro parameters if any. Preserve backslash # newline sequences. 
ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* sed -n ' s/.\{148\}/&'"$ac_delim"'/g t rset :rset s/^[ ]*#[ ]*define[ ][ ]*/ / t def d :def s/\\$// t bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3"/p s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p d :bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3\\\\\\n"\\/p t cont s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p t cont d :cont n s/.\{148\}/&'"$ac_delim"'/g t clear :clear s/\\$// t bsnlc s/["\\]/\\&/g; s/^/"/; s/$/"/p d :bsnlc s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p b cont ' >$CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 for (key in D) D_is_set[key] = 1 FS = "" } /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { line = \$ 0 split(line, arg, " ") if (arg[1] == "#") { defundef = arg[2] mac1 = arg[3] } else { defundef = substr(arg[1], 2) mac1 = arg[2] } split(mac1, mac2, "(") #) macro = mac2[1] prefix = substr(line, 1, index(line, defundef) - 1) if (D_is_set[macro]) { # Preserve the white space surrounding the "#". print prefix "define", macro P[macro] D[macro] next } else { # Replace #undef with comments. This is necessary, for example, # in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. if (defundef == "undef") { print "/*", prefix defundef, macro, "*/" next } } } { print } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 fi # test -n "$CONFIG_HEADERS" eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; esac ac_MKDIR_P=$MKDIR_P case $MKDIR_P in [\\/$]* | ?:[\\/]* ) ;; */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; esac _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. 
ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t s&@MKDIR_P@&$ac_MKDIR_P&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; :H) # # CONFIG_HEADER # if test x"$ac_file" != x-; then { $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 $as_echo "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? "could not create -" "$LINENO" 5 fi # Compute "$ac_file"'s index in $config_headers. _am_arg="$ac_file" _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$_am_arg" : 'X\(//\)[^/]' \| \ X"$_am_arg" : 'X\(//\)$' \| \ X"$_am_arg" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X"$_am_arg" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'`/stamp-h$_am_stamp_count ;; :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 $as_echo "$as_me: executing $ac_file commands" >&6;} ;; esac case $ac_file$ac_mode in "depfiles":C) test x"$AMDEP_TRUE" != x"" || { # Older Autoconf quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. # We used to match only the files named 'Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`$as_dirname -- "$mf" || $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$mf" : 'X\(//\)[^/]' \| \ X"$mf" : 'X\(//\)$' \| \ X"$mf" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running 'make'. 
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "$am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`$as_dirname -- "$file" || $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$file" : 'X\(//\)[^/]' \| \ X"$file" : 'X\(//\)$' \| \ X"$file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir=$dirpart/$fdir; as_fn_mkdir_p # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. 
if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi stacks-1.35/configure.ac000644 000765 000024 00000004657 12571641525 016014 0ustar00catchenstaff000000 000000 # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) AC_INIT([Stacks], [1.35]) AC_CONFIG_AUX_DIR([config]) AM_INIT_AUTOMAKE([-Wall -Werror foreign parallel-tests subdir-objects]) AC_CONFIG_SRCDIR([src/ustacks.cc]) AC_CONFIG_HEADERS([config.h]) m4_pattern_allow([AC_OPENMP]) # Get BAM library and include locations if requested AC_ARG_ENABLE([bam], AS_HELP_STRING([--enable-bam], [Enable Samtools' use of BAM files (requires BAM library to be installed).])) AS_IF([test "x$enable_bam" = "xyes"], [ AC_DEFINE([HAVE_BAM], [1], [Enable compilation with Samtools BAM library]) AC_ARG_WITH([bam-include-path], [AS_HELP_STRING([--with-bam-include-path], [location of Samtools BAM headers, defaults to /usr/include/bam])], [BAM_CFLAGS="-I$withval"], [BAM_CFLAGS='-I/usr/include/bam']) AC_SUBST([BAM_CFLAGS]) AC_ARG_WITH([bam-lib-path], [AS_HELP_STRING([--with-bam-lib-path], [location of Samtools BAM library])], [BAM_LIBS="$withval/libbam.a"], [BAM_LIBS='/usr/lib/libbam.a']) AC_SUBST([BAM_LIBS]) ]) # Enable use of Google Sparsehash and get include location if requested. 
AC_ARG_ENABLE([sparsehash], AS_HELP_STRING([--enable-sparsehash], [Enable the use of Google Sparsehash (must be installed).])) AS_IF([test "x$enable_sparsehash" = "xyes"], [ AC_DEFINE([HAVE_SPARSEHASH], [1], [Enable compilation with Google Sparsehash]) AC_ARG_WITH([sparsehash-include-path], [AS_HELP_STRING([--with-sparsehash-include-path], [location of Google Sparsehash headers])], [SPARSEHASH_CFLAGS="-I$withval"], [SPARSEHASH_CFLAGS=""]) AC_SUBST([SPARSEHASH_CFLAGS]) ]) # Checks for programs. AC_PROG_CXX AM_PROG_CC_C_O AX_CXX_COMPILE_STDCXX_11(, [mandatory]) # Checks for libraries. AC_CHECK_LIB([gomp], [omp_set_num_threads],, [AC_MSG_WARN([Unable to locate OpenMP library, you should probably specify '--disable-openmp'.])]) AC_CHECK_LIB([z], [gzread],, [AC_MSG_ERROR([Zlib not found, reading gzipped files will not be possible.])]) # Checks for header files. AC_HEADER_DIRENT AC_HEADER_STDC AC_CHECK_HEADERS([float.h limits.h stdlib.h string.h]) # Check for OpenMP parallel execution support AC_OPENMP # Checks for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL AC_C_CONST AC_TYPE_SIZE_T # Checks for library functions. AC_FUNC_CLOSEDIR_VOID AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([floor memset pow sqrt]) # For test harness AC_PROG_AWK AC_CONFIG_FILES([Makefile]) AC_OUTPUT stacks-1.35/INSTALL000644 000765 000024 00000022432 12335173442 014542 0ustar00catchenstaff000000 000000 Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc. This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== These are generic installation instructions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. 
It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. (Caching is disabled by default to prevent problems with accidental use of stale cache files.) If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You only need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. If you're using `csh' on an old version of System V, you might need to type `sh ./configure' instead to prevent `csh' from trying to execute `configure' itself. Running `configure' takes awhile. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package. 4. Type `make install' to install the programs and any data files and documentation. 5. You can remove the program binaries and object files from the source code directory by typing `make clean'. 
To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you must use a version of `make' that supports the `VPATH' variable, such as GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. If you have to use a `make' that does not support the `VPATH' variable, you have to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX'. 
You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. 
TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Here is a another example: /bin/bash ./configure CONFIG_SHELL=/bin/bash Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent configuration-related scripts to be executed by `/bin/bash'. 
`configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of the options to `configure', and exit. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. stacks-1.35/LICENSE000644 000765 000024 00000104513 12335173442 014517 0ustar00catchenstaff000000 000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . stacks-1.35/Makefile.am000644 000765 000024 00000025563 12571641525 015561 0ustar00catchenstaff000000 000000 bin_PROGRAMS = ustacks pstacks estacks cstacks sstacks rxstacks hstacks process_radtags process_shortreads \ kmer_filter clone_filter genotypes populations phasedstacks ustacks_SOURCES = src/ustacks.h src/ustacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc src/mst.h src/mst.cc src/cmb.h src/cmb.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/models.h src/models.cc src/utils.h src/utils.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h \ src/gzFasta.h src/gzFastq.h pstacks_SOURCES = src/pstacks.h src/pstacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc \ src/DNANSeq.h src/DNANSeq.cc src/DNASeq.h src/DNASeq.cc \ src/models.h src/models.cc src/utils.h src/utils.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h \ src/BamI.h estacks_SOURCES = src/estacks.h src/estacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc \ src/models.h src/models.cc src/utils.h 
src/utils.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h cstacks_SOURCES = src/cstacks.h src/cstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/kmers.h src/kmers.cc src/utils.h src/utils.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/input.h src/input.cc src/sql_utilities.h hstacks_SOURCES = src/hstacks.h src/hstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/kmers.h src/kmers.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h sstacks_SOURCES = src/sstacks.h src/sstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h rxstacks_SOURCES = src/rxstacks.h src/rxstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNANSeq.h src/DNANSeq.cc src/DNASeq.h src/DNASeq.cc \ src/mst.h src/mst.cc \ src/models.h src/models.cc \ src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h process_radtags_SOURCES = src/process_radtags.h src/process_radtags.cc src/constants.h \ src/utils.h src/utils.cc src/log_utils.h src/log_utils.cc \ src/write.h src/write.cc \ src/clean.h src/clean.cc \ src/file_io.h src/file_io.cc \ src/input.h src/input.cc src/BustardI.h src/BamUnalignedI.h src/FastqI.h src/gzFastq.h \ src/renz.h process_shortreads_SOURCES = src/process_shortreads.h src/process_shortreads.cc src/constants.h \ src/clean.h src/clean.cc \ src/file_io.h src/file_io.cc \ src/utils.h src/utils.cc src/log_utils.h src/log_utils.cc \ src/write.h src/write.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/BustardI.h src/BamUnalignedI.h src/FastqI.h src/gzFastq.h kmer_filter_SOURCES = src/kmer_filter.h 
src/kmer_filter.cc src/constants.h \ src/utils.h src/utils.cc \ src/write.h src/write.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/BustardI.h src/FastqI.h src/FastaI.h \ src/gzFastq.h src/gzFasta.h clone_filter_SOURCES = src/clone_filter.h src/clone_filter.cc src/constants.h \ src/kmers.h src/kmers.cc \ src/utils.h src/utils.cc \ src/write.h src/write.cc \ src/clean.h src/clean.cc src/file_io.h src/file_io.cc \ src/input.h src/input.cc src/BustardI.h src/FastqI.h src/FastaI.h \ src/gzFastq.h src/gzFasta.h genotypes_SOURCES = src/genotypes.h src/genotypes.cc src/constants.h \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc \ src/log_utils.h src/log_utils.cc \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/PopMap.h src/genotype_dictionaries.h \ src/input.h src/input.cc src/sql_utilities.h src/renz.h populations_SOURCES = src/populations.h src/populations.cc src/constants.h \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc \ src/log_utils.h src/log_utils.cc \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/PopMap.h src/PopSum.h src/genotype_dictionaries.h \ src/input.h src/input.cc src/sql_utilities.h src/renz.h \ src/bootstrap.h src/ordered.h src/smoothing.h src/smoothing_utils.h phasedstacks_SOURCES = src/phasedstacks.h src/phasedstacks.cc src/constants.h \ src/locus.h src/locus.cc \ src/input.h src/input.cc src/sql_utilities.h \ src/log_utils.h src/log_utils.cc \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc ustacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) ustacks_LDFLAGS = $(OPENMP_CFLAGS) pstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS) $(SPARSEHASH_CFLAGS) pstacks_LDFLAGS = $(OPENMP_CFLAGS) pstacks_LDADD = $(BAM_LIBS) estacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) estacks_LDFLAGS = $(OPENMP_CFLAGS) cstacks_CXXFLAGS = $(OPENMP_CFLAGS) 
$(SPARSEHASH_CFLAGS) cstacks_LDFLAGS = $(OPENMP_CFLAGS) hstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) hstacks_LDFLAGS = $(OPENMP_CFLAGS) sstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) sstacks_LDFLAGS = $(OPENMP_CFLAGS) rxstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) rxstacks_LDFLAGS = $(OPENMP_CFLAGS) process_radtags_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) process_radtags_LDFLAGS = $(OPENMP_CFLAGS) process_radtags_LDADD = $(BAM_LIBS) process_shortreads_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) process_shortreads_LDFLAGS = $(OPENMP_CFLAGS) process_shortreads_LDADD = $(BAM_LIBS) kmer_filter_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) kmer_filter_LDFLAGS = $(OPENMP_CFLAGS) genotypes_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) genotypes_LDFLAGS = $(OPENMP_CFLAGS) populations_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) populations_LDFLAGS = $(OPENMP_CFLAGS) phasedstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) phasedstacks_LDFLAGS = $(OPENMP_CFLAGS) clone_filter_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) clone_filter_LDFLAGS = $(OPENMP_CFLAGS) clone_filter_LDADD = $(BAM_LIBS) dist_bin_SCRIPTS = scripts/denovo_map.pl scripts/ref_map.pl scripts/export_sql.pl \ scripts/sort_read_pairs.pl scripts/exec_velvet.pl scripts/load_sequences.pl \ scripts/index_radtags.pl scripts/load_radtags.pl scripts/stacks_export_notify.pl dist_noinst_SCRIPTS = autogen.sh scripts/extract_interpop_chars.pl scripts/convert_stacks.pl nobase_pkgdata_DATA = sql/mysql.cnf.dist sql/catalog_index.sql sql/stacks.sql sql/tag_index.sql sql/chr_index.sql \ php/CatalogClass.php php/annotate_marker.php php/constants.php.dist php/index.php php/tags.php \ php/Locus.php php/catalog.php php/correct_genotypes.php php/correct_genotype.php php/export_batch.php php/last_modified.php \ php/version.php php/catalog_genotypes.php php/db_functions.php php/header.php php/samples.php \ 
php/stacks_functions.php php/view_sequence.php php/sequence_blast.php \ php/pop_view.php php/sumstat_view.php php/hapstat_view.php php/fst_view.php php/phist_view.php php/stack_view.php \ php/population_view.js php/ajax.js php/annotate.js php/stacks.js php/export.js php/stacks.css \ php/images/caret-d.png php/images/caret-u.png php/images/excel_icon.png php/images/l-arrow-disabled.png \ php/images/l-arrow.png php/images/r-arrow-disabled.png php/images/r-arrow.png php/images/stacks_bg.png \ php/images/stacks_logo_rev_small.png LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) $(abs_top_srcdir)/tests/tap-driver.sh TESTS = tests/process_radtags.t tests/kmer_filter.t tests/ustacks.t tests/pstacks.t EXTRA_DIST = $(nobase_pkgdata_DATA) LICENSE INSTALL README ChangeLog $(TESTS) pkglocalstatedir = $(localstatedir)/$(PACKAGE) debug: $(MAKE) all "CXXFLAGS=-g -Wall -DDEBUG -std=gnu++0x" install-data-hook: sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/denovo_map.pl > $(DESTDIR)$(bindir)/denovo_map.pl.subst mv $(DESTDIR)$(bindir)/denovo_map.pl.subst $(DESTDIR)$(bindir)/denovo_map.pl chmod +x $(DESTDIR)$(bindir)/denovo_map.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/ref_map.pl > $(DESTDIR)$(bindir)/ref_map.pl.subst mv $(DESTDIR)$(bindir)/ref_map.pl.subst $(DESTDIR)$(bindir)/ref_map.pl chmod +x $(DESTDIR)$(bindir)/ref_map.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/export_sql.pl > $(DESTDIR)$(bindir)/export_sql.pl.subst mv $(DESTDIR)$(bindir)/export_sql.pl.subst $(DESTDIR)$(bindir)/export_sql.pl chmod +x $(DESTDIR)$(bindir)/export_sql.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/index_radtags.pl > $(DESTDIR)$(bindir)/index_radtags.pl.subst mv $(DESTDIR)$(bindir)/index_radtags.pl.subst $(DESTDIR)$(bindir)/index_radtags.pl chmod +x 
$(DESTDIR)$(bindir)/index_radtags.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_radtags.pl > $(DESTDIR)$(bindir)/load_radtags.pl.subst mv $(DESTDIR)$(bindir)/load_radtags.pl.subst $(DESTDIR)$(bindir)/load_radtags.pl chmod +x $(DESTDIR)$(bindir)/load_radtags.pl sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/sort_read_pairs.pl > $(DESTDIR)$(bindir)/sort_read_pairs.pl.subst mv $(DESTDIR)$(bindir)/sort_read_pairs.pl.subst $(DESTDIR)$(bindir)/sort_read_pairs.pl chmod +x $(DESTDIR)$(bindir)/sort_read_pairs.pl sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/exec_velvet.pl > $(DESTDIR)$(bindir)/exec_velvet.pl.subst mv $(DESTDIR)$(bindir)/exec_velvet.pl.subst $(DESTDIR)$(bindir)/exec_velvet.pl chmod +x $(DESTDIR)$(bindir)/exec_velvet.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_sequences.pl > $(DESTDIR)$(bindir)/load_sequences.pl.subst mv $(DESTDIR)$(bindir)/load_sequences.pl.subst $(DESTDIR)$(bindir)/load_sequences.pl chmod +x $(DESTDIR)$(bindir)/load_sequences.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \ $(DESTDIR)$(bindir)/stacks_export_notify.pl > $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst mv $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst $(DESTDIR)$(bindir)/stacks_export_notify.pl chmod +x $(DESTDIR)$(bindir)/stacks_export_notify.pl sed -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \ $(DESTDIR)$(pkgdatadir)/php/constants.php.dist > $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst mv $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst $(DESTDIR)$(pkgdatadir)/php/constants.php.dist echo $(VERSION) > $(DESTDIR)$(pkgdatadir)/php/version.php $(install_sh) -d -m 755 $(DESTDIR)$(pkgdatadir)/php/export uninstall-hook: rm -rf $(DESTDIR)$(pkglocalstatedir) stacks-1.35/Makefile.in000644 000765 000024 00001112502 12571641550 015557 0ustar00catchenstaff000000 000000 # 
Makefile.in generated by automake 1.14.1 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2013 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ 
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : bin_PROGRAMS = ustacks$(EXEEXT) pstacks$(EXEEXT) estacks$(EXEEXT) \ cstacks$(EXEEXT) sstacks$(EXEEXT) rxstacks$(EXEEXT) \ hstacks$(EXEEXT) process_radtags$(EXEEXT) \ process_shortreads$(EXEEXT) kmer_filter$(EXEEXT) \ clone_filter$(EXEEXT) genotypes$(EXEEXT) populations$(EXEEXT) \ phasedstacks$(EXEEXT) subdir = . DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/configure $(am__configure_deps) \ $(srcdir)/config.h.in $(dist_bin_SCRIPTS) \ $(dist_noinst_SCRIPTS) $(top_srcdir)/config/depcomp \ $(top_srcdir)/config/test-driver ChangeLog INSTALL README \ config/compile config/depcomp config/install-sh config/missing \ $(top_srcdir)/config/compile $(top_srcdir)/config/install-sh \ $(top_srcdir)/config/missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(pkgdatadir)" PROGRAMS = $(bin_PROGRAMS) am__dirstamp = $(am__leading_dot)dirstamp am_clone_filter_OBJECTS = src/clone_filter-clone_filter.$(OBJEXT) \ src/clone_filter-kmers.$(OBJEXT) \ src/clone_filter-utils.$(OBJEXT) \ src/clone_filter-write.$(OBJEXT) \ src/clone_filter-clean.$(OBJEXT) \ src/clone_filter-file_io.$(OBJEXT) \ src/clone_filter-input.$(OBJEXT) clone_filter_OBJECTS = $(am_clone_filter_OBJECTS) 
am__DEPENDENCIES_1 = clone_filter_DEPENDENCIES = $(am__DEPENDENCIES_1) clone_filter_LINK = $(CXXLD) $(clone_filter_CXXFLAGS) $(CXXFLAGS) \ $(clone_filter_LDFLAGS) $(LDFLAGS) -o $@ am_cstacks_OBJECTS = src/cstacks-cstacks.$(OBJEXT) \ src/cstacks-stacks.$(OBJEXT) src/cstacks-locus.$(OBJEXT) \ src/cstacks-kmers.$(OBJEXT) src/cstacks-utils.$(OBJEXT) \ src/cstacks-DNASeq.$(OBJEXT) src/cstacks-DNANSeq.$(OBJEXT) \ src/cstacks-input.$(OBJEXT) cstacks_OBJECTS = $(am_cstacks_OBJECTS) cstacks_LDADD = $(LDADD) cstacks_LINK = $(CXXLD) $(cstacks_CXXFLAGS) $(CXXFLAGS) \ $(cstacks_LDFLAGS) $(LDFLAGS) -o $@ am_estacks_OBJECTS = src/estacks-estacks.$(OBJEXT) \ src/estacks-stacks.$(OBJEXT) src/estacks-mstack.$(OBJEXT) \ src/estacks-models.$(OBJEXT) src/estacks-utils.$(OBJEXT) \ src/estacks-DNASeq.$(OBJEXT) src/estacks-DNANSeq.$(OBJEXT) \ src/estacks-input.$(OBJEXT) estacks_OBJECTS = $(am_estacks_OBJECTS) estacks_LDADD = $(LDADD) estacks_LINK = $(CXXLD) $(estacks_CXXFLAGS) $(CXXFLAGS) \ $(estacks_LDFLAGS) $(LDFLAGS) -o $@ am_genotypes_OBJECTS = src/genotypes-genotypes.$(OBJEXT) \ src/genotypes-utils.$(OBJEXT) \ src/genotypes-catalog_utils.$(OBJEXT) \ src/genotypes-log_utils.$(OBJEXT) \ src/genotypes-stacks.$(OBJEXT) src/genotypes-locus.$(OBJEXT) \ src/genotypes-DNASeq.$(OBJEXT) src/genotypes-DNANSeq.$(OBJEXT) \ src/genotypes-input.$(OBJEXT) genotypes_OBJECTS = $(am_genotypes_OBJECTS) genotypes_LDADD = $(LDADD) genotypes_LINK = $(CXXLD) $(genotypes_CXXFLAGS) $(CXXFLAGS) \ $(genotypes_LDFLAGS) $(LDFLAGS) -o $@ am_hstacks_OBJECTS = src/hstacks-hstacks.$(OBJEXT) \ src/hstacks-stacks.$(OBJEXT) src/hstacks-locus.$(OBJEXT) \ src/hstacks-kmers.$(OBJEXT) src/hstacks-DNASeq.$(OBJEXT) \ src/hstacks-DNANSeq.$(OBJEXT) src/hstacks-utils.$(OBJEXT) \ src/hstacks-input.$(OBJEXT) hstacks_OBJECTS = $(am_hstacks_OBJECTS) hstacks_LDADD = $(LDADD) hstacks_LINK = $(CXXLD) $(hstacks_CXXFLAGS) $(CXXFLAGS) \ $(hstacks_LDFLAGS) $(LDFLAGS) -o $@ am_kmer_filter_OBJECTS = src/kmer_filter-kmer_filter.$(OBJEXT) \ 
src/kmer_filter-utils.$(OBJEXT) \ src/kmer_filter-write.$(OBJEXT) \ src/kmer_filter-kmers.$(OBJEXT) \ src/kmer_filter-input.$(OBJEXT) kmer_filter_OBJECTS = $(am_kmer_filter_OBJECTS) kmer_filter_LDADD = $(LDADD) kmer_filter_LINK = $(CXXLD) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) \ $(kmer_filter_LDFLAGS) $(LDFLAGS) -o $@ am_phasedstacks_OBJECTS = src/phasedstacks-phasedstacks.$(OBJEXT) \ src/phasedstacks-locus.$(OBJEXT) \ src/phasedstacks-input.$(OBJEXT) \ src/phasedstacks-log_utils.$(OBJEXT) \ src/phasedstacks-utils.$(OBJEXT) \ src/phasedstacks-catalog_utils.$(OBJEXT) phasedstacks_OBJECTS = $(am_phasedstacks_OBJECTS) phasedstacks_LDADD = $(LDADD) phasedstacks_LINK = $(CXXLD) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) \ $(phasedstacks_LDFLAGS) $(LDFLAGS) -o $@ am_populations_OBJECTS = src/populations-populations.$(OBJEXT) \ src/populations-utils.$(OBJEXT) \ src/populations-catalog_utils.$(OBJEXT) \ src/populations-log_utils.$(OBJEXT) \ src/populations-stacks.$(OBJEXT) \ src/populations-locus.$(OBJEXT) \ src/populations-DNASeq.$(OBJEXT) \ src/populations-DNANSeq.$(OBJEXT) \ src/populations-input.$(OBJEXT) populations_OBJECTS = $(am_populations_OBJECTS) populations_LDADD = $(LDADD) populations_LINK = $(CXXLD) $(populations_CXXFLAGS) $(CXXFLAGS) \ $(populations_LDFLAGS) $(LDFLAGS) -o $@ am_process_radtags_OBJECTS = \ src/process_radtags-process_radtags.$(OBJEXT) \ src/process_radtags-utils.$(OBJEXT) \ src/process_radtags-log_utils.$(OBJEXT) \ src/process_radtags-write.$(OBJEXT) \ src/process_radtags-clean.$(OBJEXT) \ src/process_radtags-file_io.$(OBJEXT) \ src/process_radtags-input.$(OBJEXT) process_radtags_OBJECTS = $(am_process_radtags_OBJECTS) process_radtags_DEPENDENCIES = $(am__DEPENDENCIES_1) process_radtags_LINK = $(CXXLD) $(process_radtags_CXXFLAGS) \ $(CXXFLAGS) $(process_radtags_LDFLAGS) $(LDFLAGS) -o $@ am_process_shortreads_OBJECTS = \ src/process_shortreads-process_shortreads.$(OBJEXT) \ src/process_shortreads-clean.$(OBJEXT) \ 
src/process_shortreads-file_io.$(OBJEXT) \ src/process_shortreads-utils.$(OBJEXT) \ src/process_shortreads-log_utils.$(OBJEXT) \ src/process_shortreads-write.$(OBJEXT) \ src/process_shortreads-kmers.$(OBJEXT) \ src/process_shortreads-input.$(OBJEXT) process_shortreads_OBJECTS = $(am_process_shortreads_OBJECTS) process_shortreads_DEPENDENCIES = $(am__DEPENDENCIES_1) process_shortreads_LINK = $(CXXLD) $(process_shortreads_CXXFLAGS) \ $(CXXFLAGS) $(process_shortreads_LDFLAGS) $(LDFLAGS) -o $@ am_pstacks_OBJECTS = src/pstacks-pstacks.$(OBJEXT) \ src/pstacks-stacks.$(OBJEXT) src/pstacks-mstack.$(OBJEXT) \ src/pstacks-DNANSeq.$(OBJEXT) src/pstacks-DNASeq.$(OBJEXT) \ src/pstacks-models.$(OBJEXT) src/pstacks-utils.$(OBJEXT) \ src/pstacks-input.$(OBJEXT) pstacks_OBJECTS = $(am_pstacks_OBJECTS) pstacks_DEPENDENCIES = $(am__DEPENDENCIES_1) pstacks_LINK = $(CXXLD) $(pstacks_CXXFLAGS) $(CXXFLAGS) \ $(pstacks_LDFLAGS) $(LDFLAGS) -o $@ am_rxstacks_OBJECTS = src/rxstacks-rxstacks.$(OBJEXT) \ src/rxstacks-stacks.$(OBJEXT) src/rxstacks-locus.$(OBJEXT) \ src/rxstacks-DNANSeq.$(OBJEXT) src/rxstacks-DNASeq.$(OBJEXT) \ src/rxstacks-mst.$(OBJEXT) src/rxstacks-models.$(OBJEXT) \ src/rxstacks-utils.$(OBJEXT) src/rxstacks-input.$(OBJEXT) rxstacks_OBJECTS = $(am_rxstacks_OBJECTS) rxstacks_LDADD = $(LDADD) rxstacks_LINK = $(CXXLD) $(rxstacks_CXXFLAGS) $(CXXFLAGS) \ $(rxstacks_LDFLAGS) $(LDFLAGS) -o $@ am_sstacks_OBJECTS = src/sstacks-sstacks.$(OBJEXT) \ src/sstacks-stacks.$(OBJEXT) src/sstacks-locus.$(OBJEXT) \ src/sstacks-DNASeq.$(OBJEXT) src/sstacks-DNANSeq.$(OBJEXT) \ src/sstacks-utils.$(OBJEXT) src/sstacks-input.$(OBJEXT) sstacks_OBJECTS = $(am_sstacks_OBJECTS) sstacks_LDADD = $(LDADD) sstacks_LINK = $(CXXLD) $(sstacks_CXXFLAGS) $(CXXFLAGS) \ $(sstacks_LDFLAGS) $(LDFLAGS) -o $@ am_ustacks_OBJECTS = src/ustacks-ustacks.$(OBJEXT) \ src/ustacks-stacks.$(OBJEXT) src/ustacks-mstack.$(OBJEXT) \ src/ustacks-mst.$(OBJEXT) src/ustacks-cmb.$(OBJEXT) \ src/ustacks-DNASeq.$(OBJEXT) 
src/ustacks-DNANSeq.$(OBJEXT) \ src/ustacks-models.$(OBJEXT) src/ustacks-utils.$(OBJEXT) \ src/ustacks-kmers.$(OBJEXT) src/ustacks-input.$(OBJEXT) ustacks_OBJECTS = $(am_ustacks_OBJECTS) ustacks_LDADD = $(LDADD) ustacks_LINK = $(CXXLD) $(ustacks_CXXFLAGS) $(CXXFLAGS) \ $(ustacks_LDFLAGS) $(LDFLAGS) -o $@ am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } SCRIPTS = $(dist_bin_SCRIPTS) $(dist_noinst_SCRIPTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/config/depcomp am__depfiles_maybe = depfiles am__mv = mv -f AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) AM_V_CXX = $(am__v_CXX_@AM_V@) am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) am__v_CXX_0 = @echo " CXX " $@; am__v_CXX_1 = CXXLD = $(CXX) CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) am__v_CXXLD_0 = @echo " CXXLD " $@; am__v_CXXLD_1 = COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(clone_filter_SOURCES) $(cstacks_SOURCES) \ $(estacks_SOURCES) $(genotypes_SOURCES) $(hstacks_SOURCES) \ $(kmer_filter_SOURCES) $(phasedstacks_SOURCES) \ $(populations_SOURCES) $(process_radtags_SOURCES) \ $(process_shortreads_SOURCES) $(pstacks_SOURCES) \ $(rxstacks_SOURCES) $(sstacks_SOURCES) $(ustacks_SOURCES) DIST_SOURCES = $(clone_filter_SOURCES) $(cstacks_SOURCES) \ $(estacks_SOURCES) 
$(genotypes_SOURCES) $(hstacks_SOURCES) \ $(kmer_filter_SOURCES) $(phasedstacks_SOURCES) \ $(populations_SOURCES) $(process_radtags_SOURCES) \ $(process_shortreads_SOURCES) $(pstacks_SOURCES) \ $(rxstacks_SOURCES) $(sstacks_SOURCES) $(ustacks_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac DATA = $(nobase_pkgdata_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ $(LISP)config.h.in # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` ETAGS = etags CTAGS = ctags CSCOPE = cscope AM_RECURSIVE_TARGETS = cscope check recheck am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that 
are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. 
am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/config/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__post_remove_distdir = $(am__remove_distdir) DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best DIST_TARGETS = dist-gzip distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . 
-type f -print ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BAM_CFLAGS = @BAM_CFLAGS@ BAM_LIBS = @BAM_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SPARSEHASH_CFLAGS = @SPARSEHASH_CFLAGS@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build_alias = @build_alias@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host_alias = @host_alias@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = 
@mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ ustacks_SOURCES = src/ustacks.h src/ustacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc src/mst.h src/mst.cc src/cmb.h src/cmb.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/models.h src/models.cc src/utils.h src/utils.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h \ src/gzFasta.h src/gzFastq.h pstacks_SOURCES = src/pstacks.h src/pstacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc \ src/DNANSeq.h src/DNANSeq.cc src/DNASeq.h src/DNASeq.cc \ src/models.h src/models.cc src/utils.h src/utils.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h \ src/BamI.h estacks_SOURCES = src/estacks.h src/estacks.cc src/stacks.h src/stacks.cc src/constants.h \ src/mstack.h src/mstack.cc \ src/models.h src/models.cc src/utils.h src/utils.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/input.h src/input.cc src/Tsv.h src/BowtieI.h src/FastaI.h src/FastqI.h src/SamI.h cstacks_SOURCES = src/cstacks.h src/cstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/kmers.h src/kmers.cc src/utils.h src/utils.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/input.h src/input.cc src/sql_utilities.h hstacks_SOURCES = src/hstacks.h src/hstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/kmers.h src/kmers.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h 
sstacks_SOURCES = src/sstacks.h src/sstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h rxstacks_SOURCES = src/rxstacks.h src/rxstacks.cc src/constants.h \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNANSeq.h src/DNANSeq.cc src/DNASeq.h src/DNASeq.cc \ src/mst.h src/mst.cc \ src/models.h src/models.cc \ src/utils.h src/utils.cc \ src/input.h src/input.cc src/sql_utilities.h process_radtags_SOURCES = src/process_radtags.h src/process_radtags.cc src/constants.h \ src/utils.h src/utils.cc src/log_utils.h src/log_utils.cc \ src/write.h src/write.cc \ src/clean.h src/clean.cc \ src/file_io.h src/file_io.cc \ src/input.h src/input.cc src/BustardI.h src/BamUnalignedI.h src/FastqI.h src/gzFastq.h \ src/renz.h process_shortreads_SOURCES = src/process_shortreads.h src/process_shortreads.cc src/constants.h \ src/clean.h src/clean.cc \ src/file_io.h src/file_io.cc \ src/utils.h src/utils.cc src/log_utils.h src/log_utils.cc \ src/write.h src/write.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/BustardI.h src/BamUnalignedI.h src/FastqI.h src/gzFastq.h kmer_filter_SOURCES = src/kmer_filter.h src/kmer_filter.cc src/constants.h \ src/utils.h src/utils.cc \ src/write.h src/write.cc \ src/kmers.h src/kmers.cc \ src/input.h src/input.cc src/BustardI.h src/FastqI.h src/FastaI.h \ src/gzFastq.h src/gzFasta.h clone_filter_SOURCES = src/clone_filter.h src/clone_filter.cc src/constants.h \ src/kmers.h src/kmers.cc \ src/utils.h src/utils.cc \ src/write.h src/write.cc \ src/clean.h src/clean.cc src/file_io.h src/file_io.cc \ src/input.h src/input.cc src/BustardI.h src/FastqI.h src/FastaI.h \ src/gzFastq.h src/gzFasta.h genotypes_SOURCES = src/genotypes.h src/genotypes.cc src/constants.h \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc \ src/log_utils.h src/log_utils.cc \ src/stacks.h 
src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/PopMap.h src/genotype_dictionaries.h \ src/input.h src/input.cc src/sql_utilities.h src/renz.h populations_SOURCES = src/populations.h src/populations.cc src/constants.h \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc \ src/log_utils.h src/log_utils.cc \ src/stacks.h src/stacks.cc src/locus.h src/locus.cc \ src/DNASeq.h src/DNASeq.cc src/DNANSeq.h src/DNANSeq.cc \ src/PopMap.h src/PopSum.h src/genotype_dictionaries.h \ src/input.h src/input.cc src/sql_utilities.h src/renz.h \ src/bootstrap.h src/ordered.h src/smoothing.h src/smoothing_utils.h phasedstacks_SOURCES = src/phasedstacks.h src/phasedstacks.cc src/constants.h \ src/locus.h src/locus.cc \ src/input.h src/input.cc src/sql_utilities.h \ src/log_utils.h src/log_utils.cc \ src/utils.h src/utils.cc src/catalog_utils.h src/catalog_utils.cc ustacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) ustacks_LDFLAGS = $(OPENMP_CFLAGS) pstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS) $(SPARSEHASH_CFLAGS) pstacks_LDFLAGS = $(OPENMP_CFLAGS) pstacks_LDADD = $(BAM_LIBS) estacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) estacks_LDFLAGS = $(OPENMP_CFLAGS) cstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) cstacks_LDFLAGS = $(OPENMP_CFLAGS) hstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) hstacks_LDFLAGS = $(OPENMP_CFLAGS) sstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) sstacks_LDFLAGS = $(OPENMP_CFLAGS) rxstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) rxstacks_LDFLAGS = $(OPENMP_CFLAGS) process_radtags_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) process_radtags_LDFLAGS = $(OPENMP_CFLAGS) process_radtags_LDADD = $(BAM_LIBS) process_shortreads_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) process_shortreads_LDFLAGS = $(OPENMP_CFLAGS) process_shortreads_LDADD = $(BAM_LIBS) kmer_filter_CXXFLAGS = $(OPENMP_CFLAGS) 
$(SPARSEHASH_CFLAGS) kmer_filter_LDFLAGS = $(OPENMP_CFLAGS) genotypes_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) genotypes_LDFLAGS = $(OPENMP_CFLAGS) populations_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) populations_LDFLAGS = $(OPENMP_CFLAGS) phasedstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) phasedstacks_LDFLAGS = $(OPENMP_CFLAGS) clone_filter_CXXFLAGS = $(OPENMP_CFLAGS) $(SPARSEHASH_CFLAGS) $(BAM_CFLAGS) clone_filter_LDFLAGS = $(OPENMP_CFLAGS) clone_filter_LDADD = $(BAM_LIBS) dist_bin_SCRIPTS = scripts/denovo_map.pl scripts/ref_map.pl scripts/export_sql.pl \ scripts/sort_read_pairs.pl scripts/exec_velvet.pl scripts/load_sequences.pl \ scripts/index_radtags.pl scripts/load_radtags.pl scripts/stacks_export_notify.pl dist_noinst_SCRIPTS = autogen.sh scripts/extract_interpop_chars.pl scripts/convert_stacks.pl nobase_pkgdata_DATA = sql/mysql.cnf.dist sql/catalog_index.sql sql/stacks.sql sql/tag_index.sql sql/chr_index.sql \ php/CatalogClass.php php/annotate_marker.php php/constants.php.dist php/index.php php/tags.php \ php/Locus.php php/catalog.php php/correct_genotypes.php php/correct_genotype.php php/export_batch.php php/last_modified.php \ php/version.php php/catalog_genotypes.php php/db_functions.php php/header.php php/samples.php \ php/stacks_functions.php php/view_sequence.php php/sequence_blast.php \ php/pop_view.php php/sumstat_view.php php/hapstat_view.php php/fst_view.php php/phist_view.php php/stack_view.php \ php/population_view.js php/ajax.js php/annotate.js php/stacks.js php/export.js php/stacks.css \ php/images/caret-d.png php/images/caret-u.png php/images/excel_icon.png php/images/l-arrow-disabled.png \ php/images/l-arrow.png php/images/r-arrow-disabled.png php/images/r-arrow.png php/images/stacks_bg.png \ php/images/stacks_logo_rev_small.png LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) $(abs_top_srcdir)/tests/tap-driver.sh TESTS = tests/process_radtags.t tests/kmer_filter.t tests/ustacks.t tests/pstacks.t EXTRA_DIST = 
$(nobase_pkgdata_DATA) LICENSE INSTALL README ChangeLog $(TESTS) pkglocalstatedir = $(localstatedir)/$(PACKAGE) all: config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .cc .log .o .obj .test .test$(EXEEXT) .trs am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): config.h: stamp-h1 @test -f $@ || rm -f stamp-h1 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status config.h $(srcdir)/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f config.h stamp-h1 install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ 
while read p p1; do if test -f $$p \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) src/$(am__dirstamp): @$(MKDIR_P) src @: > src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) src/$(DEPDIR) @: > src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-clone_filter.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-write.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-clean.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-file_io.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/clone_filter-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) clone_filter$(EXEEXT): 
$(clone_filter_OBJECTS) $(clone_filter_DEPENDENCIES) $(EXTRA_clone_filter_DEPENDENCIES) @rm -f clone_filter$(EXEEXT) $(AM_V_CXXLD)$(clone_filter_LINK) $(clone_filter_OBJECTS) $(clone_filter_LDADD) $(LIBS) src/cstacks-cstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/cstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) cstacks$(EXEEXT): $(cstacks_OBJECTS) $(cstacks_DEPENDENCIES) $(EXTRA_cstacks_DEPENDENCIES) @rm -f cstacks$(EXEEXT) $(AM_V_CXXLD)$(cstacks_LINK) $(cstacks_OBJECTS) $(cstacks_LDADD) $(LIBS) src/estacks-estacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-mstack.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-models.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/estacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) estacks$(EXEEXT): $(estacks_OBJECTS) $(estacks_DEPENDENCIES) $(EXTRA_estacks_DEPENDENCIES) @rm -f estacks$(EXEEXT) $(AM_V_CXXLD)$(estacks_LINK) $(estacks_OBJECTS) $(estacks_LDADD) $(LIBS) src/genotypes-genotypes.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) 
src/genotypes-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-catalog_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-log_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/genotypes-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) genotypes$(EXEEXT): $(genotypes_OBJECTS) $(genotypes_DEPENDENCIES) $(EXTRA_genotypes_DEPENDENCIES) @rm -f genotypes$(EXEEXT) $(AM_V_CXXLD)$(genotypes_LINK) $(genotypes_OBJECTS) $(genotypes_LDADD) $(LIBS) src/hstacks-hstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/hstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) hstacks$(EXEEXT): $(hstacks_OBJECTS) $(hstacks_DEPENDENCIES) $(EXTRA_hstacks_DEPENDENCIES) @rm -f hstacks$(EXEEXT) $(AM_V_CXXLD)$(hstacks_LINK) $(hstacks_OBJECTS) $(hstacks_LDADD) $(LIBS) src/kmer_filter-kmer_filter.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/kmer_filter-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/kmer_filter-write.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) 
src/kmer_filter-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/kmer_filter-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) kmer_filter$(EXEEXT): $(kmer_filter_OBJECTS) $(kmer_filter_DEPENDENCIES) $(EXTRA_kmer_filter_DEPENDENCIES) @rm -f kmer_filter$(EXEEXT) $(AM_V_CXXLD)$(kmer_filter_LINK) $(kmer_filter_OBJECTS) $(kmer_filter_LDADD) $(LIBS) src/phasedstacks-phasedstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/phasedstacks-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/phasedstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/phasedstacks-log_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/phasedstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/phasedstacks-catalog_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) phasedstacks$(EXEEXT): $(phasedstacks_OBJECTS) $(phasedstacks_DEPENDENCIES) $(EXTRA_phasedstacks_DEPENDENCIES) @rm -f phasedstacks$(EXEEXT) $(AM_V_CXXLD)$(phasedstacks_LINK) $(phasedstacks_OBJECTS) $(phasedstacks_LDADD) $(LIBS) src/populations-populations.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-catalog_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-log_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/populations-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) populations$(EXEEXT): $(populations_OBJECTS) 
$(populations_DEPENDENCIES) $(EXTRA_populations_DEPENDENCIES) @rm -f populations$(EXEEXT) $(AM_V_CXXLD)$(populations_LINK) $(populations_OBJECTS) $(populations_LDADD) $(LIBS) src/process_radtags-process_radtags.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-log_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-write.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-clean.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-file_io.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_radtags-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) process_radtags$(EXEEXT): $(process_radtags_OBJECTS) $(process_radtags_DEPENDENCIES) $(EXTRA_process_radtags_DEPENDENCIES) @rm -f process_radtags$(EXEEXT) $(AM_V_CXXLD)$(process_radtags_LINK) $(process_radtags_OBJECTS) $(process_radtags_LDADD) $(LIBS) src/process_shortreads-process_shortreads.$(OBJEXT): \ src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-clean.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-file_io.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-log_utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-write.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/process_shortreads-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) process_shortreads$(EXEEXT): $(process_shortreads_OBJECTS) $(process_shortreads_DEPENDENCIES) $(EXTRA_process_shortreads_DEPENDENCIES) @rm -f process_shortreads$(EXEEXT) 
$(AM_V_CXXLD)$(process_shortreads_LINK) $(process_shortreads_OBJECTS) $(process_shortreads_LDADD) $(LIBS) src/pstacks-pstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-mstack.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-models.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/pstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) pstacks$(EXEEXT): $(pstacks_OBJECTS) $(pstacks_DEPENDENCIES) $(EXTRA_pstacks_DEPENDENCIES) @rm -f pstacks$(EXEEXT) $(AM_V_CXXLD)$(pstacks_LINK) $(pstacks_OBJECTS) $(pstacks_LDADD) $(LIBS) src/rxstacks-rxstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-mst.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-models.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/rxstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) rxstacks$(EXEEXT): $(rxstacks_OBJECTS) $(rxstacks_DEPENDENCIES) $(EXTRA_rxstacks_DEPENDENCIES) @rm -f rxstacks$(EXEEXT) $(AM_V_CXXLD)$(rxstacks_LINK) $(rxstacks_OBJECTS) $(rxstacks_LDADD) $(LIBS) src/sstacks-sstacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) 
src/sstacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/sstacks-locus.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/sstacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/sstacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/sstacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/sstacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) sstacks$(EXEEXT): $(sstacks_OBJECTS) $(sstacks_DEPENDENCIES) $(EXTRA_sstacks_DEPENDENCIES) @rm -f sstacks$(EXEEXT) $(AM_V_CXXLD)$(sstacks_LINK) $(sstacks_OBJECTS) $(sstacks_LDADD) $(LIBS) src/ustacks-ustacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-stacks.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-mstack.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-mst.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-cmb.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-DNASeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-DNANSeq.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-models.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-utils.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-kmers.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/ustacks-input.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) ustacks$(EXEEXT): $(ustacks_OBJECTS) $(ustacks_DEPENDENCIES) $(EXTRA_ustacks_DEPENDENCIES) @rm -f ustacks$(EXEEXT) $(AM_V_CXXLD)$(ustacks_LINK) $(ustacks_OBJECTS) $(ustacks_LDADD) $(LIBS) install-dist_binSCRIPTS: $(dist_bin_SCRIPTS) @$(NORMAL_INSTALL) @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 
1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n' \ -e 'h;s|.*|.|' \ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) { files[d] = files[d] " " $$1; \ if (++n[d] == $(am__install_max)) { \ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ else { print "f", d "/" $$4, $$1 } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-dist_binSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 's,.*/,,;$(transform)'`; \ dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f src/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-clean.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-clone_filter.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-file_io.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-kmers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/clone_filter-write.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/cstacks-cstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-kmers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/cstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-estacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-models.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-mstack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/estacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-catalog_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-genotypes.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-log_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/genotypes-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/hstacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-hstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-kmers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/hstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/kmer_filter-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/kmer_filter-kmer_filter.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/kmer_filter-kmers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/kmer_filter-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/kmer_filter-write.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-catalog_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-log_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-phasedstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/phasedstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-catalog_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-log_utils.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-populations.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/populations-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-clean.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-file_io.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-log_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-process_radtags.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_radtags-write.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-clean.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-file_io.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-kmers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-log_utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-process_shortreads.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/process_shortreads-write.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-models.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/pstacks-mstack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-pstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-models.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-mst.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-rxstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/rxstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-locus.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-sstacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/sstacks-utils.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-DNANSeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-DNASeq.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-cmb.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-input.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-kmers.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-models.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-mst.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-mstack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-stacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-ustacks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/ustacks-utils.Po@am__quote@ .cc.o: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< .cc.obj: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` src/clone_filter-clone_filter.o: src/clone_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-clone_filter.o -MD -MP -MF src/$(DEPDIR)/clone_filter-clone_filter.Tpo -c -o src/clone_filter-clone_filter.o `test -f 'src/clone_filter.cc' || echo '$(srcdir)/'`src/clone_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/clone_filter-clone_filter.Tpo src/$(DEPDIR)/clone_filter-clone_filter.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clone_filter.cc' object='src/clone_filter-clone_filter.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-clone_filter.o `test -f 'src/clone_filter.cc' || echo '$(srcdir)/'`src/clone_filter.cc src/clone_filter-clone_filter.obj: src/clone_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-clone_filter.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-clone_filter.Tpo -c -o src/clone_filter-clone_filter.obj `if test -f 'src/clone_filter.cc'; then $(CYGPATH_W) 'src/clone_filter.cc'; else $(CYGPATH_W) '$(srcdir)/src/clone_filter.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-clone_filter.Tpo src/$(DEPDIR)/clone_filter-clone_filter.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clone_filter.cc' object='src/clone_filter-clone_filter.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-clone_filter.obj `if test -f 'src/clone_filter.cc'; then $(CYGPATH_W) 'src/clone_filter.cc'; else $(CYGPATH_W) '$(srcdir)/src/clone_filter.cc'; fi` src/clone_filter-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-kmers.o -MD -MP -MF 
src/$(DEPDIR)/clone_filter-kmers.Tpo -c -o src/clone_filter-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-kmers.Tpo src/$(DEPDIR)/clone_filter-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/clone_filter-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/clone_filter-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-kmers.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-kmers.Tpo -c -o src/clone_filter-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-kmers.Tpo src/$(DEPDIR)/clone_filter-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/clone_filter-kmers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/clone_filter-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-utils.o -MD -MP -MF 
src/$(DEPDIR)/clone_filter-utils.Tpo -c -o src/clone_filter-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-utils.Tpo src/$(DEPDIR)/clone_filter-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/clone_filter-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/clone_filter-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-utils.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-utils.Tpo -c -o src/clone_filter-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-utils.Tpo src/$(DEPDIR)/clone_filter-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/clone_filter-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/clone_filter-write.o: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-write.o -MD -MP -MF 
src/$(DEPDIR)/clone_filter-write.Tpo -c -o src/clone_filter-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-write.Tpo src/$(DEPDIR)/clone_filter-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/clone_filter-write.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc src/clone_filter-write.obj: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-write.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-write.Tpo -c -o src/clone_filter-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-write.Tpo src/$(DEPDIR)/clone_filter-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/clone_filter-write.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` src/clone_filter-clean.o: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-clean.o -MD -MP -MF 
src/$(DEPDIR)/clone_filter-clean.Tpo -c -o src/clone_filter-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-clean.Tpo src/$(DEPDIR)/clone_filter-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/clone_filter-clean.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc src/clone_filter-clean.obj: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-clean.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-clean.Tpo -c -o src/clone_filter-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-clean.Tpo src/$(DEPDIR)/clone_filter-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/clone_filter-clean.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` src/clone_filter-file_io.o: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-file_io.o -MD -MP -MF 
src/$(DEPDIR)/clone_filter-file_io.Tpo -c -o src/clone_filter-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-file_io.Tpo src/$(DEPDIR)/clone_filter-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' object='src/clone_filter-file_io.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc src/clone_filter-file_io.obj: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-file_io.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-file_io.Tpo -c -o src/clone_filter-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-file_io.Tpo src/$(DEPDIR)/clone_filter-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' object='src/clone_filter-file_io.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` src/clone_filter-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT 
src/clone_filter-input.o -MD -MP -MF src/$(DEPDIR)/clone_filter-input.Tpo -c -o src/clone_filter-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-input.Tpo src/$(DEPDIR)/clone_filter-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/clone_filter-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/clone_filter-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -MT src/clone_filter-input.obj -MD -MP -MF src/$(DEPDIR)/clone_filter-input.Tpo -c -o src/clone_filter-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/clone_filter-input.Tpo src/$(DEPDIR)/clone_filter-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/clone_filter-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(clone_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/clone_filter-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/cstacks-cstacks.o: src/cstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-cstacks.o 
-MD -MP -MF src/$(DEPDIR)/cstacks-cstacks.Tpo -c -o src/cstacks-cstacks.o `test -f 'src/cstacks.cc' || echo '$(srcdir)/'`src/cstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-cstacks.Tpo src/$(DEPDIR)/cstacks-cstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/cstacks.cc' object='src/cstacks-cstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-cstacks.o `test -f 'src/cstacks.cc' || echo '$(srcdir)/'`src/cstacks.cc src/cstacks-cstacks.obj: src/cstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-cstacks.obj -MD -MP -MF src/$(DEPDIR)/cstacks-cstacks.Tpo -c -o src/cstacks-cstacks.obj `if test -f 'src/cstacks.cc'; then $(CYGPATH_W) 'src/cstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/cstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-cstacks.Tpo src/$(DEPDIR)/cstacks-cstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/cstacks.cc' object='src/cstacks-cstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-cstacks.obj `if test -f 'src/cstacks.cc'; then $(CYGPATH_W) 'src/cstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/cstacks.cc'; fi` src/cstacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-stacks.o -MD -MP -MF src/$(DEPDIR)/cstacks-stacks.Tpo -c -o 
src/cstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-stacks.Tpo src/$(DEPDIR)/cstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/cstacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/cstacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/cstacks-stacks.Tpo -c -o src/cstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-stacks.Tpo src/$(DEPDIR)/cstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/cstacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/cstacks-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-locus.o -MD -MP -MF src/$(DEPDIR)/cstacks-locus.Tpo -c -o src/cstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc 
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-locus.Tpo src/$(DEPDIR)/cstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/cstacks-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/cstacks-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-locus.obj -MD -MP -MF src/$(DEPDIR)/cstacks-locus.Tpo -c -o src/cstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-locus.Tpo src/$(DEPDIR)/cstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/cstacks-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/cstacks-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-kmers.o -MD -MP -MF src/$(DEPDIR)/cstacks-kmers.Tpo -c -o src/cstacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-kmers.Tpo src/$(DEPDIR)/cstacks-kmers.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/cstacks-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/cstacks-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-kmers.obj -MD -MP -MF src/$(DEPDIR)/cstacks-kmers.Tpo -c -o src/cstacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-kmers.Tpo src/$(DEPDIR)/cstacks-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/cstacks-kmers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/cstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-utils.o -MD -MP -MF src/$(DEPDIR)/cstacks-utils.Tpo -c -o src/cstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-utils.Tpo src/$(DEPDIR)/cstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/cstacks-utils.o' libtool=no 
@AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/cstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/cstacks-utils.Tpo -c -o src/cstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-utils.Tpo src/$(DEPDIR)/cstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/cstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/cstacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/cstacks-DNASeq.Tpo -c -o src/cstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-DNASeq.Tpo src/$(DEPDIR)/cstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/cstacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/cstacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/cstacks-DNASeq.Tpo -c -o src/cstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-DNASeq.Tpo src/$(DEPDIR)/cstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/cstacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/cstacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/cstacks-DNANSeq.Tpo -c -o src/cstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-DNANSeq.Tpo src/$(DEPDIR)/cstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/cstacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/cstacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/cstacks-DNANSeq.Tpo -c -o src/cstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-DNANSeq.Tpo src/$(DEPDIR)/cstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/cstacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/cstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-input.o -MD -MP -MF src/$(DEPDIR)/cstacks-input.Tpo -c -o src/cstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-input.Tpo src/$(DEPDIR)/cstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/cstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) 
$(CXXFLAGS) -c -o src/cstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/cstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -MT src/cstacks-input.obj -MD -MP -MF src/$(DEPDIR)/cstacks-input.Tpo -c -o src/cstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/cstacks-input.Tpo src/$(DEPDIR)/cstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/cstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/cstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/estacks-estacks.o: src/estacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-estacks.o -MD -MP -MF src/$(DEPDIR)/estacks-estacks.Tpo -c -o src/estacks-estacks.o `test -f 'src/estacks.cc' || echo '$(srcdir)/'`src/estacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-estacks.Tpo src/$(DEPDIR)/estacks-estacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/estacks.cc' object='src/estacks-estacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-estacks.o `test -f 'src/estacks.cc' || echo 
'$(srcdir)/'`src/estacks.cc src/estacks-estacks.obj: src/estacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-estacks.obj -MD -MP -MF src/$(DEPDIR)/estacks-estacks.Tpo -c -o src/estacks-estacks.obj `if test -f 'src/estacks.cc'; then $(CYGPATH_W) 'src/estacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/estacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-estacks.Tpo src/$(DEPDIR)/estacks-estacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/estacks.cc' object='src/estacks-estacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-estacks.obj `if test -f 'src/estacks.cc'; then $(CYGPATH_W) 'src/estacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/estacks.cc'; fi` src/estacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-stacks.o -MD -MP -MF src/$(DEPDIR)/estacks-stacks.Tpo -c -o src/estacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-stacks.Tpo src/$(DEPDIR)/estacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/estacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/estacks-stacks.obj: src/stacks.cc 
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/estacks-stacks.Tpo -c -o src/estacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-stacks.Tpo src/$(DEPDIR)/estacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/estacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/estacks-mstack.o: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-mstack.o -MD -MP -MF src/$(DEPDIR)/estacks-mstack.Tpo -c -o src/estacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-mstack.Tpo src/$(DEPDIR)/estacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/estacks-mstack.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc src/estacks-mstack.obj: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-mstack.obj -MD -MP -MF src/$(DEPDIR)/estacks-mstack.Tpo -c -o src/estacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-mstack.Tpo src/$(DEPDIR)/estacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/estacks-mstack.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` src/estacks-models.o: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-models.o -MD -MP -MF src/$(DEPDIR)/estacks-models.Tpo -c -o src/estacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-models.Tpo src/$(DEPDIR)/estacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/estacks-models.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc src/estacks-models.obj: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-models.obj 
-MD -MP -MF src/$(DEPDIR)/estacks-models.Tpo -c -o src/estacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-models.Tpo src/$(DEPDIR)/estacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/estacks-models.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` src/estacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-utils.o -MD -MP -MF src/$(DEPDIR)/estacks-utils.Tpo -c -o src/estacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-utils.Tpo src/$(DEPDIR)/estacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/estacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/estacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-utils.obj -MD -MP -MF src/$(DEPDIR)/estacks-utils.Tpo -c -o src/estacks-utils.obj `if test -f 'src/utils.cc'; then 
$(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-utils.Tpo src/$(DEPDIR)/estacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/estacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/estacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/estacks-DNASeq.Tpo -c -o src/estacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-DNASeq.Tpo src/$(DEPDIR)/estacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/estacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/estacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/estacks-DNASeq.Tpo -c -o src/estacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` 
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-DNASeq.Tpo src/$(DEPDIR)/estacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/estacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/estacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/estacks-DNANSeq.Tpo -c -o src/estacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-DNANSeq.Tpo src/$(DEPDIR)/estacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/estacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/estacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/estacks-DNANSeq.Tpo -c -o src/estacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/estacks-DNANSeq.Tpo src/$(DEPDIR)/estacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/estacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/estacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-input.o -MD -MP -MF src/$(DEPDIR)/estacks-input.Tpo -c -o src/estacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-input.Tpo src/$(DEPDIR)/estacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/estacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/estacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -MT src/estacks-input.obj -MD -MP -MF src/$(DEPDIR)/estacks-input.Tpo -c -o src/estacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/estacks-input.Tpo src/$(DEPDIR)/estacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/input.cc' object='src/estacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(estacks_CXXFLAGS) $(CXXFLAGS) -c -o src/estacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/genotypes-genotypes.o: src/genotypes.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-genotypes.o -MD -MP -MF src/$(DEPDIR)/genotypes-genotypes.Tpo -c -o src/genotypes-genotypes.o `test -f 'src/genotypes.cc' || echo '$(srcdir)/'`src/genotypes.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-genotypes.Tpo src/$(DEPDIR)/genotypes-genotypes.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/genotypes.cc' object='src/genotypes-genotypes.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-genotypes.o `test -f 'src/genotypes.cc' || echo '$(srcdir)/'`src/genotypes.cc src/genotypes-genotypes.obj: src/genotypes.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-genotypes.obj -MD -MP -MF src/$(DEPDIR)/genotypes-genotypes.Tpo -c -o src/genotypes-genotypes.obj `if test -f 'src/genotypes.cc'; then $(CYGPATH_W) 'src/genotypes.cc'; else $(CYGPATH_W) '$(srcdir)/src/genotypes.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-genotypes.Tpo src/$(DEPDIR)/genotypes-genotypes.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/genotypes.cc' object='src/genotypes-genotypes.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-genotypes.obj `if test -f 'src/genotypes.cc'; then $(CYGPATH_W) 'src/genotypes.cc'; else $(CYGPATH_W) '$(srcdir)/src/genotypes.cc'; fi` src/genotypes-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-utils.o -MD -MP -MF src/$(DEPDIR)/genotypes-utils.Tpo -c -o src/genotypes-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-utils.Tpo src/$(DEPDIR)/genotypes-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/genotypes-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/genotypes-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-utils.obj -MD -MP -MF src/$(DEPDIR)/genotypes-utils.Tpo -c -o src/genotypes-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-utils.Tpo src/$(DEPDIR)/genotypes-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/utils.cc' object='src/genotypes-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/genotypes-catalog_utils.o: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-catalog_utils.o -MD -MP -MF src/$(DEPDIR)/genotypes-catalog_utils.Tpo -c -o src/genotypes-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-catalog_utils.Tpo src/$(DEPDIR)/genotypes-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/genotypes-catalog_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc src/genotypes-catalog_utils.obj: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-catalog_utils.obj -MD -MP -MF src/$(DEPDIR)/genotypes-catalog_utils.Tpo -c -o src/genotypes-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/genotypes-catalog_utils.Tpo src/$(DEPDIR)/genotypes-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/genotypes-catalog_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` src/genotypes-log_utils.o: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-log_utils.o -MD -MP -MF src/$(DEPDIR)/genotypes-log_utils.Tpo -c -o src/genotypes-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-log_utils.Tpo src/$(DEPDIR)/genotypes-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/genotypes-log_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc src/genotypes-log_utils.obj: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-log_utils.obj -MD -MP -MF src/$(DEPDIR)/genotypes-log_utils.Tpo -c -o src/genotypes-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) 
'$(srcdir)/src/log_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-log_utils.Tpo src/$(DEPDIR)/genotypes-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/genotypes-log_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` src/genotypes-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-stacks.o -MD -MP -MF src/$(DEPDIR)/genotypes-stacks.Tpo -c -o src/genotypes-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-stacks.Tpo src/$(DEPDIR)/genotypes-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/genotypes-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/genotypes-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-stacks.obj -MD -MP -MF src/$(DEPDIR)/genotypes-stacks.Tpo -c -o src/genotypes-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) 
'$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-stacks.Tpo src/$(DEPDIR)/genotypes-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/genotypes-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/genotypes-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-locus.o -MD -MP -MF src/$(DEPDIR)/genotypes-locus.Tpo -c -o src/genotypes-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-locus.Tpo src/$(DEPDIR)/genotypes-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/genotypes-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/genotypes-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-locus.obj -MD -MP -MF src/$(DEPDIR)/genotypes-locus.Tpo -c -o src/genotypes-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-locus.Tpo src/$(DEPDIR)/genotypes-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/genotypes-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/genotypes-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-DNASeq.o -MD -MP -MF src/$(DEPDIR)/genotypes-DNASeq.Tpo -c -o src/genotypes-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-DNASeq.Tpo src/$(DEPDIR)/genotypes-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/genotypes-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/genotypes-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/genotypes-DNASeq.Tpo -c -o src/genotypes-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-DNASeq.Tpo 
src/$(DEPDIR)/genotypes-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/genotypes-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/genotypes-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/genotypes-DNANSeq.Tpo -c -o src/genotypes-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-DNANSeq.Tpo src/$(DEPDIR)/genotypes-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/genotypes-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/genotypes-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/genotypes-DNANSeq.Tpo -c -o src/genotypes-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-DNANSeq.Tpo 
src/$(DEPDIR)/genotypes-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/genotypes-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/genotypes-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-input.o -MD -MP -MF src/$(DEPDIR)/genotypes-input.Tpo -c -o src/genotypes-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-input.Tpo src/$(DEPDIR)/genotypes-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/genotypes-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/genotypes-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -MT src/genotypes-input.obj -MD -MP -MF src/$(DEPDIR)/genotypes-input.Tpo -c -o src/genotypes-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/genotypes-input.Tpo src/$(DEPDIR)/genotypes-input.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/genotypes-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(genotypes_CXXFLAGS) $(CXXFLAGS) -c -o src/genotypes-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/hstacks-hstacks.o: src/hstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-hstacks.o -MD -MP -MF src/$(DEPDIR)/hstacks-hstacks.Tpo -c -o src/hstacks-hstacks.o `test -f 'src/hstacks.cc' || echo '$(srcdir)/'`src/hstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-hstacks.Tpo src/$(DEPDIR)/hstacks-hstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/hstacks.cc' object='src/hstacks-hstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-hstacks.o `test -f 'src/hstacks.cc' || echo '$(srcdir)/'`src/hstacks.cc src/hstacks-hstacks.obj: src/hstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-hstacks.obj -MD -MP -MF src/$(DEPDIR)/hstacks-hstacks.Tpo -c -o src/hstacks-hstacks.obj `if test -f 'src/hstacks.cc'; then $(CYGPATH_W) 'src/hstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/hstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-hstacks.Tpo src/$(DEPDIR)/hstacks-hstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/hstacks.cc' object='src/hstacks-hstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-hstacks.obj `if test -f 'src/hstacks.cc'; then $(CYGPATH_W) 'src/hstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/hstacks.cc'; fi` src/hstacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-stacks.o -MD -MP -MF src/$(DEPDIR)/hstacks-stacks.Tpo -c -o src/hstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-stacks.Tpo src/$(DEPDIR)/hstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/hstacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/hstacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/hstacks-stacks.Tpo -c -o src/hstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-stacks.Tpo src/$(DEPDIR)/hstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/hstacks-stacks.obj' libtool=no 
@AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/hstacks-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-locus.o -MD -MP -MF src/$(DEPDIR)/hstacks-locus.Tpo -c -o src/hstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-locus.Tpo src/$(DEPDIR)/hstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/hstacks-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/hstacks-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-locus.obj -MD -MP -MF src/$(DEPDIR)/hstacks-locus.Tpo -c -o src/hstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-locus.Tpo src/$(DEPDIR)/hstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/hstacks-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/hstacks-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-kmers.o -MD -MP -MF src/$(DEPDIR)/hstacks-kmers.Tpo -c -o src/hstacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-kmers.Tpo src/$(DEPDIR)/hstacks-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/hstacks-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/hstacks-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-kmers.obj -MD -MP -MF src/$(DEPDIR)/hstacks-kmers.Tpo -c -o src/hstacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-kmers.Tpo src/$(DEPDIR)/hstacks-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/hstacks-kmers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/hstacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/hstacks-DNASeq.Tpo -c -o src/hstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-DNASeq.Tpo src/$(DEPDIR)/hstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/hstacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/hstacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/hstacks-DNASeq.Tpo -c -o src/hstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-DNASeq.Tpo src/$(DEPDIR)/hstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/hstacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-DNASeq.obj `if test -f 
'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/hstacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/hstacks-DNANSeq.Tpo -c -o src/hstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-DNANSeq.Tpo src/$(DEPDIR)/hstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/hstacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/hstacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/hstacks-DNANSeq.Tpo -c -o src/hstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-DNANSeq.Tpo src/$(DEPDIR)/hstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/hstacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else 
$(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/hstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-utils.o -MD -MP -MF src/$(DEPDIR)/hstacks-utils.Tpo -c -o src/hstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-utils.Tpo src/$(DEPDIR)/hstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/hstacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/hstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/hstacks-utils.Tpo -c -o src/hstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-utils.Tpo src/$(DEPDIR)/hstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/hstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/hstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-input.o -MD -MP -MF src/$(DEPDIR)/hstacks-input.Tpo -c -o src/hstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-input.Tpo src/$(DEPDIR)/hstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/hstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/hstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -MT src/hstacks-input.obj -MD -MP -MF src/$(DEPDIR)/hstacks-input.Tpo -c -o src/hstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/hstacks-input.Tpo src/$(DEPDIR)/hstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/hstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(hstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/hstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/kmer_filter-kmer_filter.o: src/kmer_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-kmer_filter.o -MD -MP -MF src/$(DEPDIR)/kmer_filter-kmer_filter.Tpo -c -o src/kmer_filter-kmer_filter.o `test -f 'src/kmer_filter.cc' || echo '$(srcdir)/'`src/kmer_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-kmer_filter.Tpo src/$(DEPDIR)/kmer_filter-kmer_filter.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmer_filter.cc' object='src/kmer_filter-kmer_filter.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-kmer_filter.o `test -f 'src/kmer_filter.cc' || echo '$(srcdir)/'`src/kmer_filter.cc src/kmer_filter-kmer_filter.obj: src/kmer_filter.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-kmer_filter.obj -MD -MP -MF src/$(DEPDIR)/kmer_filter-kmer_filter.Tpo -c -o src/kmer_filter-kmer_filter.obj `if test -f 'src/kmer_filter.cc'; then $(CYGPATH_W) 'src/kmer_filter.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmer_filter.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-kmer_filter.Tpo src/$(DEPDIR)/kmer_filter-kmer_filter.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmer_filter.cc' object='src/kmer_filter-kmer_filter.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-kmer_filter.obj `if test -f 'src/kmer_filter.cc'; then $(CYGPATH_W) 'src/kmer_filter.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmer_filter.cc'; fi` 
src/kmer_filter-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-utils.o -MD -MP -MF src/$(DEPDIR)/kmer_filter-utils.Tpo -c -o src/kmer_filter-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-utils.Tpo src/$(DEPDIR)/kmer_filter-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/kmer_filter-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/kmer_filter-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-utils.obj -MD -MP -MF src/$(DEPDIR)/kmer_filter-utils.Tpo -c -o src/kmer_filter-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-utils.Tpo src/$(DEPDIR)/kmer_filter-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/kmer_filter-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/kmer_filter-write.o: src/write.cc 
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-write.o -MD -MP -MF src/$(DEPDIR)/kmer_filter-write.Tpo -c -o src/kmer_filter-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-write.Tpo src/$(DEPDIR)/kmer_filter-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/kmer_filter-write.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc src/kmer_filter-write.obj: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-write.obj -MD -MP -MF src/$(DEPDIR)/kmer_filter-write.Tpo -c -o src/kmer_filter-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-write.Tpo src/$(DEPDIR)/kmer_filter-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/kmer_filter-write.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` src/kmer_filter-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-kmers.o -MD -MP -MF src/$(DEPDIR)/kmer_filter-kmers.Tpo -c -o src/kmer_filter-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-kmers.Tpo src/$(DEPDIR)/kmer_filter-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/kmer_filter-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/kmer_filter-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-kmers.obj -MD -MP -MF src/$(DEPDIR)/kmer_filter-kmers.Tpo -c -o src/kmer_filter-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-kmers.Tpo src/$(DEPDIR)/kmer_filter-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/kmer_filter-kmers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/kmer_filter-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-input.o -MD -MP -MF src/$(DEPDIR)/kmer_filter-input.Tpo -c -o src/kmer_filter-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-input.Tpo src/$(DEPDIR)/kmer_filter-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/kmer_filter-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/kmer_filter-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -MT src/kmer_filter-input.obj -MD -MP -MF src/$(DEPDIR)/kmer_filter-input.Tpo -c -o src/kmer_filter-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/kmer_filter-input.Tpo src/$(DEPDIR)/kmer_filter-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/kmer_filter-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(kmer_filter_CXXFLAGS) $(CXXFLAGS) -c -o src/kmer_filter-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/phasedstacks-phasedstacks.o: src/phasedstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-phasedstacks.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-phasedstacks.Tpo -c -o src/phasedstacks-phasedstacks.o `test -f 'src/phasedstacks.cc' || echo '$(srcdir)/'`src/phasedstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-phasedstacks.Tpo src/$(DEPDIR)/phasedstacks-phasedstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/phasedstacks.cc' object='src/phasedstacks-phasedstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-phasedstacks.o `test -f 'src/phasedstacks.cc' || echo '$(srcdir)/'`src/phasedstacks.cc src/phasedstacks-phasedstacks.obj: src/phasedstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-phasedstacks.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-phasedstacks.Tpo -c -o src/phasedstacks-phasedstacks.obj `if test -f 'src/phasedstacks.cc'; then $(CYGPATH_W) 'src/phasedstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/phasedstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-phasedstacks.Tpo src/$(DEPDIR)/phasedstacks-phasedstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/phasedstacks.cc' object='src/phasedstacks-phasedstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-phasedstacks.obj `if test -f 'src/phasedstacks.cc'; then $(CYGPATH_W) 
'src/phasedstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/phasedstacks.cc'; fi` src/phasedstacks-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-locus.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-locus.Tpo -c -o src/phasedstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-locus.Tpo src/$(DEPDIR)/phasedstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/phasedstacks-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/phasedstacks-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-locus.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-locus.Tpo -c -o src/phasedstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-locus.Tpo src/$(DEPDIR)/phasedstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/phasedstacks-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 
'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/phasedstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-input.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-input.Tpo -c -o src/phasedstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-input.Tpo src/$(DEPDIR)/phasedstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/phasedstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/phasedstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-input.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-input.Tpo -c -o src/phasedstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-input.Tpo src/$(DEPDIR)/phasedstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/phasedstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; 
else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/phasedstacks-log_utils.o: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-log_utils.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-log_utils.Tpo -c -o src/phasedstacks-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-log_utils.Tpo src/$(DEPDIR)/phasedstacks-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/phasedstacks-log_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc src/phasedstacks-log_utils.obj: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-log_utils.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-log_utils.Tpo -c -o src/phasedstacks-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-log_utils.Tpo src/$(DEPDIR)/phasedstacks-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/phasedstacks-log_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o 
src/phasedstacks-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` src/phasedstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-utils.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-utils.Tpo -c -o src/phasedstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-utils.Tpo src/$(DEPDIR)/phasedstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/phasedstacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/phasedstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-utils.Tpo -c -o src/phasedstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-utils.Tpo src/$(DEPDIR)/phasedstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/phasedstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o 
src/phasedstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/phasedstacks-catalog_utils.o: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-catalog_utils.o -MD -MP -MF src/$(DEPDIR)/phasedstacks-catalog_utils.Tpo -c -o src/phasedstacks-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-catalog_utils.Tpo src/$(DEPDIR)/phasedstacks-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/phasedstacks-catalog_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc src/phasedstacks-catalog_utils.obj: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -MT src/phasedstacks-catalog_utils.obj -MD -MP -MF src/$(DEPDIR)/phasedstacks-catalog_utils.Tpo -c -o src/phasedstacks-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/phasedstacks-catalog_utils.Tpo src/$(DEPDIR)/phasedstacks-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/phasedstacks-catalog_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(phasedstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/phasedstacks-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` src/populations-populations.o: src/populations.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-populations.o -MD -MP -MF src/$(DEPDIR)/populations-populations.Tpo -c -o src/populations-populations.o `test -f 'src/populations.cc' || echo '$(srcdir)/'`src/populations.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-populations.Tpo src/$(DEPDIR)/populations-populations.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/populations.cc' object='src/populations-populations.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-populations.o `test -f 'src/populations.cc' || echo '$(srcdir)/'`src/populations.cc src/populations-populations.obj: src/populations.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-populations.obj -MD -MP -MF src/$(DEPDIR)/populations-populations.Tpo -c -o src/populations-populations.obj `if test -f 'src/populations.cc'; then $(CYGPATH_W) 'src/populations.cc'; else $(CYGPATH_W) '$(srcdir)/src/populations.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-populations.Tpo src/$(DEPDIR)/populations-populations.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/populations.cc' object='src/populations-populations.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-populations.obj `if test -f 'src/populations.cc'; then $(CYGPATH_W) 'src/populations.cc'; else $(CYGPATH_W) '$(srcdir)/src/populations.cc'; fi` src/populations-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-utils.o -MD -MP -MF src/$(DEPDIR)/populations-utils.Tpo -c -o src/populations-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-utils.Tpo src/$(DEPDIR)/populations-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/populations-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/populations-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-utils.obj -MD -MP -MF src/$(DEPDIR)/populations-utils.Tpo -c -o src/populations-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-utils.Tpo src/$(DEPDIR)/populations-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/utils.cc' object='src/populations-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/populations-catalog_utils.o: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-catalog_utils.o -MD -MP -MF src/$(DEPDIR)/populations-catalog_utils.Tpo -c -o src/populations-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-catalog_utils.Tpo src/$(DEPDIR)/populations-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/populations-catalog_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-catalog_utils.o `test -f 'src/catalog_utils.cc' || echo '$(srcdir)/'`src/catalog_utils.cc src/populations-catalog_utils.obj: src/catalog_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-catalog_utils.obj -MD -MP -MF src/$(DEPDIR)/populations-catalog_utils.Tpo -c -o src/populations-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` 
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-catalog_utils.Tpo src/$(DEPDIR)/populations-catalog_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/catalog_utils.cc' object='src/populations-catalog_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-catalog_utils.obj `if test -f 'src/catalog_utils.cc'; then $(CYGPATH_W) 'src/catalog_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/catalog_utils.cc'; fi` src/populations-log_utils.o: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-log_utils.o -MD -MP -MF src/$(DEPDIR)/populations-log_utils.Tpo -c -o src/populations-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-log_utils.Tpo src/$(DEPDIR)/populations-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/populations-log_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc src/populations-log_utils.obj: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-log_utils.obj -MD -MP -MF src/$(DEPDIR)/populations-log_utils.Tpo -c -o src/populations-log_utils.obj `if 
test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-log_utils.Tpo src/$(DEPDIR)/populations-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/populations-log_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` src/populations-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-stacks.o -MD -MP -MF src/$(DEPDIR)/populations-stacks.Tpo -c -o src/populations-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-stacks.Tpo src/$(DEPDIR)/populations-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/populations-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/populations-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-stacks.obj -MD -MP -MF src/$(DEPDIR)/populations-stacks.Tpo -c -o 
src/populations-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-stacks.Tpo src/$(DEPDIR)/populations-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/populations-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/populations-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-locus.o -MD -MP -MF src/$(DEPDIR)/populations-locus.Tpo -c -o src/populations-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-locus.Tpo src/$(DEPDIR)/populations-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/populations-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/populations-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-locus.obj -MD -MP -MF src/$(DEPDIR)/populations-locus.Tpo -c -o src/populations-locus.obj 
`if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-locus.Tpo src/$(DEPDIR)/populations-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/populations-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/populations-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-DNASeq.o -MD -MP -MF src/$(DEPDIR)/populations-DNASeq.Tpo -c -o src/populations-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-DNASeq.Tpo src/$(DEPDIR)/populations-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/populations-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/populations-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/populations-DNASeq.Tpo -c -o src/populations-DNASeq.obj `if test -f 
'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-DNASeq.Tpo src/$(DEPDIR)/populations-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/populations-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/populations-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/populations-DNANSeq.Tpo -c -o src/populations-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-DNANSeq.Tpo src/$(DEPDIR)/populations-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/populations-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/populations-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/populations-DNANSeq.Tpo -c -o src/populations-DNANSeq.obj 
`if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-DNANSeq.Tpo src/$(DEPDIR)/populations-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/populations-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/populations-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-input.o -MD -MP -MF src/$(DEPDIR)/populations-input.Tpo -c -o src/populations-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-input.Tpo src/$(DEPDIR)/populations-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/populations-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/populations-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -MT src/populations-input.obj -MD -MP -MF src/$(DEPDIR)/populations-input.Tpo -c -o src/populations-input.obj `if test -f 
'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/populations-input.Tpo src/$(DEPDIR)/populations-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/populations-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(populations_CXXFLAGS) $(CXXFLAGS) -c -o src/populations-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/process_radtags-process_radtags.o: src/process_radtags.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-process_radtags.o -MD -MP -MF src/$(DEPDIR)/process_radtags-process_radtags.Tpo -c -o src/process_radtags-process_radtags.o `test -f 'src/process_radtags.cc' || echo '$(srcdir)/'`src/process_radtags.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-process_radtags.Tpo src/$(DEPDIR)/process_radtags-process_radtags.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/process_radtags.cc' object='src/process_radtags-process_radtags.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-process_radtags.o `test -f 'src/process_radtags.cc' || echo '$(srcdir)/'`src/process_radtags.cc src/process_radtags-process_radtags.obj: src/process_radtags.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-process_radtags.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-process_radtags.Tpo -c -o src/process_radtags-process_radtags.obj `if test -f 'src/process_radtags.cc'; then $(CYGPATH_W) 'src/process_radtags.cc'; else $(CYGPATH_W) '$(srcdir)/src/process_radtags.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-process_radtags.Tpo src/$(DEPDIR)/process_radtags-process_radtags.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/process_radtags.cc' object='src/process_radtags-process_radtags.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-process_radtags.obj `if test -f 'src/process_radtags.cc'; then $(CYGPATH_W) 'src/process_radtags.cc'; else $(CYGPATH_W) '$(srcdir)/src/process_radtags.cc'; fi` src/process_radtags-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-utils.o -MD -MP -MF src/$(DEPDIR)/process_radtags-utils.Tpo -c -o src/process_radtags-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-utils.Tpo src/$(DEPDIR)/process_radtags-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/process_radtags-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-utils.o `test -f 
'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/process_radtags-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-utils.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-utils.Tpo -c -o src/process_radtags-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-utils.Tpo src/$(DEPDIR)/process_radtags-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/process_radtags-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/process_radtags-log_utils.o: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-log_utils.o -MD -MP -MF src/$(DEPDIR)/process_radtags-log_utils.Tpo -c -o src/process_radtags-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-log_utils.Tpo src/$(DEPDIR)/process_radtags-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/process_radtags-log_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc src/process_radtags-log_utils.obj: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-log_utils.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-log_utils.Tpo -c -o src/process_radtags-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-log_utils.Tpo src/$(DEPDIR)/process_radtags-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/process_radtags-log_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` src/process_radtags-write.o: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-write.o -MD -MP -MF src/$(DEPDIR)/process_radtags-write.Tpo -c -o src/process_radtags-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-write.Tpo src/$(DEPDIR)/process_radtags-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/process_radtags-write.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc src/process_radtags-write.obj: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-write.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-write.Tpo -c -o src/process_radtags-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-write.Tpo src/$(DEPDIR)/process_radtags-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/process_radtags-write.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` src/process_radtags-clean.o: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-clean.o -MD -MP -MF src/$(DEPDIR)/process_radtags-clean.Tpo -c -o src/process_radtags-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-clean.Tpo src/$(DEPDIR)/process_radtags-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/process_radtags-clean.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc src/process_radtags-clean.obj: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-clean.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-clean.Tpo -c -o src/process_radtags-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-clean.Tpo src/$(DEPDIR)/process_radtags-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/process_radtags-clean.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` src/process_radtags-file_io.o: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-file_io.o -MD -MP -MF src/$(DEPDIR)/process_radtags-file_io.Tpo -c -o src/process_radtags-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-file_io.Tpo src/$(DEPDIR)/process_radtags-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' 
object='src/process_radtags-file_io.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc src/process_radtags-file_io.obj: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-file_io.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-file_io.Tpo -c -o src/process_radtags-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-file_io.Tpo src/$(DEPDIR)/process_radtags-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' object='src/process_radtags-file_io.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` src/process_radtags-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-input.o -MD -MP -MF src/$(DEPDIR)/process_radtags-input.Tpo -c -o src/process_radtags-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-input.Tpo 
src/$(DEPDIR)/process_radtags-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/process_radtags-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/process_radtags-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -MT src/process_radtags-input.obj -MD -MP -MF src/$(DEPDIR)/process_radtags-input.Tpo -c -o src/process_radtags-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_radtags-input.Tpo src/$(DEPDIR)/process_radtags-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/process_radtags-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_radtags_CXXFLAGS) $(CXXFLAGS) -c -o src/process_radtags-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/process_shortreads-process_shortreads.o: src/process_shortreads.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-process_shortreads.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-process_shortreads.Tpo -c -o src/process_shortreads-process_shortreads.o `test -f 
'src/process_shortreads.cc' || echo '$(srcdir)/'`src/process_shortreads.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-process_shortreads.Tpo src/$(DEPDIR)/process_shortreads-process_shortreads.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/process_shortreads.cc' object='src/process_shortreads-process_shortreads.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-process_shortreads.o `test -f 'src/process_shortreads.cc' || echo '$(srcdir)/'`src/process_shortreads.cc src/process_shortreads-process_shortreads.obj: src/process_shortreads.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-process_shortreads.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-process_shortreads.Tpo -c -o src/process_shortreads-process_shortreads.obj `if test -f 'src/process_shortreads.cc'; then $(CYGPATH_W) 'src/process_shortreads.cc'; else $(CYGPATH_W) '$(srcdir)/src/process_shortreads.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-process_shortreads.Tpo src/$(DEPDIR)/process_shortreads-process_shortreads.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/process_shortreads.cc' object='src/process_shortreads-process_shortreads.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-process_shortreads.obj `if test -f 'src/process_shortreads.cc'; then 
$(CYGPATH_W) 'src/process_shortreads.cc'; else $(CYGPATH_W) '$(srcdir)/src/process_shortreads.cc'; fi` src/process_shortreads-clean.o: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-clean.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-clean.Tpo -c -o src/process_shortreads-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-clean.Tpo src/$(DEPDIR)/process_shortreads-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/process_shortreads-clean.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-clean.o `test -f 'src/clean.cc' || echo '$(srcdir)/'`src/clean.cc src/process_shortreads-clean.obj: src/clean.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-clean.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-clean.Tpo -c -o src/process_shortreads-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-clean.Tpo src/$(DEPDIR)/process_shortreads-clean.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/clean.cc' object='src/process_shortreads-clean.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-clean.obj `if test -f 'src/clean.cc'; then $(CYGPATH_W) 'src/clean.cc'; else $(CYGPATH_W) '$(srcdir)/src/clean.cc'; fi` src/process_shortreads-file_io.o: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-file_io.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-file_io.Tpo -c -o src/process_shortreads-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-file_io.Tpo src/$(DEPDIR)/process_shortreads-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' object='src/process_shortreads-file_io.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-file_io.o `test -f 'src/file_io.cc' || echo '$(srcdir)/'`src/file_io.cc src/process_shortreads-file_io.obj: src/file_io.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-file_io.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-file_io.Tpo -c -o src/process_shortreads-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-file_io.Tpo src/$(DEPDIR)/process_shortreads-file_io.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/file_io.cc' object='src/process_shortreads-file_io.obj' libtool=no @AMDEPBACKSLASH@ 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-file_io.obj `if test -f 'src/file_io.cc'; then $(CYGPATH_W) 'src/file_io.cc'; else $(CYGPATH_W) '$(srcdir)/src/file_io.cc'; fi` src/process_shortreads-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-utils.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-utils.Tpo -c -o src/process_shortreads-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-utils.Tpo src/$(DEPDIR)/process_shortreads-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/process_shortreads-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/process_shortreads-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-utils.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-utils.Tpo -c -o src/process_shortreads-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-utils.Tpo src/$(DEPDIR)/process_shortreads-utils.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/process_shortreads-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/process_shortreads-log_utils.o: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-log_utils.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-log_utils.Tpo -c -o src/process_shortreads-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-log_utils.Tpo src/$(DEPDIR)/process_shortreads-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/process_shortreads-log_utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-log_utils.o `test -f 'src/log_utils.cc' || echo '$(srcdir)/'`src/log_utils.cc src/process_shortreads-log_utils.obj: src/log_utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-log_utils.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-log_utils.Tpo -c -o src/process_shortreads-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 
'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-log_utils.Tpo src/$(DEPDIR)/process_shortreads-log_utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/log_utils.cc' object='src/process_shortreads-log_utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-log_utils.obj `if test -f 'src/log_utils.cc'; then $(CYGPATH_W) 'src/log_utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/log_utils.cc'; fi` src/process_shortreads-write.o: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-write.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-write.Tpo -c -o src/process_shortreads-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-write.Tpo src/$(DEPDIR)/process_shortreads-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/process_shortreads-write.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-write.o `test -f 'src/write.cc' || echo '$(srcdir)/'`src/write.cc src/process_shortreads-write.obj: src/write.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT 
src/process_shortreads-write.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-write.Tpo -c -o src/process_shortreads-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-write.Tpo src/$(DEPDIR)/process_shortreads-write.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/write.cc' object='src/process_shortreads-write.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-write.obj `if test -f 'src/write.cc'; then $(CYGPATH_W) 'src/write.cc'; else $(CYGPATH_W) '$(srcdir)/src/write.cc'; fi` src/process_shortreads-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-kmers.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-kmers.Tpo -c -o src/process_shortreads-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-kmers.Tpo src/$(DEPDIR)/process_shortreads-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/process_shortreads-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/process_shortreads-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-kmers.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-kmers.Tpo -c -o src/process_shortreads-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-kmers.Tpo src/$(DEPDIR)/process_shortreads-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/process_shortreads-kmers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/process_shortreads-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-input.o -MD -MP -MF src/$(DEPDIR)/process_shortreads-input.Tpo -c -o src/process_shortreads-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-input.Tpo src/$(DEPDIR)/process_shortreads-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/process_shortreads-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-input.o `test -f 'src/input.cc' || echo 
'$(srcdir)/'`src/input.cc src/process_shortreads-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -MT src/process_shortreads-input.obj -MD -MP -MF src/$(DEPDIR)/process_shortreads-input.Tpo -c -o src/process_shortreads-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/process_shortreads-input.Tpo src/$(DEPDIR)/process_shortreads-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/process_shortreads-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(process_shortreads_CXXFLAGS) $(CXXFLAGS) -c -o src/process_shortreads-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/pstacks-pstacks.o: src/pstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-pstacks.o -MD -MP -MF src/$(DEPDIR)/pstacks-pstacks.Tpo -c -o src/pstacks-pstacks.o `test -f 'src/pstacks.cc' || echo '$(srcdir)/'`src/pstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-pstacks.Tpo src/$(DEPDIR)/pstacks-pstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/pstacks.cc' object='src/pstacks-pstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-pstacks.o `test -f 
'src/pstacks.cc' || echo '$(srcdir)/'`src/pstacks.cc src/pstacks-pstacks.obj: src/pstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-pstacks.obj -MD -MP -MF src/$(DEPDIR)/pstacks-pstacks.Tpo -c -o src/pstacks-pstacks.obj `if test -f 'src/pstacks.cc'; then $(CYGPATH_W) 'src/pstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/pstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-pstacks.Tpo src/$(DEPDIR)/pstacks-pstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/pstacks.cc' object='src/pstacks-pstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-pstacks.obj `if test -f 'src/pstacks.cc'; then $(CYGPATH_W) 'src/pstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/pstacks.cc'; fi` src/pstacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-stacks.o -MD -MP -MF src/$(DEPDIR)/pstacks-stacks.Tpo -c -o src/pstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-stacks.Tpo src/$(DEPDIR)/pstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/pstacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc 
src/pstacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/pstacks-stacks.Tpo -c -o src/pstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-stacks.Tpo src/$(DEPDIR)/pstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/pstacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/pstacks-mstack.o: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-mstack.o -MD -MP -MF src/$(DEPDIR)/pstacks-mstack.Tpo -c -o src/pstacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-mstack.Tpo src/$(DEPDIR)/pstacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/pstacks-mstack.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc src/pstacks-mstack.obj: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-mstack.obj -MD -MP -MF src/$(DEPDIR)/pstacks-mstack.Tpo -c -o src/pstacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-mstack.Tpo src/$(DEPDIR)/pstacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/pstacks-mstack.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` src/pstacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/pstacks-DNANSeq.Tpo -c -o src/pstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-DNANSeq.Tpo src/$(DEPDIR)/pstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/pstacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/pstacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/pstacks-DNANSeq.Tpo -c -o src/pstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-DNANSeq.Tpo src/$(DEPDIR)/pstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/pstacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/pstacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/pstacks-DNASeq.Tpo -c -o src/pstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-DNASeq.Tpo src/$(DEPDIR)/pstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/pstacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/pstacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-DNASeq.obj -MD -MP -MF 
src/$(DEPDIR)/pstacks-DNASeq.Tpo -c -o src/pstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-DNASeq.Tpo src/$(DEPDIR)/pstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/pstacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/pstacks-models.o: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-models.o -MD -MP -MF src/$(DEPDIR)/pstacks-models.Tpo -c -o src/pstacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-models.Tpo src/$(DEPDIR)/pstacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/pstacks-models.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc src/pstacks-models.obj: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-models.obj -MD -MP -MF src/$(DEPDIR)/pstacks-models.Tpo -c -o src/pstacks-models.obj `if test -f 
'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-models.Tpo src/$(DEPDIR)/pstacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/pstacks-models.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` src/pstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-utils.o -MD -MP -MF src/$(DEPDIR)/pstacks-utils.Tpo -c -o src/pstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-utils.Tpo src/$(DEPDIR)/pstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/pstacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/pstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/pstacks-utils.Tpo -c -o src/pstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` 
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-utils.Tpo src/$(DEPDIR)/pstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/pstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/pstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-input.o -MD -MP -MF src/$(DEPDIR)/pstacks-input.Tpo -c -o src/pstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-input.Tpo src/$(DEPDIR)/pstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/pstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/pstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -MT src/pstacks-input.obj -MD -MP -MF src/$(DEPDIR)/pstacks-input.Tpo -c -o src/pstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pstacks-input.Tpo src/$(DEPDIR)/pstacks-input.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/pstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/pstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/rxstacks-rxstacks.o: src/rxstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-rxstacks.o -MD -MP -MF src/$(DEPDIR)/rxstacks-rxstacks.Tpo -c -o src/rxstacks-rxstacks.o `test -f 'src/rxstacks.cc' || echo '$(srcdir)/'`src/rxstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-rxstacks.Tpo src/$(DEPDIR)/rxstacks-rxstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/rxstacks.cc' object='src/rxstacks-rxstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-rxstacks.o `test -f 'src/rxstacks.cc' || echo '$(srcdir)/'`src/rxstacks.cc src/rxstacks-rxstacks.obj: src/rxstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-rxstacks.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-rxstacks.Tpo -c -o src/rxstacks-rxstacks.obj `if test -f 'src/rxstacks.cc'; then $(CYGPATH_W) 'src/rxstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/rxstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-rxstacks.Tpo src/$(DEPDIR)/rxstacks-rxstacks.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/rxstacks.cc' object='src/rxstacks-rxstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-rxstacks.obj `if test -f 'src/rxstacks.cc'; then $(CYGPATH_W) 'src/rxstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/rxstacks.cc'; fi` src/rxstacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-stacks.o -MD -MP -MF src/$(DEPDIR)/rxstacks-stacks.Tpo -c -o src/rxstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-stacks.Tpo src/$(DEPDIR)/rxstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/rxstacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/rxstacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-stacks.Tpo -c -o src/rxstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-stacks.Tpo src/$(DEPDIR)/rxstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/stacks.cc' object='src/rxstacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/rxstacks-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-locus.o -MD -MP -MF src/$(DEPDIR)/rxstacks-locus.Tpo -c -o src/rxstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-locus.Tpo src/$(DEPDIR)/rxstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/rxstacks-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/rxstacks-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-locus.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-locus.Tpo -c -o src/rxstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-locus.Tpo src/$(DEPDIR)/rxstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/rxstacks-locus.obj' libtool=no 
@AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/rxstacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/rxstacks-DNANSeq.Tpo -c -o src/rxstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-DNANSeq.Tpo src/$(DEPDIR)/rxstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/rxstacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/rxstacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-DNANSeq.Tpo -c -o src/rxstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-DNANSeq.Tpo src/$(DEPDIR)/rxstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/rxstacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/rxstacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/rxstacks-DNASeq.Tpo -c -o src/rxstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-DNASeq.Tpo src/$(DEPDIR)/rxstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/rxstacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/rxstacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-DNASeq.Tpo -c -o src/rxstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-DNASeq.Tpo src/$(DEPDIR)/rxstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/rxstacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) 
$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/rxstacks-mst.o: src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-mst.o -MD -MP -MF src/$(DEPDIR)/rxstacks-mst.Tpo -c -o src/rxstacks-mst.o `test -f 'src/mst.cc' || echo '$(srcdir)/'`src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-mst.Tpo src/$(DEPDIR)/rxstacks-mst.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mst.cc' object='src/rxstacks-mst.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-mst.o `test -f 'src/mst.cc' || echo '$(srcdir)/'`src/mst.cc src/rxstacks-mst.obj: src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-mst.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-mst.Tpo -c -o src/rxstacks-mst.obj `if test -f 'src/mst.cc'; then $(CYGPATH_W) 'src/mst.cc'; else $(CYGPATH_W) '$(srcdir)/src/mst.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-mst.Tpo src/$(DEPDIR)/rxstacks-mst.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mst.cc' object='src/rxstacks-mst.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-mst.obj `if test -f 'src/mst.cc'; then $(CYGPATH_W) 'src/mst.cc'; else $(CYGPATH_W) '$(srcdir)/src/mst.cc'; fi` src/rxstacks-models.o: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-models.o -MD -MP -MF src/$(DEPDIR)/rxstacks-models.Tpo -c -o src/rxstacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-models.Tpo src/$(DEPDIR)/rxstacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/rxstacks-models.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc src/rxstacks-models.obj: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-models.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-models.Tpo -c -o src/rxstacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-models.Tpo src/$(DEPDIR)/rxstacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/rxstacks-models.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o 
src/rxstacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` src/rxstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-utils.o -MD -MP -MF src/$(DEPDIR)/rxstacks-utils.Tpo -c -o src/rxstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-utils.Tpo src/$(DEPDIR)/rxstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/rxstacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/rxstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-utils.Tpo -c -o src/rxstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-utils.Tpo src/$(DEPDIR)/rxstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/rxstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else 
$(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/rxstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-input.o -MD -MP -MF src/$(DEPDIR)/rxstacks-input.Tpo -c -o src/rxstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-input.Tpo src/$(DEPDIR)/rxstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/rxstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/rxstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -MT src/rxstacks-input.obj -MD -MP -MF src/$(DEPDIR)/rxstacks-input.Tpo -c -o src/rxstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/rxstacks-input.Tpo src/$(DEPDIR)/rxstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/rxstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rxstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/rxstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/sstacks-sstacks.o: src/sstacks.cc 
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-sstacks.o -MD -MP -MF src/$(DEPDIR)/sstacks-sstacks.Tpo -c -o src/sstacks-sstacks.o `test -f 'src/sstacks.cc' || echo '$(srcdir)/'`src/sstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-sstacks.Tpo src/$(DEPDIR)/sstacks-sstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/sstacks.cc' object='src/sstacks-sstacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-sstacks.o `test -f 'src/sstacks.cc' || echo '$(srcdir)/'`src/sstacks.cc src/sstacks-sstacks.obj: src/sstacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-sstacks.obj -MD -MP -MF src/$(DEPDIR)/sstacks-sstacks.Tpo -c -o src/sstacks-sstacks.obj `if test -f 'src/sstacks.cc'; then $(CYGPATH_W) 'src/sstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/sstacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-sstacks.Tpo src/$(DEPDIR)/sstacks-sstacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/sstacks.cc' object='src/sstacks-sstacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-sstacks.obj `if test -f 'src/sstacks.cc'; then $(CYGPATH_W) 'src/sstacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/sstacks.cc'; fi` src/sstacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-stacks.o -MD -MP -MF src/$(DEPDIR)/sstacks-stacks.Tpo -c -o src/sstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-stacks.Tpo src/$(DEPDIR)/sstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/sstacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/sstacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/sstacks-stacks.Tpo -c -o src/sstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-stacks.Tpo src/$(DEPDIR)/sstacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/sstacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/sstacks-locus.o: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) 
$(CXXFLAGS) -MT src/sstacks-locus.o -MD -MP -MF src/$(DEPDIR)/sstacks-locus.Tpo -c -o src/sstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-locus.Tpo src/$(DEPDIR)/sstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/sstacks-locus.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-locus.o `test -f 'src/locus.cc' || echo '$(srcdir)/'`src/locus.cc src/sstacks-locus.obj: src/locus.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-locus.obj -MD -MP -MF src/$(DEPDIR)/sstacks-locus.Tpo -c -o src/sstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-locus.Tpo src/$(DEPDIR)/sstacks-locus.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/locus.cc' object='src/sstacks-locus.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-locus.obj `if test -f 'src/locus.cc'; then $(CYGPATH_W) 'src/locus.cc'; else $(CYGPATH_W) '$(srcdir)/src/locus.cc'; fi` src/sstacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/sstacks-DNASeq.Tpo -c -o src/sstacks-DNASeq.o `test 
-f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-DNASeq.Tpo src/$(DEPDIR)/sstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/sstacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/sstacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/sstacks-DNASeq.Tpo -c -o src/sstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-DNASeq.Tpo src/$(DEPDIR)/sstacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/sstacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/sstacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/sstacks-DNANSeq.Tpo -c -o src/sstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-DNANSeq.Tpo src/$(DEPDIR)/sstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/sstacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/sstacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/sstacks-DNANSeq.Tpo -c -o src/sstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-DNANSeq.Tpo src/$(DEPDIR)/sstacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/sstacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/sstacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-utils.o -MD -MP -MF src/$(DEPDIR)/sstacks-utils.Tpo -c -o src/sstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-utils.Tpo 
src/$(DEPDIR)/sstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/sstacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/sstacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-utils.obj -MD -MP -MF src/$(DEPDIR)/sstacks-utils.Tpo -c -o src/sstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-utils.Tpo src/$(DEPDIR)/sstacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/sstacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/sstacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-input.o -MD -MP -MF src/$(DEPDIR)/sstacks-input.Tpo -c -o src/sstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-input.Tpo src/$(DEPDIR)/sstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' 
object='src/sstacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/sstacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -MT src/sstacks-input.obj -MD -MP -MF src/$(DEPDIR)/sstacks-input.Tpo -c -o src/sstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/sstacks-input.Tpo src/$(DEPDIR)/sstacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/sstacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sstacks_CXXFLAGS) $(CXXFLAGS) -c -o src/sstacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` src/ustacks-ustacks.o: src/ustacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-ustacks.o -MD -MP -MF src/$(DEPDIR)/ustacks-ustacks.Tpo -c -o src/ustacks-ustacks.o `test -f 'src/ustacks.cc' || echo '$(srcdir)/'`src/ustacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-ustacks.Tpo src/$(DEPDIR)/ustacks-ustacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/ustacks.cc' object='src/ustacks-ustacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-ustacks.o `test -f 'src/ustacks.cc' || echo '$(srcdir)/'`src/ustacks.cc src/ustacks-ustacks.obj: src/ustacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-ustacks.obj -MD -MP -MF src/$(DEPDIR)/ustacks-ustacks.Tpo -c -o src/ustacks-ustacks.obj `if test -f 'src/ustacks.cc'; then $(CYGPATH_W) 'src/ustacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/ustacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-ustacks.Tpo src/$(DEPDIR)/ustacks-ustacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/ustacks.cc' object='src/ustacks-ustacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-ustacks.obj `if test -f 'src/ustacks.cc'; then $(CYGPATH_W) 'src/ustacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/ustacks.cc'; fi` src/ustacks-stacks.o: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-stacks.o -MD -MP -MF src/$(DEPDIR)/ustacks-stacks.Tpo -c -o src/ustacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-stacks.Tpo src/$(DEPDIR)/ustacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/ustacks-stacks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-stacks.o `test -f 'src/stacks.cc' || echo '$(srcdir)/'`src/stacks.cc src/ustacks-stacks.obj: src/stacks.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-stacks.obj -MD -MP -MF src/$(DEPDIR)/ustacks-stacks.Tpo -c -o src/ustacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-stacks.Tpo src/$(DEPDIR)/ustacks-stacks.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/stacks.cc' object='src/ustacks-stacks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-stacks.obj `if test -f 'src/stacks.cc'; then $(CYGPATH_W) 'src/stacks.cc'; else $(CYGPATH_W) '$(srcdir)/src/stacks.cc'; fi` src/ustacks-mstack.o: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-mstack.o -MD -MP -MF src/$(DEPDIR)/ustacks-mstack.Tpo -c -o src/ustacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-mstack.Tpo src/$(DEPDIR)/ustacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/ustacks-mstack.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-mstack.o `test -f 'src/mstack.cc' || echo '$(srcdir)/'`src/mstack.cc src/ustacks-mstack.obj: src/mstack.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-mstack.obj -MD -MP -MF src/$(DEPDIR)/ustacks-mstack.Tpo -c -o src/ustacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-mstack.Tpo src/$(DEPDIR)/ustacks-mstack.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mstack.cc' object='src/ustacks-mstack.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-mstack.obj `if test -f 'src/mstack.cc'; then $(CYGPATH_W) 'src/mstack.cc'; else $(CYGPATH_W) '$(srcdir)/src/mstack.cc'; fi` src/ustacks-mst.o: src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-mst.o -MD -MP -MF src/$(DEPDIR)/ustacks-mst.Tpo -c -o src/ustacks-mst.o `test -f 'src/mst.cc' || echo '$(srcdir)/'`src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-mst.Tpo src/$(DEPDIR)/ustacks-mst.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mst.cc' object='src/ustacks-mst.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-mst.o `test -f 'src/mst.cc' || 
echo '$(srcdir)/'`src/mst.cc src/ustacks-mst.obj: src/mst.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-mst.obj -MD -MP -MF src/$(DEPDIR)/ustacks-mst.Tpo -c -o src/ustacks-mst.obj `if test -f 'src/mst.cc'; then $(CYGPATH_W) 'src/mst.cc'; else $(CYGPATH_W) '$(srcdir)/src/mst.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-mst.Tpo src/$(DEPDIR)/ustacks-mst.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/mst.cc' object='src/ustacks-mst.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-mst.obj `if test -f 'src/mst.cc'; then $(CYGPATH_W) 'src/mst.cc'; else $(CYGPATH_W) '$(srcdir)/src/mst.cc'; fi` src/ustacks-cmb.o: src/cmb.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-cmb.o -MD -MP -MF src/$(DEPDIR)/ustacks-cmb.Tpo -c -o src/ustacks-cmb.o `test -f 'src/cmb.cc' || echo '$(srcdir)/'`src/cmb.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-cmb.Tpo src/$(DEPDIR)/ustacks-cmb.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/cmb.cc' object='src/ustacks-cmb.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-cmb.o `test -f 'src/cmb.cc' || echo '$(srcdir)/'`src/cmb.cc src/ustacks-cmb.obj: src/cmb.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-cmb.obj -MD -MP -MF src/$(DEPDIR)/ustacks-cmb.Tpo -c -o src/ustacks-cmb.obj `if test -f 'src/cmb.cc'; then $(CYGPATH_W) 'src/cmb.cc'; else $(CYGPATH_W) '$(srcdir)/src/cmb.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-cmb.Tpo src/$(DEPDIR)/ustacks-cmb.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/cmb.cc' object='src/ustacks-cmb.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-cmb.obj `if test -f 'src/cmb.cc'; then $(CYGPATH_W) 'src/cmb.cc'; else $(CYGPATH_W) '$(srcdir)/src/cmb.cc'; fi` src/ustacks-DNASeq.o: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-DNASeq.o -MD -MP -MF src/$(DEPDIR)/ustacks-DNASeq.Tpo -c -o src/ustacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-DNASeq.Tpo src/$(DEPDIR)/ustacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/ustacks-DNASeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-DNASeq.o `test -f 'src/DNASeq.cc' || echo '$(srcdir)/'`src/DNASeq.cc src/ustacks-DNASeq.obj: src/DNASeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-DNASeq.obj -MD -MP -MF src/$(DEPDIR)/ustacks-DNASeq.Tpo -c -o 
src/ustacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-DNASeq.Tpo src/$(DEPDIR)/ustacks-DNASeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNASeq.cc' object='src/ustacks-DNASeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-DNASeq.obj `if test -f 'src/DNASeq.cc'; then $(CYGPATH_W) 'src/DNASeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNASeq.cc'; fi` src/ustacks-DNANSeq.o: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-DNANSeq.o -MD -MP -MF src/$(DEPDIR)/ustacks-DNANSeq.Tpo -c -o src/ustacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-DNANSeq.Tpo src/$(DEPDIR)/ustacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/ustacks-DNANSeq.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-DNANSeq.o `test -f 'src/DNANSeq.cc' || echo '$(srcdir)/'`src/DNANSeq.cc src/ustacks-DNANSeq.obj: src/DNANSeq.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-DNANSeq.obj -MD -MP -MF src/$(DEPDIR)/ustacks-DNANSeq.Tpo -c -o src/ustacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then 
$(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-DNANSeq.Tpo src/$(DEPDIR)/ustacks-DNANSeq.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/DNANSeq.cc' object='src/ustacks-DNANSeq.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-DNANSeq.obj `if test -f 'src/DNANSeq.cc'; then $(CYGPATH_W) 'src/DNANSeq.cc'; else $(CYGPATH_W) '$(srcdir)/src/DNANSeq.cc'; fi` src/ustacks-models.o: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-models.o -MD -MP -MF src/$(DEPDIR)/ustacks-models.Tpo -c -o src/ustacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-models.Tpo src/$(DEPDIR)/ustacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/ustacks-models.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-models.o `test -f 'src/models.cc' || echo '$(srcdir)/'`src/models.cc src/ustacks-models.obj: src/models.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-models.obj -MD -MP -MF src/$(DEPDIR)/ustacks-models.Tpo -c -o src/ustacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; 
fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-models.Tpo src/$(DEPDIR)/ustacks-models.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/models.cc' object='src/ustacks-models.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-models.obj `if test -f 'src/models.cc'; then $(CYGPATH_W) 'src/models.cc'; else $(CYGPATH_W) '$(srcdir)/src/models.cc'; fi` src/ustacks-utils.o: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-utils.o -MD -MP -MF src/$(DEPDIR)/ustacks-utils.Tpo -c -o src/ustacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-utils.Tpo src/$(DEPDIR)/ustacks-utils.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/ustacks-utils.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-utils.o `test -f 'src/utils.cc' || echo '$(srcdir)/'`src/utils.cc src/ustacks-utils.obj: src/utils.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-utils.obj -MD -MP -MF src/$(DEPDIR)/ustacks-utils.Tpo -c -o src/ustacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-utils.Tpo src/$(DEPDIR)/ustacks-utils.Po 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/utils.cc' object='src/ustacks-utils.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-utils.obj `if test -f 'src/utils.cc'; then $(CYGPATH_W) 'src/utils.cc'; else $(CYGPATH_W) '$(srcdir)/src/utils.cc'; fi` src/ustacks-kmers.o: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-kmers.o -MD -MP -MF src/$(DEPDIR)/ustacks-kmers.Tpo -c -o src/ustacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-kmers.Tpo src/$(DEPDIR)/ustacks-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/ustacks-kmers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-kmers.o `test -f 'src/kmers.cc' || echo '$(srcdir)/'`src/kmers.cc src/ustacks-kmers.obj: src/kmers.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-kmers.obj -MD -MP -MF src/$(DEPDIR)/ustacks-kmers.Tpo -c -o src/ustacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-kmers.Tpo src/$(DEPDIR)/ustacks-kmers.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/kmers.cc' object='src/ustacks-kmers.obj' libtool=no 
@AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-kmers.obj `if test -f 'src/kmers.cc'; then $(CYGPATH_W) 'src/kmers.cc'; else $(CYGPATH_W) '$(srcdir)/src/kmers.cc'; fi` src/ustacks-input.o: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-input.o -MD -MP -MF src/$(DEPDIR)/ustacks-input.Tpo -c -o src/ustacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-input.Tpo src/$(DEPDIR)/ustacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/ustacks-input.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-input.o `test -f 'src/input.cc' || echo '$(srcdir)/'`src/input.cc src/ustacks-input.obj: src/input.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -MT src/ustacks-input.obj -MD -MP -MF src/$(DEPDIR)/ustacks-input.Tpo -c -o src/ustacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/ustacks-input.Tpo src/$(DEPDIR)/ustacks-input.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/input.cc' object='src/ustacks-input.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ustacks_CXXFLAGS) $(CXXFLAGS) -c -o src/ustacks-input.obj `if test -f 'src/input.cc'; then $(CYGPATH_W) 'src/input.cc'; else $(CYGPATH_W) '$(srcdir)/src/input.cc'; fi` install-nobase_pkgdataDATA: $(nobase_pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(nobase_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(pkgdatadir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(pkgdatadir)/$$dir" || exit $$?; }; \ done uninstall-nobase_pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(nobase_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ 
$$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscope: cscope.files test ! -s cscope.files \ || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) clean-cscope: -rm -f cscope.files cscope.files: clean-cscope cscopelist cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags -rm -f cscope.out cscope.in.out cscope.po.out cscope.files # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. 
am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ else \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then 
\ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ 
exit $$?; recheck: all @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? tests/process_radtags.t.log: tests/process_radtags.t @p='tests/process_radtags.t'; \ b='tests/process_radtags.t'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tests/kmer_filter.t.log: tests/kmer_filter.t @p='tests/kmer_filter.t'; \ b='tests/kmer_filter.t'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tests/ustacks.t.log: tests/ustacks.t @p='tests/ustacks.t'; \ b='tests/ustacks.t'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tests/pstacks.t.log: tests/pstacks.t @p='tests/pstacks.t'; \ b='tests/pstacks.t'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ 
$(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(DISTFILES) $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__post_remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__post_remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__post_remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__post_remove_distdir) dist-tarZ: distdir @echo WARNING: "Support for shar distribution archives is" \ "deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__post_remove_distdir) dist-shar: distdir @echo WARNING: "Support for distribution archives compressed with" \ "legacy program 'compress' is deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__post_remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__post_remove_distdir) dist dist-all: $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' $(am__post_remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. 
distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac chmod -R a-w $(distdir) chmod u+w $(distdir) mkdir $(distdir)/_build $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build \ && ../configure \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ --srcdir=.. --prefix="$$dc_install_base" \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__post_remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . 
; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) config.h installdirs: for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f src/$(DEPDIR)/$(am__dirstamp) -rm -f src/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-binPROGRAMS clean-generic mostlyclean-am distclean: distclean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf src/$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-nobase_pkgdataDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-data-hook install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -rf src/$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS \ uninstall-nobase_pkgdataDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: all check-am install-am install-data-am install-strip \ uninstall-am .PHONY: CTAGS GTAGS TAGS all all-am am--refresh check check-TESTS \ check-am clean clean-binPROGRAMS clean-cscope clean-generic \ cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ distcheck distclean distclean-compile distclean-generic \ distclean-hdr distclean-tags distcleancheck distdir \ distuninstallcheck dvi dvi-am html html-am info info-am \ install install-am install-binPROGRAMS install-data \ install-data-am install-data-hook install-dist_binSCRIPTS \ install-dvi install-dvi-am install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-nobase_pkgdataDATA 
install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-binPROGRAMS \ uninstall-dist_binSCRIPTS uninstall-hook \ uninstall-nobase_pkgdataDATA debug: $(MAKE) all "CXXFLAGS=-g -Wall -DDEBUG -std=gnu++0x" install-data-hook: sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/denovo_map.pl > $(DESTDIR)$(bindir)/denovo_map.pl.subst mv $(DESTDIR)$(bindir)/denovo_map.pl.subst $(DESTDIR)$(bindir)/denovo_map.pl chmod +x $(DESTDIR)$(bindir)/denovo_map.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/ref_map.pl > $(DESTDIR)$(bindir)/ref_map.pl.subst mv $(DESTDIR)$(bindir)/ref_map.pl.subst $(DESTDIR)$(bindir)/ref_map.pl chmod +x $(DESTDIR)$(bindir)/ref_map.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/export_sql.pl > $(DESTDIR)$(bindir)/export_sql.pl.subst mv $(DESTDIR)$(bindir)/export_sql.pl.subst $(DESTDIR)$(bindir)/export_sql.pl chmod +x $(DESTDIR)$(bindir)/export_sql.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/index_radtags.pl > $(DESTDIR)$(bindir)/index_radtags.pl.subst mv $(DESTDIR)$(bindir)/index_radtags.pl.subst $(DESTDIR)$(bindir)/index_radtags.pl chmod +x $(DESTDIR)$(bindir)/index_radtags.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_radtags.pl > $(DESTDIR)$(bindir)/load_radtags.pl.subst mv $(DESTDIR)$(bindir)/load_radtags.pl.subst $(DESTDIR)$(bindir)/load_radtags.pl chmod +x $(DESTDIR)$(bindir)/load_radtags.pl sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/sort_read_pairs.pl > $(DESTDIR)$(bindir)/sort_read_pairs.pl.subst mv 
$(DESTDIR)$(bindir)/sort_read_pairs.pl.subst $(DESTDIR)$(bindir)/sort_read_pairs.pl chmod +x $(DESTDIR)$(bindir)/sort_read_pairs.pl sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/exec_velvet.pl > $(DESTDIR)$(bindir)/exec_velvet.pl.subst mv $(DESTDIR)$(bindir)/exec_velvet.pl.subst $(DESTDIR)$(bindir)/exec_velvet.pl chmod +x $(DESTDIR)$(bindir)/exec_velvet.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_sequences.pl > $(DESTDIR)$(bindir)/load_sequences.pl.subst mv $(DESTDIR)$(bindir)/load_sequences.pl.subst $(DESTDIR)$(bindir)/load_sequences.pl chmod +x $(DESTDIR)$(bindir)/load_sequences.pl sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \ $(DESTDIR)$(bindir)/stacks_export_notify.pl > $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst mv $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst $(DESTDIR)$(bindir)/stacks_export_notify.pl chmod +x $(DESTDIR)$(bindir)/stacks_export_notify.pl sed -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \ $(DESTDIR)$(pkgdatadir)/php/constants.php.dist > $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst mv $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst $(DESTDIR)$(pkgdatadir)/php/constants.php.dist echo $(VERSION) > $(DESTDIR)$(pkgdatadir)/php/version.php $(install_sh) -d -m 755 $(DESTDIR)$(pkgdatadir)/php/export uninstall-hook: rm -rf $(DESTDIR)$(pkglocalstatedir) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: stacks-1.35/php/000755 000765 000024 00000000000 12574070564 014303 5ustar00catchenstaff000000 000000 stacks-1.35/README000644 000765 000024 00000020367 12574066143 014402 0ustar00catchenstaff000000 000000 The Stacks Pipeline ------------------- 0. Prerequisites Stacks should build on any standard UNIX-like environment (Apple OS X, Linux, etc.) 
Stacks is an independent pipeline and can be run without any additional external software. Note: Apple OS X does not use the GNU Compiler Collection, which is standard on Linux-based systems. Instead, Apple distributes CLANG, which is a nice compiler but does not yet support the OpenMP library which Stacks relies on for parallel processing. Stacks can still be built and run on an Apple system, however, you will have to disable building with OpenMP (supply the --disable-openmp flag to configure) and use non-parallelized code. If you want to install a parallelized version of Stacks, you can install GCC by hand, or using a package system such as Homebrew (http://brew.sh/) or MacPorts (http://www.macports.org/). Several Perl scripts are distributed with Stacks to run the pipeline components and upload pipeline output to the MySQL database serer. For these to work, you must have the Perl DBI module installed with the MySQL driver. Most Linux distributions will include the Perl DBI module, but if not Perl modules are easily installed with the cpan tool. 1. Install optional components: The performance of Stacks can be improved by installing two, optional external libraries. First, to enable reading of BAM files, install Samtools: http://samtools.sourceforge.net/. Note: Stacks is not yet compatible with newer Samtools versions that use HTSlib. For now, you must use Samtools version 0.1.19, which was the last non-HTSlib version: http://sourceforge.net/projects/samtools/files/samtools/0.1.19/ Second, to lower memory usage install Google's SparseHash class. http://code.google.com/p/sparsehash/ If you are running a version of Linux, the above software can be installed via the package manager. 
If you are using Ubuntu, you can install the following packages: % sudo apt-get install libdbd-mysql-perl % sudo apt-get install libsparsehash-dev % sudo apt-get install samtools % sudo apt-get install libbam-dev A similar set of commands can be executed on a RedHat derived Linux system using yum, or another package manager on other Linux distributions. 2. Build the software. Stacks uses the standard autotools install: % tar xfvz stacks_x.xx.tar.gz % cd stacks_x.xx % ./configure You can change the root of the install location (/usr/local/ on most operating systems) by specifying the --prefix command line option to the configure script. % ./configure --prefix=/home/smith/local You can enable Sparsehash and BAM by adding the following options: % ./configure --enable-sparsehash --enable-bam You probably need to specify the BAM library location: % ./configure --enable-bam \ --with-bam-include-path=/usr/local/include/bam \ --with-bam-lib-path=/usr/local/lib Or, if you installed with Ubuntu packages: % ./configure --enable-bam \ --with-bam-include-path=/usr/include/samtools \ --with-bam-lib-path=/usr/lib % make You can speed up the build if you have more than one processor: % make -j 8 3. Install the software. % sudo make install A default Stacks install will install files in the following way: /usr/local/bin - stacks executables and perl scripts /usr/local/share/stacks - PHP files for the web interface and SQL files for creating the MySQL database The pipeline is now ready to run. The remaining install instructions are to get the web interface up and running. The web interface is very useful for visualization and more or less required for building genetic maps. However, Stacks does not depend on the web interface to run. The Stacks Web Interface ------------------------ To visualize data, Stacks uses a web-based interface (written in PHP) that interacts with a MySQL database server. MySQL provides various functions to store, sort, and export data from a database. 
For this to work, you must have a PHP-enabled Apache web server installed as well as the MDB2 Pear module to provide MySQL interaction. 0. Prerequisites If you want to export data in Microsoft Excel Spreadsheets, you will need the Spreadsheet::WriteExcel Perl module. Most server installations will provide Apache, MySQL, Perl, and PHP by default. While installing these components is beyond these instructions, here are some links that might be useful: 1. MySQL Database: http://www.mysql.com/downloads/mysql/ 2. Spreadsheet Perl Module: http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel-2.37/ 3. MDB2 Pear Module: http://pear.php.net/package/MDB2/ 4. MDB2 MySQL driver: http://pear.php.net/package/MDB2_Driver_mysql/ (PHP modules are easily installed with the pear tool. Many operating systems already have these tools installed - such as OS X and some Linux distributions. Most Linux systems will provide all of these tools as packages that can be installed using yum, apt-get, or a similar piece of software.) 1. Edit the MySQL configuration file, installed in /usr/local/share/stacks/sql/mysql.cnf.dist, to enable access to the database from the Stacks scripts. % cd /usr/local/share/stacks/sql/ % cp mysql.cnf.dist mysql.cnf Edit the file to reflect the proper username, password, and host to use to access MySQL. The various scripts that access the database will search for a MySQL configuration file in your home directory before using the Stacks-distributed copy. If you already have a personal account set up and configured (in ~/.my.cnf) you can continue to use these credentials instead of setting up new, common ones. If you just installed MySQL and have not added any users, you can do so with these commands: % mysql mysql> GRANT ALL ON *.* TO 'stacks_user'@'localhost' IDENTIFIED BY 'stackspassword'; Edit /usr/local/share/stacks/sql/mysql.cnf to contain the username and password you specified to MySQL. 
(This information was taken from: http://dev.mysql.com/doc/refman/5.1/en/grant.html) 2. Enable the Stacks web interface in the Apache webserver. Add the following lines to your Apache configuration to make the Stacks PHP files visible to the web server and to provide a easily readable URL to access them: Order deny,allow Deny from all Allow from all Require all granted Alias /stacks "/usr/local/share/stacks/php" A sensible way to do this is to create the file stacks.conf with the above lines. If you are using Apache 2.3 or earlier: --------------------------------------- Place the stacks.conf file in either /etc/apache2/conf.d/ or /etc/httpd/conf.d/ directory (depending on your Linux distro) and restart the apache server: # vi /etc/apache2/conf.d/stacks.conf # apachectl restart (See the Apache configuration for more information on what these do: http://httpd.apache.org/docs/2.0/mod/core.html#directory) If you are using Apache 2.4 or later: --------------------------------------- Place the stacks.conf file in /etc/apache2/conf-available directory and then create a symlink to it in the /etc/apache2/conf-enabled directory. Then restart Apache. Like so: # vi /etc/apache2/conf-available/stacks.conf # ln -s /etc/apache2/conf-available/stacks.conf /etc/apache2/conf-enabled/stacks.conf # apachectl restart 3. Provide access to the MySQL database from the web interface Edit the PHP configuration file (constants.php.dist) to allow it access to the MySQL database. Change the file to include the proper database username ($db_user), password ($db_pass), and hostname ($db_host). Rename the distribution file so it is active. % cp /usr/local/share/stacks/php/constants.php.dist /usr/local/share/stacks/php/constants.php % vi /usr/local/share/stacks/php/constants.php You may find it advantageous to create a specific MySQL user with limited permissions - SELECT, UPDATE, and DELETE to allow users to interact with the database through the web interface. 4. 
Enable web-based exporting from the MySQL database. Edit the stacks_export_notify.pl script to specify the email and SMTP server to use in notification messages. Ensure that the permissions of the php/export directory allow the webserver to write to it. Assuming your web server user is 'www': % chown www /usr/local/share/stacks/php/export stacks-1.35/scripts/000755 000765 000024 00000000000 12574070564 015203 5ustar00catchenstaff000000 000000 stacks-1.35/sql/000755 000765 000024 00000000000 12574070564 014313 5ustar00catchenstaff000000 000000 stacks-1.35/src/000755 000765 000024 00000000000 12574070564 014303 5ustar00catchenstaff000000 000000 stacks-1.35/tests/000755 000765 000024 00000000000 12574070564 014656 5ustar00catchenstaff000000 000000 stacks-1.35/tests/kmer_filter.t000644 000765 000024 00000001363 12335173442 017343 0ustar00catchenstaff000000 000000 #!/usr/bin/env bash # Preamble test_path=$(cd `dirname "${BASH_SOURCE[0]}"` && pwd) test_data_path="$test_path/"$(basename "${BASH_SOURCE[0]}" | sed -e 's@\.t$@@') source $test_path/setup.sh plan 5 #kmer_filter tests ok_ "filter out rare kmers" \ 000_rare \ "kmer_filter -f %in/in.fastq -o %out --rare" ok_ "filter out overly abundant kmers" \ 001_abundant \ "kmer_filter -f %in/in.fastq -o %out --abundant" ok_ "set max kmer frequency for abundance filtering" \ 002_mkf \ "kmer_filter -f %in/in.fastq -o %out --abundant --max_k_freq 10" skip_ "input gzfastq" \ 003_ingzfastq \ "kmer_filter -i gzfastq -f %in/in.fastq.gz -o out" ok_ "output fastq" \ 004_outfasta \ "kmer_filter -y fasta -i $freq_in -o %out"stacks-1.35/tests/process_radtags.t000644 000765 000024 00000005737 12441417455 020237 0ustar00catchenstaff000000 000000 #!/usr/bin/env bash # Preamble test_path=$(cd `dirname "${BASH_SOURCE[0]}"` && pwd) test_data_path="$test_path/"$(basename "${BASH_SOURCE[0]}" | sed -e 's@\.t$@@') source $test_path/setup.sh # Setup barcodes=$test_data_path/frequent_data/Barcodes.txt 
freq_in=$test_data_path/frequent_data/in.fastq.gz freq_in2=$test_data_path/frequent_data/in.fastq plan 15 # # Example libtap tests. Uncomment to run. # ok "This test will pass" true # ok "This test will fail" ls -al /this/file/does/not/exist # diag 'I just love word plays ...' # ok "This test is expected to fail# TODO fix this" ls -al /neither/does/this/file # skip "This command doesn't make sense:" more /dev/null # process_radtags tests ok_ 'input gzfastq' \ 000_input_gzfastq \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI" ok_ 'input fastq' \ 001_input_fastq \ "process_radtags -i fastq -f $freq_in2 -o %out -b $barcodes -E phred33 -e sbfI" diag 'FIXME: Input files for this test are NOT actaully phred64 encoded! This is just an example test...' ok_ 'input phred64' \ 002_input_phred64 \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred64 -e sbfI" ok_ 'clean' \ 003_clean_data \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -c" ok_ 'discarded reads' \ 004_discarded_reads \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -D" ok_ 'fasta output' \ 005_output_fasta \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -y fasta" ok_ 'discard low quality reads' \ 006_discard_lq \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -q" ok_ 'rescue barcodes and radtags' \ 007_rescue_bcrt \ "process_radtags -i gzfastq -p %in -o %out -b $barcodes -E phred33 -e sbfI -r" ok_ 'truncate final read length' \ 008_truncate \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -t 50" ok_ 'set window length' \ 009_winlen \ "process_radtags -i gzfastq -p %in -o %out -b $barcodes -E phred33 -e sbfI -q -w .12" ok_ 'minimum window score' \ 010_minscore \ "process_radtags -i gzfastq -f $freq_in -o %out -b $barcodes -E phred33 -e sbfI -q -s 15" ok_ 'merge output' \ 011_merge \ 
"process_radtags -i gzfastq -f %in/in.fastq.gz -o %out -E phred33 -e sbfI --merge" ok_ 'Remove sequences marked by Illumina as failing chastity/purity filter' \ 012_filt_ill \ "process_radtags -i gzfastq -f %in/in.fastq.gz -o %out -E phred33 -e sbfI -b $barcodes --filter_illumina" ok_ 'Disable checking for RAD site' \ 013_disrc \ "process_radtags -i gzfastq -f %in/in.fastq.gz -o %out -E phred33 -e sbfI -b $barcodes --disable_rad_check" ok_ 'Provide distance between barcodes for rescue' \ 014_bcdist \ "process_radtags -i gzfastq -f %in/in.fastq.gz -o %out -E phred33 -e sbfI -b $barcodes --barcode_dist 1 -r" # I'm not sure yet what finish() does. finish stacks-1.35/tests/pstacks.t000644 000765 000024 00000003656 12441417455 016522 0ustar00catchenstaff000000 000000 #!/usr/bin/env bash # Preamble test_path=$(cd `dirname "${BASH_SOURCE[0]}"` && pwd) test_data_path="$test_path/"$(basename "${BASH_SOURCE[0]}" | sed -e 's@\.t$@@') source $test_path/setup.sh freq_in=$test_data_path/frequent_input/in.bam freq_in2=$test_data_path/frequent_input/in2.bam freq_in3=$test_data_path/frequent_input/in.sam plan 12 ok_ 'input bam' \ 000_inbam \ "pstacks -t bam -f $freq_in -o %out" ok_ 'input sam' \ 001_insam \ "pstacks -t sam -f $freq_in3 -o %out" skip_ 'input bowtie - need data input file type' \ 002_inbowtie \ "pstacks -t bowtie -f %in/??? 
-o %out" ok_ 'minimum coverage depth for contig report' \ 003_mincovdepth \ "pstacks -t bam -f $freq_in -o %out -m 2" ok_ 'R^2 significance level of 0.1 for calling homozygote/heterozygote' \ 004_alpha0.1 \ "pstacks -t bam -f %in/in.bam -o %out --alpha 0.1" ok_ 'R^2 significance level of 0.05 for calling homozygote/heterozygote' \ 005_alpha0.05 \ "pstacks -t bam -f %in/in.bam -o %out --alpha 0.05" ok_ 'R^2 significance level of 0.01 for calling homozygote/heterozygote' \ 006_alpha0.01 \ "pstacks -t bam -f %in/in.bam -o %out --alpha 0.01" ok_ 'R^2 significance level of 0.001 for calling homozygote/heterozygote' \ 007_alpha0.001 \ "pstacks -t bam -f %in/in.bam -o %out --alpha 0.001" ok_ 'For bounded model, specify upper bound' \ 008_bound_high \ "pstacks -t bam -f $freq_in2 -o %out --model_type bounded --bound_high 0.5" ok_ 'For bounded model, specify lower bound' \ 009_bound_low \ "pstacks -t bam -f $freq_in2 -o %out --model_type bounded --bound_low 0.2" ok_ 'For bounded model, specify upper and lower bounds' \ 010_bounded \ "pstacks -t bam -f $freq_in2 -o %out --model_type bounded --bound_high 0.5 --bound_low 0.2" ok_ 'For fixed model, specify barcode error frequency' \ 011_fixed \ "pstacks -t bam -f %in/in.bam -o %out --model_type fixed --bc_err_freq 0.39" finishstacks-1.35/tests/ustacks.t000644 000765 000024 00000006321 12441417455 016517 0ustar00catchenstaff000000 000000 #!/usr/bin/env bash # Preamble test_path=$(cd `dirname "${BASH_SOURCE[0]}"` && pwd) test_data_path="$test_path/"$(basename "${BASH_SOURCE[0]}" | sed -e 's@\.t$@@') source $test_path/setup.sh freq_in=$test_data_path/frequent_inputs/in.fastq.gz freq_in2=$test_data_path/frequent_inputs/in2.fastq.gz freq_in3=$test_data_path/frequent_inputs/in.fasta freq_in4=$test_data_path/frequent_inputs/in.fasta.gz freq_in5=$test_data_path/frequent_inputs/in.fastq plan 21 ref_map ok_ 'input gzipped fastq' \ 000_ingzfastq \ "ustacks -t gzfastq -f $freq_in -o %out" \ ok_ 'input fastq' \ 001_infastq \ "ustacks -t 
fastq -f $freq_in5 -o %out" ok_ 'input fasta' \ 002_infasta \ "ustacks -t fasta -f $freq_in3 -o %out" ok_ 'inpus gzipped fasta' \ 003_ingzfasta \ "ustacks -t gzfasta -f $freq_in4 -o %out" ok_ 'set sample ID=1 (AKA MySQL column 2)' \ 004_sqlid \ "ustacks -t gzfastq -f $freq_in -o %out -i 1" ok_ 'specify minimum depth of coverage required to call a stack' \ 005_mindepcov \ "ustacks -t gzfastq -f $freq_in -o %out -m 3" ok_ 'specify maximum distance between stacks' \ 006_maxdistbtw \ "ustacks -t gzfastq -f $freq_in -o %out -M 3" ok_ 'specify max distance to align secondary reads to primary stacks' \ 007_maxdist_srps \ "ustacks -t gzfastq -f $freq_in -o %out -N 5" skip_ 'retain unused reads' \ 008_retain \ "ustacks -t gzfastq -f %in -o %out -R" ok_ 'disable haplotype calling from secondary reads' \ 009_hapcall \ "ustacks -t gzfastq -f $freq_in -o %out -H" ok_ 'remove highly repetative (likely error) reads' \ 010_remrep \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out -r" ok_ 'enable deleveraging algorithm' \ 011_deleverage \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out -d" \ zip_test ok_ 'specify max number of stacks at a de novo locus' \ 012_maxlocus \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --max_locus_stacks 4" ok_ 'specify chi square significance level for calling heteroz/homozygote' \ 013_alpha0.1 \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --alpha 0.1" ok_ 'specify chi square significance level for calling heteroz/homozygote' \ 014_alpha0.05 \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --alpha 0.05" ok_ 'specify chi square significance level for calling heteroz/homozygote' \ 015_alpha0.01 \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --alpha 0.01" ok_ 'specify chi square significance level for calling heteroz/homozygote' \ 016_alpha0.001 \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --alpha 0.001" ok_ 'For bounded model, specify upper bound' \ 017_bound_high \ "ustacks -t gzfastq -f $freq_in2 -o %out --model_type bounded --bound_high 0.01" 
ok_ 'For bounded model, specify lower bound' \ 018_bound_low \ "ustacks -t gzfastq -f $freq_in2 -o %out --model_type bounded --bound_low 0.001" ok_ 'For bounded model, specify upper and lower bounds' \ 019_bounded \ "ustacks -t gzfastq -f $freq_in2 -o %out --model_type bounded --bound_low 0.001 --bound_high 0.01" ok_ 'For fixed model, specify barcode error frequency rate' \ 020_fixed \ "ustacks -t gzfastq -f %in/in.fastq.gz -o %out --model_type fixed --bc_err_freq 0.98" finishstacks-1.35/src/BamI.h000644 000765 000024 00000020126 12335173442 015257 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __BAMI_H__ #define __BAMI_H__ // // Code to parse binary BAM format. This format is created for // reads that have been aligned to a reference genome. 
// #ifdef HAVE_BAM #include "input.h" #include "bam.h" class Bam: public Input { bamFile bam_fh; bam1_t *aln; map chrs; int parse_header(); int parse_cigar(const char *, vector > &, bool); int parse_bam_cigar(vector > &, bool); int find_start_bp_pos(int, vector > &); int find_start_bp_neg(int, vector > &); int edit_gaps(vector > &, char *); public: Bam(const char *path) : Input() { this->path = string(path); this->bam_fh = bam_open(path, "r"); this->aln = bam_init1(); this->parse_header(); }; ~Bam() { bam_close(this->bam_fh); bam_destroy1(this->aln); }; Seq *next_seq(); int next_seq(Seq &) { return 0; }; }; int Bam::parse_header() { bam_header_t *bamh = bam_header_init(); bamh = bam_header_read(this->bam_fh); for (uint j = 0; j < (uint) bamh->n_targets; j++) { // // Record the mapping from integer ID to chromosome name that we will see in BAM records. // this->chrs[j] = string(bamh->target_name[j]); } bam_header_destroy(bamh); return 0; } Seq * Bam::next_seq() { int bytes_read = 0; int flag = 0; // // Read a record from the file, skipping unmapped reads, and place it in a Seq object. // do { bytes_read = bam_read1(this->bam_fh, this->aln); if (bytes_read <= 0) return NULL; flag = ((this->aln->core.flag & BAM_FUNMAP) != 0); } while (flag == 1); // // Check which strand this is aligned to: // SAM reference: FLAG bit 0x10 - sequence is reverse complemented // flag = ((this->aln->core.flag & BAM_FREVERSE) != 0); // // If the read was aligned on the reverse strand (and is therefore reverse complemented) // alter the start point of the alignment to reflect the right-side of the read, at the // end of the RAD cut site. // // To accomplish this, we must parse the alignment CIGAR string // vector > cigar; this->parse_bam_cigar(cigar, flag); uint bp = flag ? this->find_start_bp_neg(this->aln->core.pos, cigar) : this->find_start_bp_pos(this->aln->core.pos, cigar); // // Fetch the sequence. 
// string seq; uint8_t j; for (int i = 0; i < this->aln->core.l_qseq; i++) { j = bam1_seqi(bam1_seq(this->aln), i); switch(j) { case 1: seq += 'A'; break; case 2: seq += 'C'; break; case 4: seq += 'G'; break; case 8: seq += 'T'; break; case 15: seq += 'N'; break; } } // // Fetch the quality score. // string qual; uint8_t *q = bam1_qual(this->aln); for (int i = 0; i < this->aln->core.l_qseq; i++) { qual += char(int(q[i]) + 33); } string chr = this->chrs[this->aln->core.tid]; Seq *s = new Seq((const char *) bam1_qname(this->aln), seq.c_str(), qual.c_str(), chr.c_str(), bp, flag ? minus : plus); if (cigar.size() > 0) this->edit_gaps(cigar, s->seq); return s; } int Bam::parse_bam_cigar(vector > &cigar, bool orientation) { int op, len; char c; uint32_t *cgr = bam1_cigar(this->aln); for (int k = 0; k < this->aln->core.n_cigar; k++) { op = cgr[k] & BAM_CIGAR_MASK; len = cgr[k] >> BAM_CIGAR_SHIFT; switch(op) { case BAM_CMATCH: c = 'M'; break; case BAM_CINS: c = 'I'; break; case BAM_CDEL: c = 'D'; break; case BAM_CREF_SKIP: c = 'N'; break; case BAM_CSOFT_CLIP: c = 'S'; break; case BAM_CHARD_CLIP: c = 'H'; break; case BAM_CPAD: c = 'P'; break; } // // If aligned to the negative strand, sequence has been reverse complemented and // CIGAR string should be interpreted in reverse. // if (orientation == plus) cigar.push_back(make_pair(c, len)); else cigar.insert(cigar.begin(), make_pair(c, len)); } return 0; } int Bam::parse_cigar(const char *cigar_str, vector > &cigar, bool orientation) { char buf[id_len]; int dist; const char *p, *q; p = cigar_str; if (*p == '*') return 0; while (*p != '\0') { q = p + 1; while (*q != '\0' && isdigit(*q)) q++; strncpy(buf, p, q - p); buf[q-p] = '\0'; dist = atoi(buf); // // If aligned to the negative strand, sequence has been reverse complemented and // CIGAR string should be interpreted in reverse. 
// if (orientation == plus) cigar.push_back(make_pair(*q, dist)); else cigar.insert(cigar.begin(), make_pair(*q, dist)); p = q + 1; } return 0; } int Bam::find_start_bp_neg(int aln_bp, vector > &cigar) { uint size = cigar.size(); char op; uint dist; for (uint i = 0; i < size; i++) { op = cigar[i].first; dist = cigar[i].second; switch(op) { case 'I': break; case 'S': if (i < size - 1) aln_bp += dist; break; case 'M': case 'D': aln_bp += dist; break; } } return aln_bp - 1; } int Bam::find_start_bp_pos(int aln_bp, vector > &cigar) { char op; uint dist; op = cigar[0].first; dist = cigar[0].second; if (op == 'S') aln_bp -= dist; return aln_bp; } int Bam::edit_gaps(vector > &cigar, char *seq) { char buf[id_len]; uint size = cigar.size(); char op; uint dist, bp, len, buf_len, j, k, stop; len = strlen(seq); bp = 0; for (uint i = 0; i < size; i++) { op = cigar[i].first; dist = cigar[i].second; switch(op) { case 'S': stop = bp + dist; stop = stop > len ? len : stop; while (bp < stop) { seq[bp] = 'N'; bp++; } break; case 'D': // // A deletion has occured in the read relative to the reference genome. // Pad the read with sufficent Ns to match the deletion, shifting the existing // sequence down. Trim the final length to keep the read length consistent. // strncpy(buf, seq + bp, id_len - 1); buf[id_len - 1] = '\0'; buf_len = strlen(buf); stop = bp + dist; stop = stop > len ? len : stop; while (bp < stop) { seq[bp] = 'N'; bp++; } j = bp; k = 0; while (j < len && k < buf_len) { seq[j] = buf[k]; k++; j++; } break; case 'I': // // An insertion has occurred in the read relative to the reference genome. Delete the // inserted bases and padd the end of the read with Ns. // k = bp + dist; strncpy(buf, seq + k, id_len - 1); buf[id_len - 1] = '\0'; buf_len = strlen(buf); j = bp; k = 0; while (j < len && k < buf_len) { seq[j] = buf[k]; k++; j++; } stop = j + dist; stop = stop > len ? 
len : stop; while (j < stop) { seq[j] = 'N'; j++; } break; case 'M': bp += dist; break; } } return 0; } #else // If HAVE_BAM is undefined and BAM library is not present. #include "input.h" class Bam: public Input { public: Bam(const char *path) : Input() { cerr << "BAM support was not enabled when Stacks was compiled.\n"; }; ~Bam() {}; Seq *next_seq() { return NULL; }; int next_seq(Seq &) { return 0; }; }; #endif // HAVE_BAM #endif // __BAMI_H__ stacks-1.35/src/BamUnalignedI.h000644 000765 000024 00000006731 12571641525 017120 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __BAMUNALIGNEDI_H__ #define __BAMUNALIGNEDI_H__ // // Code to parse binary BAM format. This format is created for // reads that have NOT been aligned to a reference genome. 
// #ifdef HAVE_BAM #include "input.h" #include "bam.h" class BamUnAln: public Input { bamFile bam_fh; bam1_t *aln; map chrs; int parse_header(); public: BamUnAln(const char *path) : Input() { this->path = string(path); this->bam_fh = bam_open(path, "r"); this->aln = bam_init1(); this->parse_header(); }; BamUnAln(string path) : Input() { this->path = path; this->bam_fh = bam_open(path.c_str(), "r"); this->aln = bam_init1(); this->parse_header(); }; ~BamUnAln() { bam_close(this->bam_fh); bam_destroy1(this->aln); }; Seq *next_seq(); int next_seq(Seq &) { return 0; }; }; int BamUnAln::parse_header() { bam_header_t *bamh = bam_header_init(); bamh = bam_header_read(this->bam_fh); for (uint j = 0; j < (uint) bamh->n_targets; j++) { // // Record the mapping from integer ID to chromosome name that we will see in BAM records. // this->chrs[j] = string(bamh->target_name[j]); } bam_header_destroy(bamh); return 0; } Seq * BamUnAln::next_seq() { int bytes_read = 0; // // Read a record from the file and place it in a Seq object. // bytes_read = bam_read1(this->bam_fh, this->aln); if (bytes_read <= 0) return NULL; // // Fetch the sequence. // string seq; uint8_t j; for (int i = 0; i < this->aln->core.l_qseq; i++) { j = bam1_seqi(bam1_seq(this->aln), i); switch(j) { case 1: seq += 'A'; break; case 2: seq += 'C'; break; case 4: seq += 'G'; break; case 8: seq += 'T'; break; case 15: seq += 'N'; break; } } // // Fetch the quality score. // string qual; uint8_t *q = bam1_qual(this->aln); for (int i = 0; i < this->aln->core.l_qseq; i++) { qual += char(int(q[i]) + 33); } string chr = this->chrs[this->aln->core.tid]; // // Attempt to parse the query name for this read. // Seq *s = new Seq((const char *) bam1_qname(this->aln), seq.c_str(), qual.c_str()); return s; } #else // If HAVE_BAM is undefined and BAM library is not present. 
#include "input.h" class BamUnAln: public Input { public: BamUnAln(const char *path) : Input() { cerr << "BAM support was not enabled when Stacks was compiled.\n"; }; BamUnAln(string path) : Input() { cerr << "BAM support was not enabled when Stacks was compiled.\n"; }; ~BamUnAln() {}; Seq *next_seq() { return NULL; }; int next_seq(Seq &) { return 0; }; }; #endif // HAVE_BAM #endif // __BAMUNALIGNEDI_H__ stacks-1.35/src/bootstrap.h000644 000765 000024 00000021574 12335173442 016474 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __BOOTSTRAP_H__ #define __BOOTSTRAP_H__ #include #include using std::vector; #include "smoothing_utils.h" extern double sigma; extern int bootstrap_reps; extern bool bootstrap_wl; extern set bootstraplist; // // Bootstrap resamplign structure. // class BSample { public: int bp; int alleles; bool fixed; double stat[PopStatSize]; BSample() { this->bp = 0; this->alleles = 0; this->fixed = false; for (uint i = 0; i < PopStatSize; i++) this->stat[i] = 0.0; } }; template class Bootstrap { double *weights; // Weight matrix to apply while smoothing. 
vector > stats; uint num_stats; public: Bootstrap(uint size) { this->num_stats = size; this->weights = calc_weights(); this->stats.resize(size, vector()); } ~Bootstrap() { delete [] this->weights; } int add_data(vector &); int execute(vector &); int execute_mixed(vector &); double pval(double, vector &); }; template int Bootstrap::add_data(vector &sites) { for (uint i = 0; i < sites.size(); i++) { if (sites[i] != NULL && sites[i]->fixed == false) for (uint j = 0; j < this->num_stats; j++) this->stats[j].push_back(sites[i]->stat[j]); } return 0; } template int Bootstrap::execute(vector &sites) { #pragma omp parallel { PopStat *c; double final_weight, sum, weighted_stat[PopStatSize]; int dist, index; uint pos_l = 0; uint pos_u = 0; //#pragma omp for schedule(dynamic, 1) for (uint pos_c = 0; pos_c < sites.size(); pos_c++) { c = sites[pos_c]; if (c == NULL) continue; if (bootstrap_wl && bootstraplist.count(c->loc_id) == 0) continue; // cerr << "Bootstrapping " << c->loc_id << "; pos_c: " << pos_c << "; bp: " << c->bp << "\n"; determine_window_limits(sites, c->bp, pos_l, pos_u); int size = 0; for (uint i = pos_l; i < pos_u; i++) if (sites[i] != NULL) size++; // // Allocate an array of bootstrap resampling objects. // BSample *bs = new BSample[size]; // // Populate the BSample objects. // int j = 0; for (uint i = pos_l; i < pos_u; i++) { if (sites[i] == NULL) continue; bs[j].bp = sites[i]->bp; bs[j].alleles = sites[i]->alleles; j++; } vector > resampled_stats(this->num_stats, vector()); for (uint i = 0; i < this->num_stats; i++) resampled_stats[i].reserve(bootstrap_reps); // // Bootstrap this bitch. // for (int i = 0; i < bootstrap_reps; i++) { // if (i % 100 == 0) cerr << " Bootsrap rep " << i << "\n"; for (uint k = 0; k < this->num_stats; k++) weighted_stat[k] = 0.0; sum = 0.0; for (j = 0; j < size; j++) { // // Distance from center of window. // dist = bs[j].bp > c->bp ? bs[j].bp - c->bp : c->bp - bs[j].bp; // // Resample for this round of bootstrapping. 
// index = (int) (this->stats[0].size() * (random() / (RAND_MAX + 1.0))); for (uint k = 0; k < this->num_stats; k++) bs[j].stat[k] = this->stats[k][index]; final_weight = (bs[j].alleles - 1) * this->weights[dist]; for (uint k = 0; k < this->num_stats; k++) weighted_stat[k] += bs[j].stat[k] * final_weight; sum += final_weight; } // cerr << " New weighted Fst value: " << weighted_fst / sum << "\n"; for (uint k = 0; k < this->num_stats; k++) resampled_stats[k].push_back(weighted_stat[k] / sum); } // // Cacluate the p-value for this window based on the empirical Fst distribution. // for (uint k = 0; k < this->num_stats; k++) { sort(resampled_stats[k].begin(), resampled_stats[k].end()); c->bs[k] = this->pval(c->smoothed[k], resampled_stats[k]); } delete [] bs; } } return 0; } template int Bootstrap::execute_mixed(vector &sites) { #pragma omp parallel { PopStat *c; double final_weight, sum, weighted_stat[PopStatSize]; int dist, index; uint pos_l = 0; uint pos_u = 0; //#pragma omp for schedule(dynamic, 1) for (uint pos_c = 0; pos_c < sites.size(); pos_c++) { c = sites[pos_c]; if (c == NULL || c->fixed == true) continue; if (bootstrap_wl && bootstraplist.count(c->loc_id) == 0) continue; // cerr << "Bootstrapping " << c->loc_id << "; pos_c: " << pos_c << "; bp: " << c->bp << "\n"; determine_window_limits(sites, c->bp, pos_l, pos_u); int size = 0; for (uint i = pos_l; i < pos_u; i++) if (sites[i] != NULL) size++; // // Allocate an array of bootstrap resampling objects. // BSample *bs = new BSample[size]; // // Populate the BSample objects. // int j = 0; for (uint i = pos_l; i < pos_u; i++) { if (sites[i] == NULL) continue; bs[j].bp = sites[i]->bp; bs[j].alleles = sites[i]->alleles; bs[j].fixed = sites[i]->fixed; for (uint k = 0; k < this->num_stats; k++) bs[j].stat[k] = sites[i]->stat[k]; j++; } // // Precompute the fraction of the window that will not change during resampling. 
// double partial_weighted_stat[this->num_stats]; double partial_sum = 0.0; memset(partial_weighted_stat, 0, this->num_stats); for (j = 0; j < size; j++) { if (bs[j].fixed == false) continue; dist = bs[j].bp > c->bp ? bs[j].bp - c->bp : c->bp - bs[j].bp; final_weight = (bs[j].alleles - 1.0) * this->weights[dist]; partial_sum += final_weight; for (uint k = 0; k < this->num_stats; k++) partial_weighted_stat[k] += bs[j].stat[k] * final_weight; } vector > resampled_stats(this->num_stats, vector()); for (uint i = 0; i < this->num_stats; i++) resampled_stats[i].reserve(bootstrap_reps); // cerr << "Window starts at " << bs[0].bp << "; centered on " << c->bp << "\n"; // // Bootstrap this bitch. // for (int i = 0; i < bootstrap_reps; i++) { // if (i % 100 == 0) cerr << " Bootsrap rep " << i << "\n"; for (uint k = 0; k < this->num_stats; k++) weighted_stat[k] = partial_weighted_stat[k]; sum = partial_sum; for (j = 0; j < size; j++) { if (bs[j].fixed == true) continue; dist = bs[j].bp > c->bp ? bs[j].bp - c->bp : c->bp - bs[j].bp; // // Resample for this round of bootstrapping. // index = (int) (this->stats[0].size() * (random() / (RAND_MAX + 1.0))); for (uint k = 0; k < this->num_stats; k++) bs[j].stat[k] = this->stats[k][index]; final_weight = (bs[j].alleles - 1) * this->weights[dist]; for (uint k = 0; k < this->num_stats; k++) weighted_stat[k] += bs[j].stat[k] * final_weight; sum += final_weight; } // cerr << " New weighted value: " << (weighted_stat[0] / sum) << "\n"; for (uint k = 0; k < this->num_stats; k++) resampled_stats[k].push_back(weighted_stat[k] / sum); } // // Cacluate the p-value for this window based on the empirical Fst distribution. 
// for (uint k = 0; k < this->num_stats; k++) { sort(resampled_stats[k].begin(), resampled_stats[k].end()); c->bs[k] = this->pval(c->smoothed[k], resampled_stats[k]); } delete [] bs; } } return 0; } template double Bootstrap::pval(double stat, vector &dist) { vector::iterator up; double pos; up = upper_bound(dist.begin(), dist.end(), stat); if (up == dist.begin()) pos = 1; else if (up == dist.end()) pos = dist.size(); else pos = up - dist.begin() + 1; double res = 1.0 - (pos / (double) dist.size()); // cerr << "Generated Smoothed Fst Distribution:\n"; // for (uint n = 0; n < dist.size(); n++) // cerr << " n: " << n << "; Fst: " << dist[n] << "\n"; // cerr << "Comparing Fst value: " << stat // << " at position " << (up - dist.begin()) << " out of " // << dist.size() << " positions (converted position: " << pos << "); pvalue: " << res << ".\n"; return res; } #endif // __BOOTSTRAP_H__ stacks-1.35/src/BowtieI.h000644 000765 000024 00000004100 12335173442 016003 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __BOWTIEI_H__ #define __BOWTIEI_H__ // // Code to parse Bowtie's alignment format. This format is created for // reads that have been aligned to a reference genome. It takes the tab-separated form: // // // // One record per line. 
// #include "input.h" class Bowtie: public Input { public: Bowtie(const char *path) : Input(path) {}; ~Bowtie() {}; Seq *next_seq(); int next_seq(Seq &) { return 0; }; }; Seq *Bowtie::next_seq() { vector parts; // // Read a record from the file and place it in a Seq object // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return NULL; } parse_tsv(this->line, parts); strand_type strand = parts[1] == "+" ? plus : minus; // // If the read was aligned on the reverse strand (and is therefore reverse complemented) // alter the start point of the alignment to reflect the right-side of the read, at the // end of the RAD cut site. // int bp = strand == plus ? atoi(parts[3].c_str()) : atoi(parts[3].c_str()) + parts[4].length(); Seq *s = new Seq(parts[0].c_str(), parts[4].c_str(), parts[5].c_str(), parts[2].c_str(), bp, strand); return s; } #endif // __BOWTIEI_H__ stacks-1.35/src/BustardI.h000644 000765 000024 00000005452 12335173442 016171 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // Code to parse Illumina's Bustard file format. It takes the tab-separated form: // // // // One record per line. 
// #ifndef __BUSTARDI_H__ #define __BUSTARDI_H__ #include "input.h" const uint num_bustd_fields = 11; class Bustard: public Input { public: Bustard(const char *path) : Input(path) {}; Bustard(string path) : Input(path.c_str()) {}; ~Bustard() {}; Seq *next_seq(); int next_seq(Seq &); }; Seq *Bustard::next_seq() { vector parts; // // Read a record from the file and place it in a Seq object // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return NULL; } parse_tsv(this->line, parts); if (parts.size() != num_bustd_fields) { cerr << "Error parsing '" << this->path.c_str() << "' found " << parts.size() << " fields, but expecting " << num_bustd_fields << "). " << "Perhaps you should specify the input file type (-i)?\n"; return NULL; } Seq *s = new Seq; s->seq = new char[parts[8].length() + 1]; strcpy(s->seq, parts[8].c_str()); s->qual = new char[parts[9].length() + 1]; strcpy(s->qual, parts[9].c_str()); s->id = new char[id_len]; sprintf(s->id, "@%s:%s:%s:%s:%s#%s/%s", parts[0].c_str(), parts[2].c_str(), parts[3].c_str(), parts[4].c_str(), parts[5].c_str(), parts[6].c_str(), parts[7].c_str()); return s; } int Bustard::next_seq(Seq &s) { vector parts; // // Read a record from the file and place it in a Seq object // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return 0; } parse_tsv(this->line, parts); strcpy(s.seq, parts[2].c_str()); strcpy(s.qual, parts[3].c_str()); sprintf(s.id, "@%s:%s:%s:%s:%s#%s/%s", parts[0].c_str(), parts[1].c_str(), parts[2].c_str(), parts[3].c_str(), parts[4].c_str(), parts[5].c_str(), parts[6].c_str()); return 1; } #endif // __BUSTARDI_H__ stacks-1.35/src/catalog_utils.cc000644 000765 000024 00000021121 12540304631 017425 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // catalog_utils.cc -- common routines for manipulating catalog objects. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "catalog_utils.h" int reduce_catalog(map &catalog, set &whitelist, set &blacklist) { map list; map::iterator it; CSLocus *loc; if (whitelist.size() == 0 && blacklist.size() == 0) return 0; int i = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (whitelist.size() > 0 && whitelist.count(loc->id) == 0) continue; if (blacklist.count(loc->id)) continue; list[it->first] = it->second; i++; } catalog = list; return i; } int implement_single_snp_whitelist(map &catalog, PopSum *psum, map > &whitelist) { map > new_wl; CSLocus *loc; LocTally *t; if (whitelist.size() > 0) { map >::iterator it; for (it = whitelist.begin(); it != whitelist.end(); it++) { loc = catalog[it->first]; t = psum->locus_tally(loc->id); // // If no specific SNPs are specified in the whitelist all SNPs are included, choose the first variant. // if (it->second.size() == 0) { for (uint i = 0; i < loc->snps.size(); i++) if (t->nucs[loc->snps[i]->col].fixed == false) { new_wl[loc->id].insert(loc->snps[i]->col); break; } } else { // // Otherwise, choose the first SNP that is already in the whitelist. 
// for (uint i = 0; i < loc->snps.size(); i++) { if (it->second.count(loc->snps[i]->col) == 0 || t->nucs[loc->snps[i]->col].fixed == true) continue; new_wl[loc->id].insert(loc->snps[i]->col); break; } } } } else { map::iterator it; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) if (t->nucs[loc->snps[i]->col].fixed == false) { new_wl[loc->id].insert(loc->snps[i]->col); break; } } } whitelist = new_wl; return 0; } int implement_random_snp_whitelist(map &catalog, PopSum *psum, map > &whitelist) { map > new_wl; CSLocus *loc; uint index; if (whitelist.size() > 0) { map >::iterator it; for (it = whitelist.begin(); it != whitelist.end(); it++) { loc = catalog[it->first]; if (loc->snps.size() == 0) continue; if (it->second.size() == 0) { index = rand() % loc->snps.size(); new_wl[loc->id].insert(loc->snps[index]->col); } else { do { index = rand() % loc->snps.size(); } while (it->second.count(loc->snps[index]->col) == 0); new_wl[loc->id].insert(loc->snps[index]->col); } } } else { map::iterator it; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->snps.size() > 0) { index = rand() % loc->snps.size(); new_wl[loc->id].insert(loc->snps[index]->col); } } } whitelist = new_wl; return 0; } int check_whitelist_integrity(map &catalog, map > &whitelist) { if (whitelist.size() == 0) return 0; int rm_snps = 0; int rm_loci = 0; CSLocus *loc; map >::iterator it; set::iterator sit; map > new_wl; cerr << "Checking the integrity of the whitelist..."; for (it = whitelist.begin(); it != whitelist.end(); it++) { if (catalog.count(it->first) == 0) { rm_loci++; cerr << "\n Removing locus " << it->first << " from whitelist as it does not exist in the catalog."; } else { loc = catalog[it->first]; if (it->second.size() == 0) { new_wl.insert(make_pair(it->first, std::set())); continue; } set cat_snps; for (uint i = 0; i < loc->snps.size(); i++) 
cat_snps.insert(loc->snps[i]->col); for (sit = it->second.begin(); sit != it->second.end(); sit++) if (cat_snps.count(*sit)) { new_wl[it->first].insert(*sit); } else { rm_snps++; cerr << "\n Removing SNP at column " << *sit << " in locus " << it->first << " from whitelist as it does not exist in the catalog."; } } } whitelist = new_wl; if (rm_loci > 0 || rm_snps > 0) cerr << "\n"; cerr << "done.\n" << "Removed " << rm_loci << " loci and " << rm_snps << " SNPs from the whitelist that were not found in the catalog.\n"; return 0; } int reduce_catalog(map &catalog, map > &whitelist, set &blacklist) { map list; map::iterator it; CSLocus *loc; if (whitelist.size() == 0 && blacklist.size() == 0) return 0; int i = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (whitelist.size() > 0 && whitelist.count(loc->id) == 0) continue; if (blacklist.count(loc->id)) continue; list[it->first] = it->second; i++; } catalog = list; return i; } int reduce_catalog_snps(map &catalog, map > &whitelist, PopMap *pmap) { map::iterator it; CSLocus *loc; Datum **d; if (whitelist.size() == 0) return 0; // // We want to prune out SNP objects that are not in the whitelist. // int pos; vector tmp; vector cols; map obshaps; map::iterator sit; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (whitelist[loc->id].size() == 0) continue; tmp.clear(); cols.clear(); d = pmap->locus(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { if (whitelist[loc->id].count(loc->snps[i]->col) > 0) { tmp.push_back(loc->snps[i]); cols.push_back(i); } else { // // Change the model calls in the samples to no longer contain this SNP. 
// pos = loc->snps[i]->col; for (int j = 0; j < pmap->sample_cnt(); j++) { if (d[j] == NULL || pos >= d[j]->len) continue; if (d[j]->model != NULL) { d[j]->model[pos] = 'U'; } } delete loc->snps[i]; } } loc->snps.clear(); for (uint i = 0; i < tmp.size(); i++) loc->snps.push_back(tmp[i]); map::iterator it; char allele_old[id_len], allele_new[id_len]; // // We need to adjust the catalog's list of haplotypes/alleles // for this locus to account for the pruned SNPs. // for (it = loc->alleles.begin(); it != loc->alleles.end(); it++) { strncpy(allele_old, it->first.c_str(), id_len - 2); allele_old[id_len - 1] = '\0'; for (uint k = 0; k < cols.size(); k++) allele_new[k] = allele_old[cols[k]]; allele_new[cols.size()] = '\0'; obshaps[string(allele_new)] += it->second; } loc->alleles.clear(); for (sit = obshaps.begin(); sit != obshaps.end(); sit++) { loc->alleles[sit->first] = sit->second; } obshaps.clear(); loc->populate_alleles(); // // Now we need to adjust the matched haplotypes to sync to // the SNPs left in the catalog. // // Reducing the lengths of the haplotypes may create // redundant (shorter) haplotypes, we need to remove these. 
// for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; for (uint j = 0; j < d[i]->obshap.size(); j++) { for (uint k = 0; k < cols.size(); k++) d[i]->obshap[j][k] = d[i]->obshap[j][cols[k]]; d[i]->obshap[j][cols.size()] = '\0'; obshaps[d[i]->obshap[j]] += d[i]->depth[j]; } uint j = 0; for (sit = obshaps.begin(); sit != obshaps.end(); sit++) { strcpy(d[i]->obshap[j], sit->first.c_str()); d[i]->depth[j] = sit->second; j++; } while (j < d[i]->obshap.size()) { delete [] d[i]->obshap[j]; j++; } d[i]->obshap.resize(obshaps.size()); d[i]->depth.resize(obshaps.size()); obshaps.clear(); } } return 0; } stacks-1.35/src/catalog_utils.h000644 000765 000024 00000003022 12533677757 017317 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __CATALOG_UTILS_H__ #define __CATALOG_UTILS_H__ #include using std::map; #include using std::set; #include "constants.h" #include "stacks.h" #include "locus.h" #include "PopMap.h" #include "PopSum.h" int check_whitelist_integrity(map &, map > &); int reduce_catalog(map &, set &, set &); int reduce_catalog(map &, map > &, set &); int reduce_catalog_snps(map &, map > &, PopMap *); int implement_single_snp_whitelist(map &, PopSum *, map > &); int implement_random_snp_whitelist(map &, PopSum *, map > &); #endif // __CATALOG_UTILS_H__ stacks-1.35/src/clean.cc000644 000765 000024 00000037727 12574066143 015712 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // clean.cc -- common routines for processing and cleaning raw seqeunce data. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "clean.h" int parse_illumina_v1(const char *file) { const char *p, *q; // // Parse a file name that looks like: s_7_1_0001_qseq.txt ... s_7_1_0120_qseq.txt // but exclude the paired-end files: s_7_2_0001_qseq.txt ... s_7_2_0120_qseq.txt // if (file[0] != 's') return 0; int underscore_cnt = 0; for (p = file; *p != '\0'; p++) { if (*p == '_') { underscore_cnt++; q = p; } } if (underscore_cnt != 4) return 0; // Check the file suffix. 
if (strncmp(q, "_qseq.txt", 8) != 0) return 0; // Make sure it is not the paired-end file p = file; p += 3; if (strncmp(p, "_1_", 3) != 0) return 0; // // Return the position of the paired-end number, so the other file name can be generated. // return (p + 1 - file); } int parse_illumina_v2(const char *file) { const char *p, *q; // // Parse a file name that looks like: lane6_NoIndex_L006_R1_003.fastq // but exclude the paired-end files: lane6_NoIndex_L006_R2_003.fastq // // Another example could be: GfddRAD1_001_ATCACG_L008_R1_001.fastq.gz // and excluding the paired-end file: GfddRAD1_001_ATCACG_L008_R2_001.fastq.gz // // Make sure it ends in "fastq" or "fastq.gz" // for (q = file; *q != '\0'; q++); for (p = q; *p != '.' && p > file; p--); if (strncmp(p, ".gz", 3) == 0) for (p--; *p != '.' && p > file; p--); if (strncmp(p, ".fastq", 6) != 0) return 0; // // Find the part of the name marking the pair, "_R1_", make sure it is not the paired-end file. // p = file; while (*p != '\0') { for (; *p != '_' && *p != '\0'; p++); if (*p == '\0') return 0; if (strncmp(p, "_R1_", 4) == 0) { // // Return the position of the paired-end number, so the other file name can be generated. // return (p + 2 - file); } p++; } return 0; } int parse_input_record(Seq *s, Read *r) { char *p, *q, *z; uint lim; // // Count the number of colons to differentiate Illumina version. // CASAVA 1.8+ has a FASTQ header like this: // @HWI-ST0747:155:C01WHABXX:8:1101:6455:26332 1:N:0: // Or, with the embedded barcode: // @HWI-ST1233:67:D12GNACXX:7:2307:14604:78978 1:N:0:ATCACG // // // Or, parse FASTQ header from previous versions that looks like this: // @HWI-ST0747_0141:4:1101:1240:2199#0/1 // @HWI-ST0747_0143:2:2208:21290:200914#0/1 // char *stop = s->id + strlen(s->id); int colon_cnt = 0; int hash_cnt = 0; for (p = s->id, q = p; q < stop; q++) { colon_cnt += *q == ':' ? 1 : 0; hash_cnt += *q == '#' ? 
1 : 0; } if (colon_cnt == 9 && hash_cnt == 0) { r->fastq_type = illv2_fastq; // // According to Illumina manual, "CASAVA v1.8 User Guide" page 41: // @:::::: ::: // for (p = s->id, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; strcpy(r->machine, p); *q = ':'; } // Run number. for (p = q+1, q = p; *q != ':' && q < stop; q++); //*q = '\0'; // Flowcell ID. for (p = q+1, q = p; *q != ':' && q < stop; q++); //*q = '\0'; for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->lane = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->tile = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->x = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ' ' && q < stop; q++); if (q < stop) { *q = '\0'; r->y = atoi(p); *q = ' '; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; // r->read = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->filter = *p == 'Y' ? true : false; *q = ':'; } // Control Number. for (p = q+1, q = p; *q != ':' && q < stop; q++); //*q = '\0'; // // Index barcode // // The index barcode appears identically in both single-end and paired-end reads. // If the barcode type is index_index, the barcode will appear as NNNNNN+NNNNNN // in both reads. If the specified barcode type is null_index we want to read only // the second half of the index, if the type is index_null, we want to read // only the first half, or the full string if there is no '+' character. // if (q < stop) for (p = q+1, q = p; q < stop; q++); else p = q; if (*p != '\0') { // // Check if there is a '+' character. // for (z = p; *z != '+' && *z != '\0'; z++); if (r->read == 1) { lim = z - p; switch (barcode_type) { case index_null: case index_index: case index_inline: lim = lim < max_bc_size_1 ? 
lim : max_bc_size_1; strncpy(r->index_bc, p, lim); r->index_bc[lim] = '\0'; break; case inline_index: lim = lim < max_bc_size_2 ? lim : max_bc_size_2; strncpy(r->index_bc, p, lim); r->index_bc[lim] = '\0'; break; default: break; } } else if (r->read == 2) { if (*z == '+') p = z + 1; switch (barcode_type) { case null_index: case index_index: case inline_index: strncpy(r->index_bc, p, max_bc_size_2); r->index_bc[max_bc_size_2] = '\0'; break; default: break; } } } } else if (colon_cnt == 4 && hash_cnt == 1) { r->fastq_type = illv1_fastq; for (p = s->id, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; strcpy(r->machine, p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->lane = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->tile = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != ':' && q < stop; q++); if (q < stop) { *q = '\0'; r->x = atoi(p); *q = ':'; } for (p = q+1, q = p; *q != '#' && q < stop; q++); if (q < stop) { *q = '\0'; r->y = atoi(p); *q = '#'; } for (p = q+1, q = p; *q != '/' && q < stop; q++); if (q < stop) { *q = '\0'; r->index = atoi(p); *q = '/'; } for (p = q+1, q = p; *q != '\0' && q < stop; q++); // r->read = atoi(p); } else { r->fastq_type = generic_fastq; strncpy(r->machine, s->id, id_len); r->machine[id_len] = '\0'; } uint len = strlen(s->seq); // // Resize the sequence/phred buffers if necessary. 
// if (len > r->size - 1) r->resize(len + 1); strncpy(r->seq, s->seq, r->size - 1); strncpy(r->phred, s->qual, r->size - 1); r->seq[r->size - 1] = '\0'; r->phred[r->size - 1] = '\0'; r->len = len; if (r->read == 1) { switch (barcode_type) { case inline_null: case inline_inline: case inline_index: strncpy(r->inline_bc, r->seq, max_bc_size_1); r->inline_bc[max_bc_size_1] = '\0'; break; case index_inline: strncpy(r->inline_bc, r->seq, max_bc_size_2); r->inline_bc[max_bc_size_2] = '\0'; break; default: break; } } else if (r->read == 2 && (barcode_type == inline_inline || barcode_type == index_inline)) { strncpy(r->inline_bc, r->seq, max_bc_size_2); r->inline_bc[max_bc_size_2] = '\0'; } r->retain = 1; return 0; } int rev_complement(char *seq, int offset, bool overhang) { char *p, *q; offset += overhang ? 1 : 0; q = seq + offset; int len = strlen(q); int j = 0; char *com = new char[len + 1]; for (p = q + len - 1; p >= q; p--) { switch (*p) { case 'A': case 'a': com[j] = 'T'; break; case 'C': case 'c': com[j] = 'G'; break; case 'G': case 'g': com[j] = 'C'; break; case 'T': case 't': com[j] = 'A'; break; } j++; } com[len] = '\0'; for (j = 0; j < len; j++) q[j] = com[j]; delete [] com; return 0; } int reverse_qual(char *qual, int offset, bool overhang) { char *p, *q; offset += overhang ? 1 : 0; q = qual + offset; int len = strlen(q); int j = 0; char *com = new char[len + 1]; for (p = q + len - 1; p >= q; p--) { com[j] = *p; j++; } com[len] = '\0'; for (j = 0; j < len; j++) q[j] = com[j]; delete [] com; return 0; } // // Functions for quality filtering based on phred scores. 
// int check_quality_scores(Read *href, int qual_offset, int score_limit, int len_limit, int offset) { // // Phred quality scores are discussed here: // http://en.wikipedia.org/wiki/FASTQ_format // // Illumina 1.3+ encodes phred scores between ASCII values 64 (0 quality) and 104 (40 quality) // // @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh // | | | | | // 64 74 84 94 104 // 0 10(90%) 20(99%) 30(99.9%) 40(99.99%) // // // Drop sequence if the average phred quality score drops below a threshold within a sliding window. // // // Convert the encoded quality scores to their integer values // // cerr << "Integer scores: "; for (uint j = 0; j < href->len; j++) { href->int_scores[j] = href->phred[j] - qual_offset; // cerr << href->int_scores[j] << " "; } // cerr << "\n"; double mean = 0.0; double working_sum = 0.0; int *p, *q, j; // cerr << "Window length: " << href->win_len << "; Stop position: " << href->stop_pos << "\n"; // // Populate the sliding window. // for (j = offset; j < href->win_len + offset; j++) working_sum += href->int_scores[j]; // cerr << "Populating the sliding window using position " << offset << " to " << href->win_len + offset - 1 << "; initial working sum: " << working_sum << "\n"; // // Set p pointer to the first element in the window, and q to one element past the last element in the window. 
// p = href->int_scores + offset; q = p + (int) href->win_len; j = offset; // cerr << "Setting pointers; P: " << (href->int_scores + offset) - href->int_scores << "; Q: " << p + (int) href->win_len - p << "; J: " << j << "\n"; do { mean = working_sum / href->win_len; // cerr << "J: " << j << "; Window contents: "; // for (int *r = p; r < q; r++) // cerr << *r << " "; // cerr << "\n"; // cerr << " Mean: " << mean << "\n"; if (mean < score_limit) { if (j < len_limit) { return 0; } else { href->len = j + 1; href->seq[j] = '\0'; href->phred[j] = '\0'; return -1; } } // // Advance the window: // Add the score from the front edge of the window, subtract the score // from the back edge of the window. // working_sum -= (double) *p; working_sum += (double) *q; // cerr << " Removing value of p: " << *p << " (position: " << p - (href->int_scores) << ")\n"; // cerr << " Adding value of q: " << *q << " (position: " << q - (href->int_scores) << ")\n"; p++; q++; j++; } while (j <= href->stop_pos); return 1; } bool correct_barcode(set &bcs, Read *href, seqt type, int num_errs) { if (recover == false) return false; // // The barcode_dist variable specifies how far apart in sequence space barcodes are. If barcodes // are off by two nucleotides in sequence space, than we can correct barcodes that have a single // sequencing error. // // If the barcode sequence is off by no more than barcodes_dist-1 nucleotides, correct it. We will // search the whole possible space of barcodes if more than one length of barcode was specified. // const char *p; char *q; char bc[id_len]; int d, close; string b; set::iterator it; close = 0; for (it = bcs.begin(); it != bcs.end(); it++) { // // Copy the proper subset of the barcode to match the length of the barcode in the bcs set. // strncpy(bc, type == single_end ? 
href->se_bc : href->pe_bc, it->length()); bc[it->length()] = '\0'; d = 0; for (p = it->c_str(), q = bc; *p != '\0'; p++, q++) if (*p != *q) d++; if (d <= num_errs) { close++; b = *it; break; } } if (close == 1) { // // Correct the barcode. // if (type == single_end) { strcpy(href->se_bc, b.c_str()); if (barcode_type == inline_null || barcode_type == inline_index || barcode_type == inline_inline) href->inline_bc_len = b.length(); } else { strcpy(href->pe_bc, b.c_str()); if (barcode_type == index_inline || barcode_type == inline_inline) href->inline_bc_len = b.length(); } return true; } return false; } // // Functions for filtering adapter sequence // int init_adapter_seq(int kmer_size, char *adapter, int &adp_len, AdapterHash &kmers) { string kmer; adp_len = strlen(adapter); int num_kmers = adp_len - kmer_size + 1; char *p = adapter; for (int i = 0; i < num_kmers; i++) { kmer.assign(p, kmer_size); kmers[kmer].push_back(i); p++; } return 0; } int filter_adapter_seq(Read *href, char *adapter, int adp_len, AdapterHash &adp_kmers, int kmer_size, int distance, int len_limit) { vector > hits; int num_kmers = href->len - kmer_size + 1; const char *p = href->seq; string kmer; // // Identify matching kmers and their locations of occurance. // for (int i = 0; i < num_kmers; i++) { kmer.assign(p, kmer_size); if (adp_kmers.count(kmer) > 0) { for (uint j = 0; j < adp_kmers[kmer].size(); j++) { // cerr << "Kmer hit " << kmer << " at query position " << i << " at hit position " << adp_kmers[kmer][j] << "\n"; hits.push_back(make_pair(i, adp_kmers[kmer][j])); } } p++; } // // Scan backwards from the position of the k-mer and then scan forwards // counting the number of mismatches. 
// int mismatches, i, j, start_pos; for (uint k = 0; k < hits.size(); k++) { mismatches = 0; i = hits[k].first; // Position in query sequence j = hits[k].second; // Position in adapter hit // cerr << "Starting comparison at i: "<< i << "; j: " << j << "\n"; while (i >= 0 && j >= 0) { if (href->seq[i] != adapter[j]) mismatches++; i--; j--; } if (mismatches > distance) continue; start_pos = i + 1; i = hits[k].first; j = hits[k].second; while (i < (int) href->len && j < adp_len && mismatches <= distance) { if (href->seq[i] != adapter[j]) mismatches++; i++; j++; } // cerr << "Starting position: " << start_pos << "; Query end (i): " << i << "; adapter end (j): " << j // << "; number of mismatches: " << mismatches << "; Seq Len: " << href->len << "; SeqSeq Len: " << strlen(href->seq) << "\n"; if (mismatches <= distance && (i == (int) href->len || j == adp_len)) { // cerr << " Trimming or dropping.\n"; if (start_pos < len_limit) { return 0; } else { href->len = start_pos + 1; href->seq[start_pos] = '\0'; href->phred[start_pos] = '\0'; return -1; } } } return 1; } stacks-1.35/src/clean.h000644 000765 000024 00000026357 12574066143 015551 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __CLEAN_H__ #define __CLEAN_H__ #include using std::string; #include using std::map; #include #include using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::unordered_map; #include "input.h" #include "kmers.h" enum fastqt {generic_fastq, illv1_fastq, illv2_fastq}; enum barcodet {null_null, null_index, inline_null, index_null, inline_inline, index_index, inline_index, index_inline}; enum seqt {single_end, paired_end}; typedef unordered_map, std::hash > AdapterHash; extern uint min_bc_size_1, max_bc_size_1, min_bc_size_2, max_bc_size_2; extern int barcode_dist_1, barcode_dist_2; extern barcodet barcode_type; extern uint truncate_seq; extern double win_size; extern bool paired; extern bool recover; class BarcodePair { public: string se; // Single-end barcode. string pe; // Paired-end barcode. string name; // Filename to open for this barcode combination. BarcodePair() { this->se = ""; this->pe = ""; } BarcodePair(char *p) { this->se = string(p); this->pe = ""; } BarcodePair(char *p, char *q, char *n) { if (p != NULL) this->se = string(p); if (q != NULL) this->pe = string(q); if (n != NULL) this->name = string(n); } BarcodePair(string se, string pe, string name) { this->se = se; this->pe = pe; this->name = name; } BarcodePair(string se) { this->se = se; this->pe = ""; } void set(char *p, char *q) { this->se = string(p); this->pe = string(q); } void set(char *p) { this->se = string(p); this->pe = ""; } void set(string p, string q) { this->se = p; this->pe = q; } void set(string p) { this->se = p; this->pe = ""; } string str() { if (this->pe.length() > 0) return string(this->se + "-" + this->pe); else return this->se; } bool name_exists() { if (this->name.length() > 0) return true; return false; } friend bool operator<(const BarcodePair &lhs, const BarcodePair &rhs) { if (lhs.se < rhs.se) return true; else if (lhs.se == rhs.se && lhs.pe < rhs.pe) return true; else return false; } friend bool 
operator==(const BarcodePair &lhs, const BarcodePair &rhs) { return (lhs.se == rhs.se && lhs.pe == rhs.pe); } friend ofstream& operator<<(ofstream &out, const BarcodePair &bp) { if (bp.pe.length() > 0) out << bp.se << "-" << bp.pe; else out << bp.se; return out; } }; class Read { public: fastqt fastq_type; char *inline_bc; char *index_bc; char *se_bc; char *pe_bc; char *machine; int lane; int tile; int x; int y; int index; int read; char *seq; char *phred; int *int_scores; bool filter; int inline_bc_len; int retain; uint size; uint len; double win_len; double stop_pos; Read(uint buf_len, int read, int barcode_size, double win_size) { this->inline_bc = new char[id_len + 1]; this->index_bc = new char[id_len + 1]; this->machine = new char[id_len + 1]; this->seq = new char[buf_len + 1]; this->phred = new char[buf_len + 1]; this->int_scores = new int[buf_len]; this->size = buf_len + 1; this->read = read; this->retain = 1; this->inline_bc_len = 0; this->tile = 0; this->lane = 0; this->x = 0; this->y = 0; this->index = 0; this->len = 0; this->inline_bc[0] = '\0'; this->index_bc[0] = '\0'; this->machine[0] = '\0'; this->seq[0] = '\0'; this->phred[0] = '\0'; this->set_len(buf_len); this->se_bc = NULL; this->pe_bc = NULL; if (this->read == 1) { switch(barcode_type) { case index_inline: this->se_bc = this->index_bc; this->pe_bc = this->inline_bc; break; case inline_index: this->se_bc = this->inline_bc; this->pe_bc = this->index_bc; this->inline_bc_len = barcode_size; break; case inline_null: case inline_inline: this->se_bc = this->inline_bc; this->inline_bc_len = barcode_size; break; case index_null: case index_index: this->se_bc = this->index_bc; break; default: break; } } else if (this->read == 2) { switch(barcode_type) { case inline_inline: case index_inline: this->pe_bc = this->inline_bc; this->inline_bc_len = barcode_size; break; case index_index: case inline_index: this->pe_bc = this->index_bc; break; default: break; } } } ~Read() { delete [] this->inline_bc; delete [] 
this->index_bc; delete [] this->machine; delete [] this->seq; delete [] this->phred; delete [] this->int_scores; } int resize(int size) { delete [] this->seq; delete [] this->phred; delete [] this->int_scores; this->size = size; this->seq = new char[this->size]; this->phred = new char[this->size]; this->int_scores = new int[this->size - 1]; this->set_len(size - 1); return 0; } int set_len(uint buf_len) { if (buf_len == this->len) return 0; if (buf_len > this->size - 1) buf_len = this->size - 1; this->seq[buf_len] = '\0'; this->phred[buf_len] = '\0'; // // Set the parameters for checking read quality later in processing. // Window length is 15% (rounded) of the sequence length. // this->len = buf_len - this->inline_bc_len; this->win_len = round((double) this->len * win_size); if (this->win_len < 1) this->win_len = 1; this->len += this->inline_bc_len; this->stop_pos = this->len - this->win_len; return 0; } }; int parse_illumina_v1(const char *); int parse_illumina_v2(const char *); int parse_input_record(Seq *, Read *); int rev_complement(char *, int, bool); int reverse_qual(char *, int, bool); bool correct_barcode(set &, Read *, seqt, int); int filter_adapter_seq(Read *, char *, int, AdapterHash &, int, int, int); int init_adapter_seq(int, char *, int &, AdapterHash &); int check_quality_scores(Read *, int, int, int, int); // // Templated function to process barcodes. // template int process_barcode(Read *href_1, Read *href_2, BarcodePair &bc, map &fhs, set &se_bc, set &pe_bc, map > &barcode_log, map &counter) { if (barcode_type == null_null) return 0; // // Is this a legitimate barcode? The barcode passed into this function is the maximally long // barcode. If we fail to find a match at maximum length, step down to minimum length and // continue to search for a match. 
// char *p; char bc_1[id_len]; char bc_2[id_len]; strcpy(bc_1, bc.se.c_str()); strcpy(bc_2, bc.pe.c_str()); bool valid_se_bc = false; bool valid_pe_bc = false; p = bc_1 + max_bc_size_1; // Point p at the end of string NULL. for (uint i = max_bc_size_1; i >= min_bc_size_1; i--) if (se_bc.count(bc_1) > 0) { valid_se_bc = true; break; } else { p--; *p = '\0'; } if (pe_bc.size() > 0) { p = bc_2 + max_bc_size_2; // Point p at the end of string NULL. for (uint i = max_bc_size_2; i >= min_bc_size_2; i--) if (pe_bc.count(bc_2) > 0) { valid_pe_bc = true; break; } else { p--; *p = '\0'; } } if (valid_se_bc == true && valid_pe_bc == true) bc.set(bc_1, bc_2); else if (valid_se_bc == true) bc.se = bc_1; else if (valid_pe_bc == true) bc.pe = bc_2; // // Log the barcodes we receive. // if (barcode_log.count(bc) == 0) { barcode_log[bc]["noradtag"] = 0; barcode_log[bc]["total"] = 0; barcode_log[bc]["low_qual"] = 0; barcode_log[bc]["retained"] = 0; } barcode_log[bc]["total"] += paired ? 2 : 1; // // If we have a perfectly matching barcode, set the barcode and length in the right places. // if (pe_bc.size() > 0 && valid_se_bc == true && valid_pe_bc == true) { if (fhs.count(bc) > 0) { if (paired) { strcpy(href_1->se_bc, bc_1); strcpy(href_2->pe_bc, bc_2); } else { strcpy(href_1->se_bc, bc_1); strcpy(href_1->pe_bc, bc_2); } if (barcode_type == inline_index || barcode_type == inline_inline) href_1->inline_bc_len = strlen(bc_1); if (barcode_type == index_inline || barcode_type == inline_inline) href_2->inline_bc_len = strlen(bc_2); return 0; } } else if (valid_se_bc == true) { strcpy(href_1->se_bc, bc_1); if (barcode_type == inline_null || barcode_type == inline_index || barcode_type == inline_inline) href_1->inline_bc_len = strlen(bc_1); } else if (valid_pe_bc == true) { if (paired) strcpy(href_2->pe_bc, bc_2); else strcpy(href_1->pe_bc, bc_2); if (barcode_type == index_inline || barcode_type == inline_inline) href_2->inline_bc_len = strlen(bc_2); } // // Try to correct the barcode. 
// BarcodePair old_barcode = bc; bool se_correct = false; bool pe_correct = false; if (paired) { if (se_bc.count(bc.se) == 0) se_correct = correct_barcode(se_bc, href_1, single_end, barcode_dist_1); if (pe_bc.size() > 0 && pe_bc.count(bc.pe) == 0) pe_correct = correct_barcode(pe_bc, href_2, paired_end, barcode_dist_2); if (se_correct) bc.se = string(href_1->se_bc); if (pe_bc.size() > 0 && pe_correct) bc.pe = string(href_2->pe_bc); // // After correcting the individual barcodes, check if the combination is valid. // if (fhs.count(bc) == 0) { counter["ambiguous"] += 2; href_1->retain = 0; href_2->retain = 0; } } else { if (se_bc.count(bc.se) == 0) se_correct = correct_barcode(se_bc, href_1, single_end, barcode_dist_1); if (pe_bc.size() > 0 && pe_bc.count(bc.pe) == 0) pe_correct = correct_barcode(pe_bc, href_1, paired_end, barcode_dist_2); if (se_correct) bc.se = string(href_1->se_bc); if (pe_bc.size() > 0 && pe_correct) bc.pe = string(href_1->pe_bc); if (fhs.count(bc) == 0) { counter["ambiguous"]++; href_1->retain = 0; } } if (href_1->retain && (se_correct || pe_correct)) { counter["recovered"] += paired ? 2 : 1; barcode_log[old_barcode]["total"] -= paired ? 2 : 1; if (barcode_log.count(bc) == 0) { barcode_log[bc]["total"] = 0; barcode_log[bc]["retained"] = 0; barcode_log[bc]["low_qual"] = 0; barcode_log[bc]["noradtag"] = 0; } barcode_log[bc]["total"] += paired ? 2 : 1; } return 0; } #endif // __CLEAN_H__ stacks-1.35/src/clone_filter.cc000644 000765 000024 00000106216 12574066143 017263 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // clone_filter -- find duplicate read pairs and reduce them to one representative // pair of sequences in the data set. These reads are assumed to be the product of // PCR amplification. // #include "clone_filter.h" // // Global variables to hold command-line options. // FileT in_file_type = FileT::unknown; FileT out_file_type = FileT::unknown; string in_file; string in_file_p1; string in_file_p2; string in_path_1; string in_path_2; string out_path; bool discards = false; bool interleaved = false; bool merge = false; bool paired = false; bool retain_oligo = false; barcodet barcode_type = null_null; int oligo_len_1 = 0; int oligo_len_2 = 0; // // These variables are required for other linked objects, but we won't use them in clone_filter. // int barcode_size; uint truncate_seq; bool ill_barcode; bool recover; uint min_bc_size_1; uint max_bc_size_1; uint min_bc_size_2; uint max_bc_size_2; double win_size; int main (int argc, char* argv[]) { parse_command_line(argc, argv); if (oligo_len_2 == 0) oligo_len_2 = oligo_len_1; min_bc_size_1 = oligo_len_1; max_bc_size_1 = oligo_len_1; min_bc_size_2 = oligo_len_2; max_bc_size_2 = oligo_len_2; // // If input files are gzipped, output gziped files, unless the user chooses an output type. 
// if (out_file_type == FileT::unknown) { if (in_file_type == FileT::gzfastq || in_file_type == FileT::bam) out_file_type = FileT::gzfastq; else out_file_type = FileT::fastq; } if (paired) cerr << "Processing paired-end data.\n"; else cerr << "Processing single-end data.\n"; switch(barcode_type) { case null_null: cerr << "No oligo sequence specified, will use single and paired-end reads to determine clones.\n"; break; case null_index: cerr << "Searching for index oligo (i7 Illumina read).\n"; break; case index_null: cerr << "Searching for index oligo (i5 Illumina read).\n"; break; case inline_null: cerr << "Searching for inline oligo on single-end read.\n"; break; case index_index: cerr << "Searching for index oligos (i5 and i7 Illumina reads).\n"; break; case inline_inline: cerr << "Searching for inline oligos on single and paired-end read.\n"; break; case inline_index: cerr << "Searching for inline oligo on single-end read and index oligo (i5 or i7 Illumina read).\n"; break; case index_inline: if (paired) cerr << "Searching for inline oligo on paired-end read and index oligo (i5 or i7 Illumina read).\n"; else cerr << "Searching for inline oligo on single-end read and index oligo (i5 or i7 Illumina read).\n"; break; } map counters; counters["total"] = 0; counters["red_reads"] = 0; counters["dis_reads"] = 0; vector > files; build_file_list(files); CloneHash clone_map; OligoHash oligo_map; map clone_dist; vector clone_map_keys; for (uint i = 0; i < files.size(); i++) { cerr << "Processing file " << i+1 << " of " << files.size() << " [" << files[i].first.c_str() << "]\n"; int result = 1; if (paired) { if (barcode_type == null_null) result = process_paired_reads_by_sequence(files[i].first, files[i].second, counters, clone_map, clone_map_keys); else result = process_paired_reads(files[i].first, files[i].second, counters, oligo_map); } else { result = process_reads(files[i].first, counters, oligo_map); } if (!result) { cerr << "Error processing reads.\n"; break; } } if 
(barcode_type == null_null) { write_clonereduced_sequence(files[0].first, files[0].second, clone_map, clone_dist, counters); } else { for (OligoHash::iterator i = oligo_map.begin(); i != oligo_map.end(); i++) for (map::iterator j = i->second.begin(); j != i->second.end(); j++) clone_dist[j->second]++; } if (clone_map_keys.size() > 0) { cerr << "Freeing hash key memory..."; free_hash(clone_map_keys); cerr << "done.\n"; } // // Determine and print the distribution of read clones. // cerr << "Calculating the distribution of cloned read pairs...\n"; vector bins; map::iterator it; for (it = clone_dist.begin(); it != clone_dist.end(); it++) bins.push_back(it->first); sort(bins.begin(), bins.end()); cout << "Num Clones\tCount\n"; for (uint i = 0; i < bins.size(); i++) cout << bins[i] << "\t" << clone_dist[bins[i]] << "\n"; char buf[32]; sprintf(buf, "%0.2f%%", ((double) (counters["total"] - counters["red_reads"]) / (double) counters["total"]) * 100); cerr << counters["total"] << " pairs of reads input. 
" << counters["red_reads"] << " pairs of reads output, discarded " << counters["dis_reads"] << " pairs of reads, " << buf << " clone reads.\n"; return 0; } int process_paired_reads_by_sequence(string prefix_1, string prefix_2, map &counters, CloneHash &clone_map, vector &clone_map_keys) { Input *fh_1, *fh_2; int return_val = 1; string path_1 = in_path_1 + prefix_1; string path_2 = in_path_2 + prefix_2; cerr << "Reading data from:\n " << path_1 << " and\n " << path_2 << "\n"; if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1); fh_2 = new Fastq(path_2); } else if (in_file_type == FileT::fasta) { fh_1 = new Fasta(path_1); fh_2 = new Fasta(path_2); } else if (in_file_type == FileT::gzfastq) { fh_1 = new GzFastq(path_1); fh_2 = new GzFastq(path_2); } else if (in_file_type == FileT::gzfasta) { fh_1 = new GzFasta(path_1); fh_2 = new GzFasta(path_2); } else if (in_file_type == FileT::bustard) { fh_1 = new Bustard(path_1.c_str()); fh_2 = new Bustard(path_2.c_str()); } else { fh_1 = NULL; fh_2 = NULL; } // // Read in the first records, initializing the Seq objects, then loop, using the same objects. // Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Unable to allocate Seq object.\n"; return -1; } long i = 1; bool exists; char *hash_key; uint seq_len = strlen(s_1->seq); do { if (i % 10000 == 0) cerr << "Processing short read " << i << " \r"; counters["total"]++; exists = clone_map.count(s_1->seq) == 0 ? 
false : true; if (exists) { hash_key = s_1->seq; } else { hash_key = new char [seq_len + 1]; strcpy(hash_key, s_1->seq); clone_map_keys.push_back(hash_key); } if (out_file_type == FileT::fastq) clone_map[hash_key][s_2->seq].push_back(Pair(s_1->id, s_2->id, s_1->qual, s_2->qual)); else if (out_file_type == FileT::fasta) clone_map[hash_key][s_2->seq].push_back(Pair(s_1->id, s_2->id)); delete s_1; delete s_2; i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); cerr << "\n"; delete fh_1; delete fh_2; return return_val; } int write_clonereduced_sequence(string prefix_1, string prefix_2, CloneHash &clone_map, map &clone_dist, map &counters) { ofstream out_fh_1, out_fh_2, discard_fh_1, discard_fh_2; gzFile out_gzfh_1, out_gzfh_2; int return_val = 1; // // Open the input files. // string path_1; string path_2; // // Open the output files. // string suffix_1, suffix_2; if (out_file_type == FileT::gzfastq) { suffix_1 = ".1.fq.gz"; suffix_2 = ".2.fq.gz"; } else if (out_file_type == FileT::fastq) { suffix_1 = ".1.fq"; suffix_2 = ".2.fq"; } else if (out_file_type == FileT::gzfasta) { suffix_1 = ".1.fa.gz"; suffix_2 = ".2.fa.gz"; } else if (out_file_type == FileT::fasta) { suffix_1 = ".1.fa"; suffix_2 = ".2.fa"; } string file_1 = prefix_1; int pos = file_1.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file_1.substr(pos) == ".gz") { file_1 = file_1.substr(0, pos); pos = file_1.find_last_of("."); } path_1 = out_path + file_1 + suffix_1; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { out_gzfh_1 = gzopen(path_1.c_str(), "wb"); if (!(out_gzfh_1)) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } else { out_fh_1.open(path_1.c_str(), ifstream::out); if (out_fh_1.fail()) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } string file_2 = prefix_2; pos = file_2.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == 
FileT::gzfasta) && file_2.substr(pos) == ".gz") { file_2 = file_2.substr(0, pos); pos = file_2.find_last_of("."); } path_2 = out_path + file_2 + suffix_2; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { out_gzfh_2 = gzopen(path_2.c_str(), "wb"); if (!(out_gzfh_2)) { cerr << "Error opening output file '" << path_2 << "'\n"; return -1; } } else { out_fh_2.open(path_2.c_str(), ifstream::out); if (out_fh_2.fail()) { cerr << "Error opening output file '" << path_2 << "'\n"; return -1; } } // // Open files for recording discarded reads. // if (discards) { path_1 = out_path + file_1 + ".discards" + suffix_1; discard_fh_1.open(path_1.c_str(), ifstream::out); if (discard_fh_1.fail()) { cerr << "Error opening discard output file '" << path_1 << "'\n"; return -1; } path_2 = out_path + file_2 + ".discards" + suffix_2; discard_fh_2.open(path_2.c_str(), ifstream::out); if (discard_fh_2.fail()) { cerr << "Error opening discard output file '" << path_2 << "'\n"; return -1; } } CloneHash::iterator hash_it; map >::iterator map_it; stringstream sstr; cerr << "Writing filtered data..."; for (hash_it = clone_map.begin(); hash_it != clone_map.end(); hash_it++) { for (map_it = hash_it->second.begin(); map_it != hash_it->second.end(); map_it++) { if (out_file_type == FileT::gzfastq) { sstr.str(); sstr << "@" << map_it->second[0].p1_id << "\n" << hash_it->first << "\n" << "+\n" << map_it->second[0].p1_qual << "\n"; gzputs(out_gzfh_1, sstr.str().c_str()); sstr.str(); sstr << "@" << map_it->second[0].p2_id << "\n" << map_it->first << "\n" << "+\n" << map_it->second[0].p2_qual << "\n"; gzputs(out_gzfh_2, sstr.str().c_str()); } else if (out_file_type == FileT::fastq) { out_fh_1 << "@" << map_it->second[0].p1_id << "\n" << hash_it->first << "\n" << "+\n" << map_it->second[0].p1_qual << "\n"; out_fh_2 << "@" << map_it->second[0].p2_id << "\n" << map_it->first << "\n" << "+\n" << map_it->second[0].p2_qual << "\n"; } else if (out_file_type == FileT::gzfasta) { sstr.str(); 
sstr << ">" << map_it->second[0].p1_id << "\n" << hash_it->first << "\n"; gzputs(out_gzfh_1, sstr.str().c_str()); sstr.str(); sstr << ">" << map_it->second[0].p2_id << "\n" << map_it->first << "\n"; gzputs(out_gzfh_2, sstr.str().c_str()); } else if (out_file_type == FileT::fasta) { out_fh_1 << ">" << map_it->second[0].p1_id << "\n" << hash_it->first << "\n"; out_fh_2 << ">" << map_it->second[0].p2_id << "\n" << map_it->first << "\n"; } counters["dis_reads"] += map_it->second.size() - 1; clone_dist[map_it->second.size()]++; // // Write cloned read pairs that we are discarding // if (discards) for (uint i = 1; i < map_it->second.size(); i++) { if (out_file_type == FileT::fasta) { discard_fh_1 << ">" << map_it->second[i].p1_id << "\n" << hash_it->first << "\n"; discard_fh_2 << ">" << map_it->second[i].p2_id << "\n" << map_it->first << "\n"; } else if (out_file_type == FileT::fastq) { discard_fh_1 << "@" << map_it->second[i].p1_id << "\n" << hash_it->first << "\n" << "+\n" << map_it->second[i].p1_qual << "\n"; discard_fh_2 << "@" << map_it->second[i].p2_id << "\n" << map_it->first << "\n" << "+\n" << map_it->second[i].p2_qual << "\n"; } } counters["red_reads"]++; } } cerr << "done.\n"; out_fh_1.close(); out_fh_2.close(); if (discards) { discard_fh_1.close(); discard_fh_2.close(); } return return_val; } int process_paired_reads(string prefix_1, string prefix_2, map &counters, OligoHash &oligo_map) { Input *fh_1, *fh_2; Read *r_1, *r_2; ofstream out_fh_1, out_fh_2, discard_fh_1, discard_fh_2; gzFile out_gzfh_1, out_gzfh_2, discard_gzfh_1, discard_gzfh_2; int return_val = 1; // // Open the input files. // string path_1 = in_path_1 + prefix_1; string path_2 = in_path_2 + prefix_2; if (interleaved) cerr << " Reading data from:\n " << path_1 << "\n"; else cerr << " Reading data from:\n " << path_1 << " and\n " << path_2 << "\n"; switch (in_file_type) { case FileT::fastq: fh_1 = new Fastq(path_1); fh_2 = interleaved ? 
fh_1 : new Fastq(path_2); break; case FileT::gzfastq: fh_1 = new GzFastq(path_1.c_str()); fh_2 = interleaved ? fh_1 : new GzFastq(path_2); break; case FileT::fasta: fh_1 = new Fasta(path_1); fh_2 = interleaved ? fh_1 : new Fasta(path_2); break; case FileT::gzfasta: fh_1 = new GzFasta(path_1); fh_2 = interleaved ? fh_1 : new GzFasta(path_2); break; case FileT::bam: fh_1 = new BamUnAln(path_1); fh_2 = fh_1; break; case FileT::bustard: fh_1 = new Bustard(path_1); fh_2 = interleaved ? fh_1 : new Bustard(path_2); default: break; } // // Open the output files. // string suffix_1, suffix_2; if (out_file_type == FileT::gzfastq) { suffix_1 = ".1.fq.gz"; suffix_2 = ".2.fq.gz"; } else if (out_file_type == FileT::fastq) { suffix_1 = ".1.fq"; suffix_2 = ".2.fq"; } else if (out_file_type == FileT::gzfasta) { suffix_1 = ".1.fa.gz"; suffix_2 = ".2.fa.gz"; } else if (out_file_type == FileT::fasta) { suffix_1 = ".1.fa"; suffix_2 = ".2.fa"; } string file_1 = prefix_1; int pos_1 = file_1.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file_1.substr(pos_1) == ".gz") { file_1 = file_1.substr(0, pos_1); pos_1 = file_1.find_last_of("."); } path_1 = out_path + file_1.substr(0, pos_1) + suffix_1; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { out_gzfh_1 = gzopen(path_1.c_str(), "wb"); if (!(out_gzfh_1)) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } else { out_fh_1.open(path_1.c_str(), ifstream::out); if (out_fh_1.fail()) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } string file_2 = prefix_2; int pos_2 = file_2.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file_2.substr(pos_2) == ".gz") { file_2 = file_2.substr(0, pos_2); pos_2 = file_2.find_last_of("."); } path_2 = out_path + file_2.substr(0, pos_2) + suffix_2; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { out_gzfh_2 = gzopen(path_2.c_str(), 
"wb"); if (!(out_gzfh_2)) { cerr << "Error opening output file '" << path_2 << "'\n"; return -1; } } else { out_fh_2.open(path_2.c_str(), ifstream::out); if (out_fh_2.fail()) { cerr << "Error opening output file '" << path_2 << "'\n"; return -1; } } // // Open files for recording discarded reads. // if (discards) { path_1 = out_path + file_1.substr(0, pos_1) + ".discards" + suffix_1; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { discard_gzfh_1 = gzopen(path_1.c_str(), "wb"); if (!(discard_gzfh_1)) { cerr << "Error opening discard file '" << path_1 << "'\n"; return -1; } } else { discard_fh_1.open(path_1.c_str(), ifstream::out); if (discard_fh_1.fail()) { cerr << "Error opening discard file '" << path_1 << "'\n"; return -1; } } path_2 = out_path + file_2.substr(0, pos_2) + ".discards" + suffix_2; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { discard_gzfh_2 = gzopen(path_2.c_str(), "wb"); if (!(discard_gzfh_2)) { cerr << "Error opening discard file '" << path_2 << "'\n"; return -1; } } else { discard_fh_2.open(path_2.c_str(), ifstream::out); if (discard_fh_2.fail()) { cerr << "Error opening discard file '" << path_2 << "'\n"; return -1; } } } // // Determine how much sequence we need to trim to remove the oligo seqeunce before printing. // int offset_1, offset_2; switch (barcode_type) { case inline_null: case inline_index: offset_1 = oligo_len_1; offset_2 = 0; break; case index_null: case null_index: case index_index: offset_1 = 0; offset_2 = 0; break; case inline_inline: offset_1 = oligo_len_1; offset_2 = oligo_len_2; break; case index_inline: offset_1 = 0; offset_2 = oligo_len_2; default: break; } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. 
// Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Attempting to read first pair of input records, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } r_1 = new Read(strlen(s_1->seq), 1, min_bc_size_1, win_size); r_2 = new Read(strlen(s_2->seq), 2, min_bc_size_2, win_size); long i = 1; int result_1 = 1; int result_2 = 1; bool clone = false; string oligo_1, oligo_2, key, oligo; do { if (i % 10000 == 0) cerr << " Processing RAD-Tag " << i << " \r"; parse_input_record(s_1, r_1); parse_input_record(s_2, r_2); counters["total"]++; result_1 = 1; result_2 = 1; clone = false; // // Fetch the randomized oligo sequence from the proper position in the reads. // switch (barcode_type) { case inline_null: oligo_1 = r_1->inline_bc; break; case index_null: oligo_1 = r_1->index_bc; break; case null_index: oligo_1 = r_2->index_bc; break; case inline_inline: oligo_1 = r_1->inline_bc; oligo_2 = r_2->inline_bc; break; case index_index: oligo_1 = r_1->index_bc; oligo_2 = r_2->index_bc; break; case inline_index: oligo_1 = r_1->inline_bc; oligo_2 = r_2->index_bc; break; case index_inline: oligo_1 = r_1->index_bc; oligo_2 = r_2->inline_bc; default: break; } // // Have we seen this combination of oligos before for this read? // oligo = oligo_1 + oligo_2; key = string(s_1->seq + offset_1) + string(s_2->seq + offset_2); // cerr << "Oligo: '" << oligo << "'\n" // << "Seq: '" << s_1->seq << "'\n" // << "Key: '" << key << "'\n"; if (oligo_map.count(key) == 0) oligo_map[key] = map(); if (oligo_map[key].count(oligo) == 0) { oligo_map[key][oligo] = 1; clone = false; } else { oligo_map[key][oligo]++; clone = true; } if (clone == false) { counters["red_reads"]++; switch (out_file_type) { case FileT::fastq: result_1 = write_fastq(&out_fh_1, s_1, retain_oligo ? 0 : offset_1); result_2 = write_fastq(&out_fh_2, s_2, retain_oligo ? 
0 : offset_2); break; case FileT::gzfastq: result_1 = write_fastq(&out_gzfh_1, s_1, retain_oligo ? 0 : offset_1); result_2 = write_fastq(&out_gzfh_2, s_2, retain_oligo ? 0 : offset_2); break; case FileT::fasta: result_1 = write_fasta(&out_fh_1, s_1, retain_oligo ? 0 : offset_1); result_2 = write_fasta(&out_fh_2, s_2, retain_oligo ? 0 : offset_2); break; case FileT::gzfasta: result_1 = write_fasta(&out_gzfh_1, s_1, retain_oligo ? 0 : offset_1); result_2 = write_fasta(&out_gzfh_2, s_2, retain_oligo ? 0 : offset_2); default: break; } if (!result_1 || !result_2) { cerr << "Error writing to output file for '" << file_1 << " / " << file_2 << "'\n"; return_val = -1; break; } } else if (clone == true && discards) { counters["dis_reads"]++; switch (out_file_type) { case FileT::fastq: result_1 = write_fastq(&discard_fh_1, s_1); result_2 = write_fastq(&discard_fh_2, s_2); break; case FileT::gzfastq: result_1 = write_fastq(&discard_gzfh_1, s_1); result_2 = write_fastq(&discard_gzfh_2, s_2); break; case FileT::fasta: result_1 = write_fasta(&discard_fh_1, s_1); result_2 = write_fasta(&discard_fh_2, s_2); break; case FileT::gzfasta: result_1 = write_fasta(&discard_gzfh_1, s_1); result_2 = write_fasta(&discard_gzfh_2, s_2); default: break; } if (!result_1 || !result_2) { cerr << "Error writing to discard file for '" << file_1 << " / " << file_2 << "'\n"; return_val = -1; break; } } delete s_1; delete s_2; i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) { gzclose(out_gzfh_1); gzclose(out_gzfh_2); if (discards) { gzclose(discard_gzfh_1); gzclose(discard_gzfh_2); } } else { out_fh_1.close(); out_fh_2.close(); if (discards) { discard_fh_1.close(); discard_fh_2.close(); } } delete fh_1; if (interleaved == false) delete fh_2; delete r_1; delete r_2; return return_val; } int process_reads(string prefix_1, map &counters, OligoHash &oligo_map) { Input *fh_1; Read *r_1; ofstream 
out_fh_1, discard_fh_1; gzFile out_gzfh_1, discard_gzfh_1; int return_val = 1; // // Open the input file. // string path_1 = in_path_1 + prefix_1; cerr << " Reading data from:\n " << path_1 << "\n"; switch(in_file_type) { case FileT::fastq: fh_1 = new Fastq(path_1); break; case FileT::gzfastq: fh_1 = new GzFastq(path_1.c_str()); break; case FileT::fasta: fh_1 = new Fasta(path_1); break; case FileT::gzfasta: fh_1 = new GzFasta(path_1); break; case FileT::bam: fh_1 = new BamUnAln(path_1); break; case FileT::bustard: fh_1 = new Bustard(path_1); default: break; } // // Open the output files. // string suffix_1; if (out_file_type == FileT::gzfastq) suffix_1 = ".fq.gz"; else if (out_file_type == FileT::fastq) suffix_1 = ".fq"; else if (out_file_type == FileT::gzfasta) suffix_1 = ".fa.gz"; else if (out_file_type == FileT::fasta) suffix_1 = ".fa"; string file_1 = prefix_1; int pos = file_1.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file_1.substr(pos) == ".gz") { file_1 = file_1.substr(0, pos); pos = file_1.find_last_of("."); } path_1 = out_path + file_1.substr(0, pos) + suffix_1; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { out_gzfh_1 = gzopen(path_1.c_str(), "wb"); if (!(out_gzfh_1)) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } else { out_fh_1.open(path_1.c_str(), ifstream::out); if (out_fh_1.fail()) { cerr << "Error opening output file '" << path_1 << "'\n"; return -1; } } // // Open files for recording discarded reads. 
// if (discards) { path_1 = out_path + file_1 + ".discards" + suffix_1; if (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) { discard_gzfh_1 = gzopen(path_1.c_str(), "wb"); if (!(discard_gzfh_1)) { cerr << "Error opening discard file '" << path_1 << "'\n"; return -1; } } else { discard_fh_1.open(path_1.c_str(), ifstream::out); if (discard_fh_1.fail()) { cerr << "Error opening discard file '" << path_1 << "'\n"; return -1; } } } // // Determine how much sequence we need to trim to remove the oligo seqeunce before printing. // int offset_1; switch (barcode_type) { case inline_null: case inline_index: case index_inline: offset_1 = oligo_len_1; break; default: offset_1 = 0; break; } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s_1 = fh_1->next_seq(); if (s_1 == NULL) { cerr << "Attempting to read first pair of input records, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } r_1 = new Read(strlen(s_1->seq), 1, min_bc_size_1, win_size); long i = 1; int result_1 = 1; bool clone = false; string key, oligo_1; do { if (i % 10000 == 0) cerr << " Processing RAD-Tag " << i << " \r"; parse_input_record(s_1, r_1); counters["total"]++; result_1 = 1; clone = false; // // Fetch the randomized oligo sequence from the proper position in the reads. // if (barcode_type == inline_null) oligo_1 = r_1->inline_bc; else if (barcode_type == index_null) oligo_1 = r_1->index_bc; // // Have we seen this combination of oligos before for this read? // key = string(s_1->seq + offset_1); if (oligo_map.count(key) == 0) oligo_map[key] = map(); if (oligo_map[key].count(oligo_1) == 0) { oligo_map[key][oligo_1] = 1; clone = false; } else { oligo_map[key][oligo_1]++; clone = true; } if (clone == false) { counters["red_reads"]++; switch (out_file_type) { case FileT::fastq: result_1 = write_fastq(&out_fh_1, s_1, retain_oligo ? 
0 : offset_1); break; case FileT::gzfastq: result_1 = write_fastq(&out_gzfh_1, s_1, retain_oligo ? 0 : offset_1); break; case FileT::fasta: result_1 = write_fasta(&out_fh_1, s_1, retain_oligo ? 0 : offset_1); break; case FileT::gzfasta: result_1 = write_fasta(&out_gzfh_1, s_1, retain_oligo ? 0 : offset_1); default: break; } if (!result_1) { cerr << "Error writing to output file for '" << file_1 << "'\n"; return_val = -1; break; } } else if (clone == true && discards) { counters["dis_reads"]++; switch (out_file_type) { case FileT::fastq: result_1 = write_fastq(&discard_fh_1, s_1); break; case FileT::gzfastq: result_1 = write_fastq(&discard_gzfh_1, s_1); break; case FileT::fasta: result_1 = write_fasta(&discard_fh_1, s_1); break; case FileT::gzfasta: result_1 = write_fasta(&discard_gzfh_1, s_1); default: break; } if (!result_1) { cerr << "Error writing to discard file for '" << file_1 << "'\n"; return_val = -1; break; } } delete s_1; i++; } while ((s_1 = fh_1->next_seq()) != NULL); if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) { gzclose(out_gzfh_1); if (discards) gzclose(discard_gzfh_1); } else { out_fh_1.close(); if (discards) discard_fh_1.close(); } delete fh_1; delete r_1; return return_val; } int free_hash(vector &keys) { for (uint i = 0; i < keys.size(); i++) { delete [] keys[i]; } keys.clear(); return 0; } int parse_command_line(int argc, char* argv[]) { FileT ftype; int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"discards", no_argument, NULL, 'D'}, {"paired", no_argument, NULL, 'P'}, {"null_index", no_argument, NULL, 'U'}, {"index_null", no_argument, NULL, 'u'}, {"inline_null", no_argument, NULL, 'V'}, {"index_index", no_argument, NULL, 'W'}, {"inline_inline", no_argument, NULL, 'x'}, {"index_inline", no_argument, NULL, 'Y'}, {"inline_index", no_argument, NULL, 'Z'}, {"infile_type", required_argument, NULL, 'i'}, {"outfile_type", required_argument, 
NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"path", required_argument, NULL, 'p'}, {"file_p1", required_argument, NULL, '1'}, {"file_p2", required_argument, NULL, '2'}, {"outpath", required_argument, NULL, 'o'}, {"oligo_len_1", required_argument, NULL, 'O'}, {"oligo_len_2", required_argument, NULL, 'L'}, {"retain_oligo", required_argument, NULL, 'R'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvDPuUVWxYZi:y:f:p:1:2:o:O:L:R:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'i': if (strcasecmp(optarg, "bustard") == 0) in_file_type = FileT::bustard; else if (strcasecmp(optarg, "fasta") == 0) in_file_type = FileT::fasta; else if (strcasecmp(optarg, "gzfasta") == 0) in_file_type = FileT::gzfasta; else if (strcasecmp(optarg, "gzfastq") == 0) in_file_type = FileT::gzfastq; else in_file_type = FileT::fastq; break; case 'y': if (strcasecmp(optarg, "fasta") == 0) out_file_type = FileT::fasta; else out_file_type = FileT::fastq; break; case 'D': discards = true; break; case 'f': in_file = optarg; ftype = FileT::fastq; break; case 'p': in_path_1 = optarg; in_path_2 = in_path_1; ftype = FileT::fastq; break; case '1': paired = true; in_file_p1 = optarg; ftype = FileT::fastq; break; case '2': paired = true; in_file_p2 = optarg; ftype = FileT::fastq; break; case 'P': paired = true; break; case 'o': out_path = optarg; break; case 'U': barcode_type = null_index; break; case 'u': barcode_type = index_null; break; case 'V': barcode_type = inline_null; break; case 'W': barcode_type = index_index; break; case 'x': barcode_type = inline_inline; break; case 'Y': barcode_type = index_inline; break; case 'Z': barcode_type = inline_index; break; case 'O': oligo_len_1 = is_integer(optarg); break; case 'L': oligo_len_2 = is_integer(optarg); break; case 'R': retain_oligo = true; break; case 'v': version(); break; case '?': // getopt_long 
already printed an error message. help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_file.length() == 0 && in_path_1.length() == 0 && in_file_p1.length() == 0) { cerr << "You must specify an input file of a directory path to a set of input files.\n"; help(); } if (in_file.length() > 0 && in_path_1.length() > 0) { cerr << "You must specify either a single input file (-f) or a directory path (-p), not both.\n"; help(); } if (in_file.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a single input file (-f) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a file path (-p) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && in_path_1.at(in_path_1.length() - 1) != '/') in_path_1 += "/"; if (in_path_2.length() > 0 && in_path_2.at(in_path_2.length() - 1) != '/') in_path_2 += "/"; if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (in_file_type == FileT::unknown) in_file_type = ftype; if (paired == false && barcode_type == null_null) { cerr << "You must specify paired-end data if you do not have oligo sequences to differentiate cloned reads.\n"; help(); } if (barcode_type != null_null && oligo_len_1 == 0 && oligo_len_2 == 0) { cerr << "You must specify the length of the oligo sequences (--oligo_len_1 / --oligo_len_2).\n"; help(); } return 0; } void version() { std::cerr << "clone_filter " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "clone_filter " << VERSION << "\n" << "clone_filter [-f in_file | -p in_dir [-P] [-I] | -1 pair_1 -2 pair_2] -o out_dir [-i type] [-y type] [-D] [-h]\n" << " f: path to the input file if processing single-end sequences.\n" << " p: path to a directory of files.\n" << " P: files contained 
within directory specified by '-p' are paired.\n" << " 1: first input file in a set of paired-end sequences.\n" << " 2: second input file in a set of paired-end sequences.\n" << " i: input file type, either 'bustard' for the Illumina BUSTARD output files, 'fastq', 'fasta', 'gzfasta', or 'gzfastq' (default 'fastq').\n" << " o: path to output the processed files.\n" << " y: output type, either 'fastq' or 'fasta' (default fastq).\n" << " D: capture discarded reads to a file.\n" << " h: display this help messsage.\n" << " --oligo_len_1 len: length of the single-end oligo sequence in data set.\n" << " --oligo_len_2 len: length of the paired-end oligo sequence in data set.\n" << " --retain_oligo: do not trim off the random oligo sequence (if oligo is inline).\n\n" << " Oligo sequence options:\n" << " --inline_null: random oligo is inline with sequence, occurs only on single-end read (default).\n" << " --null_index: random oligo is provded in FASTQ header (Illumina i7 read if both i5 and i7 read are provided).\n" << " --index_null: random oligo is provded in FASTQ header (Illumina i5 or i7 read).\n" << " --inline_inline: random oligo is inline with sequence, occurs on single and paired-end read.\n" << " --index_index: random oligo is provded in FASTQ header (Illumina i5 and i7 read).\n" << " --inline_index: random oligo is inline with sequence on single-end read and second oligo occurs in FASTQ header.\n" << " --index_inline: random oligo occurs in FASTQ header (Illumina i5 or i7 read) and is inline with sequence on single-end read (if single read data) or paired-end read (if paired data).\n\n"; exit(0); } stacks-1.35/src/clone_filter.h000644 000765 000024 00000006202 12571641525 017117 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __CLONE_FILTER_H__ #define __CLONE_FILTER_H__ #include "constants.h" #include #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include #include using std::stringstream; using std::istream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include using std::unordered_map; #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "kmers.h" #include "BustardI.h" // Reading input files in Tab-separated Bustard format #include "FastqI.h" // Reading input files in FASTQ format #include "FastaI.h" // Reading input files in FASTA format #include "gzFasta.h" // Reading gzipped input files in FASTA format #include "gzFastq.h" // Reading gzipped input files in FASTQ format #include "BamUnalignedI.h" #include "clean.h" #include "file_io.h" #include "write.h" class Pair { public: string p1_id; string p2_id; string p1_qual; string p2_qual; Pair(string p1_id, string p2_id, string p1_qual, string p2_qual) { this->p1_id = p1_id; this->p2_id = p2_id; this->p1_qual = p1_qual; this->p2_qual = p2_qual; } Pair(string p1_id, string p2_id) { this->p1_id = p1_id; this->p2_id = p2_id; } }; #ifdef HAVE_SPARSEHASH typedef sparse_hash_map >, hash_charptr, 
eqstr> CloneHash; typedef sparse_hash_map > OligoHash; #else typedef unordered_map >, hash_charptr, eqstr> CloneHash; typedef unordered_map > OligoHash; #endif int process_paired_reads(string, string, map &, OligoHash &); int process_reads(string, map &, OligoHash &); int process_paired_reads_by_sequence(string, string, map &, CloneHash &, vector &); int write_clonereduced_sequence(string, string, CloneHash &, map &, map &); int free_hash(vector &); void help( void ); void version( void ); int parse_command_line(int, char**); #endif // __CLONE_FILTER_H__ stacks-1.35/src/cmb.cc000644 000765 000024 00000026701 12335173442 015353 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // cmb.cc -- routines to implement the Combination generating class: CombSet. 
// // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id: cmb.cc 1990 2010-11-03 04:49:22Z catchen $ // #include "cmb.h" // // A cache to store combinations generated as (N choose k) // for all set sizes encountered // map > _cmbs; CombSet::~CombSet() { int num_sets, set, i; Cmb **c; num_sets = this->compound_comb.size(); for (set = 0; set < num_sets; set++) { c = this->compound_comb[set]; this->destroy(c); } // // Delete edge map // for (i = 0; i < (int) this->node_list.size(); i++) delete [] this->edges[i]; delete [] this->edges; } CombSet::CombSet(int n, int k, MinSpanTree *tree) { this->max_set_size = n >= k ? k : n; this->num_elements = n - 1; this->index = 0; this->mst = tree; int set_size = this->max_set_size - 1; int size; int **comb; cerr << " Generating combinations for a set of " << n << " elements, with a maximum subset size of " << k << "\n"; // // Add the initial combination: the empty set // if (_cmbs.count(this->num_elements) == 0 && _cmbs[this->num_elements].count(0) == 0) { //cerr << " N: " << this->num_elements << "; K: 0; Total elements: 0\n"; comb = new int * [2]; comb[0] = new int[1]; comb[0][0] = -1; comb[1] = NULL; _cmbs[this->num_elements][0] = comb; } while (set_size > 0) { // // Check if this set of combinations is already cached. // if (_cmbs.count(this->num_elements) > 0 && _cmbs[this->num_elements].count(set_size) > 0) { set_size--; continue; } // // How many combinations will we make? // size = (int) this->num_combinations(this->num_elements, set_size); cerr << " N: " << this->num_elements << "; K: " << set_size << "; Total elements: " << size << "\n"; // // Generate all combinations, N choose K; N=num_elements, K=set_size // comb = this->generate_combinations(this->num_elements, set_size, size); // // Cache this set of combinations // _cmbs[this->num_elements][set_size] = comb; set_size--; } this->catalog_tree(); // // Finally, generate all combinations of nodes in the tree. 
// int max = this->max_set_size < this->num_elements ? this->max_set_size : this->num_elements + 1; for (set_size = 0; set_size < max; set_size++) this->partition_tree(set_size); cerr << " Total compound combinations for sets of size " << n << ": " << this->compound_comb.size() << "\n"; } int CombSet::catalog_tree() { set visited; queue q; uint i, n_1, n_2; // // Create a two-dimensional array to represent edges between nodes in the tree // uint cnt = this->mst->node_count(); //cerr << "Creating a two-dimensional array of size: " << cnt << " x " << cnt << "\n"; this->edges = new int * [cnt]; for (i = 0; i < cnt; i++) this->edges[i] = new int[cnt]; Node *n = this->mst->head(); q.push(n); cnt = 0; while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); this->node_list.push_back(n); this->node_map[n->id] = cnt; cnt++; for (i = 0; i < n->min_adj_list.size(); i++) if (visited.count(n->min_adj_list[i]->id) == false) q.push(n->min_adj_list[i]); } n = this->mst->head(); q.push(n); visited.clear(); while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); for (i = 0; i < n->min_adj_list.size(); i++) { if (visited.count(n->min_adj_list[i]->id) == false) { n_1 = this->node_map[n->id]; n_2 = this->node_map[n->min_adj_list[i]->id]; // Create a list of min spanning edges this->edge_list.push_back(make_pair(n->id, n->min_adj_list[i]->id)); // Mark the nodes as connected in our edge array this->edges[n_1][n_2] = 1; this->edges[n_2][n_1] = 1; // Queue this node to be visited next q.push(n->min_adj_list[i]); } } } return 0; } int CombSet::partition_tree(uint set_size) { uint i, j; set visited; queue q; Node *n; int n_1, n_2, node_cnt, cmb_cnt; Cmb **new_comb, *cmb; list nlist_work; int **comb = _cmbs[this->num_elements][set_size]; int *subgraph = new int[this->node_list.size()]; // // We want to methodically remove every set of branches of set_size size. The // subgraphs represent the combinations we want to generate. 
// for (i = 0; comb[i] != NULL; ++i) { // // This compound combination will consist of set_size+1 subgraphs // new_comb = new Cmb * [set_size + 2]; new_comb[set_size + 1] = NULL; // // Initialize working node list. // nlist_work = this->node_list; // // Remove edges // for (j = 0; j < set_size; j++) { n_1 = this->edge_list[comb[i][j]].first; n_2 = this->edge_list[comb[i][j]].second; this->edges[this->node_map[n_1]][this->node_map[n_2]] = 0; this->edges[this->node_map[n_2]][this->node_map[n_1]] = 0; } // // Traverse the subgraphs of the tree and record combinations. // visited.clear(); cmb_cnt = 0; while (nlist_work.size() > 0) { node_cnt = 0; n = nlist_work.front(); q.push(n); nlist_work.pop_front(); //subgraph[node_cnt] = n->id; while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); subgraph[node_cnt] = n->id; node_cnt++; nlist_work.remove(n); for (j = 0; j < n->min_adj_list.size(); j++) { n_1 = this->node_map[n->id]; n_2 = this->node_map[n->min_adj_list[j]->id]; if (visited.count(n->min_adj_list[j]->id) == false && edges[n_1][n_2] == 1) { q.push(n->min_adj_list[j]); } } } // // Package up this combination. // cmb = new Cmb; cmb->size = node_cnt; cmb->elem = new int[cmb->size]; for (j = 0; j < cmb->size; j++) cmb->elem[j] = subgraph[j]; new_comb[cmb_cnt] = cmb; cmb_cnt++; } this->compound_comb.push_back(new_comb); // // Reset the edges. // for (j = 0; j < set_size; j++) { n_1 = this->edge_list[comb[i][j]].first; n_2 = this->edge_list[comb[i][j]].second; this->edges[this->node_map[n_1]][this->node_map[n_2]] = 1; this->edges[this->node_map[n_2]][this->node_map[n_1]] = 1; } } delete [] subgraph; return 0; } int **CombSet::generate_combinations(int n, int k, int total) { int **comb; // // Generate an int pointer for each combination, terminate the list with // a NULL pointer. 
// comb = new int * [total + 1]; for (int i = 0; i < total; i++) comb[i] = new int[k]; comb[total] = NULL; // // Setup the initial combination // int comb_num = 0; for (int i = 0; i < k; i++) comb[comb_num][i] = i; comb_num++; // // Generate each successive combination // while (comb_num < total) { for (int i = 0; i < k; i++) comb[comb_num][i] = comb[comb_num - 1][i]; this->next_combination(comb[comb_num], n, k); comb_num++; } return comb; } int CombSet::next_combination(int *comb, int n, int k) { int i; // // The zero'th position has been incremented to its maximal value, // it's not possible to further increment values in the set. // if (comb[0] > n - k) return 0; // // Increment the last position in the set. // i = k - 1; comb[i]++; // // Check if the last position has reached its maximal possible value, // if so, move back one position, and increment it. // while ((i > 0) && (comb[i] >= n - k + 1 + i)) { i--; comb[i]++; } // // Move from the position we incremented above back out to the final position // for (i = i + 1; i < k; i++) comb[i] = comb[i - 1] + 1; return 1; } long int CombSet::num_combinations(int n, int k) { // // Compute the binomial coefficient using the method of: // Y. Manolopoulos, "Binomial coefficient computation: recursion or iteration?", // ACM SIGCSE Bulletin, 34(4):65-67, 2002. // long int r = 1; long int s = (k < n - k) ? n - k + 1 : k + 1; for (long int i = n; i >= s; i--) r = r * i / (n - i + 1); return r; } // // Return a variable length array of Cmb objects, terminated by a NULL pointer. 
// Cmb **CombSet::next(int map[]) { if (this->index >= (int) this->compound_comb.size()) return NULL; // int index, i, j, k, n; // int size = this->compound_comb[this->index]->size; // int *e = this->compound_comb[this->index]->elem; // Cmb **c = new Cmb * [size + 1]; // for (i = 0; i < size; i++) { // index = e[i]; // // sets vector index number // k = this->compound_set[index].first; // // combination number // n = this->compound_set[index].second; // c[i] = new Cmb; // c[i]->size = this->size[k]; // c[i]->elem = new int[this->size[k]]; // for (j = 0; j < this->size[k]; j++) // c[i]->elem[j] = (map == NULL) ? // this->sets[k][n][j] : // map[this->sets[k][n][j]]; // } // c[size] = NULL; Cmb **c = this->compound_comb[this->index]; this->index++; return c; } void CombSet::reset() { this->index = 0; } void CombSet::destroy(Cmb **cmb) { for (uint j = 0; cmb[j] != NULL; j++) { delete [] cmb[j]->elem; delete cmb[j]; } delete [] cmb; } void write_cmb(int *comb, int size) { stringstream s; string t; s << "{"; for (int i = 0; i < size; i++) s << comb[i] << ", "; t = s.str().substr(0, s.str().length() - 2); t += "}"; cerr << t << "\n"; } stacks-1.35/src/cmb.h000644 000765 000024 00000005323 12335173442 015212 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __CMB_H__ #define __CMB_H__ #include #include #include using std::stringstream; using std::cin; using std::cout; using std::cerr; #include using std::list; #include using std::vector; #include using std::string; #include using std::set; #include using std::pair; using std::make_pair; #include "constants.h" #include "mst.h" #include "utils.h" typedef unsigned int uint; void write_cmb(int *, int); typedef struct cmb { uint size; int *elem; } Cmb; class CombSet { // // Given these two variables, we will select N choose K combinations. // This combination will be stored in sets, and we will then decrement K by 1 // and continue to generate sets. // // Once we have generated all the combinations of a particular size, K, we // will partition the minimum spanning tree by dropping combinations of edges // from the graph. The selection of edges to drop is provided by the combinations // generated first. Finally, each set of disconnected subgraphs makes for one // possible combination. // int num_elements; // N elements from which we wish to produce combinations int max_set_size; // maximum set size, K, the largest subset we wish to select. map node_map; // Convert non-contiguous IDs from the MST into array indexes for this->edges list node_list; vector > edge_list; int **edges; int index; vector compound_comb; MinSpanTree *mst; int catalog_tree(); int partition_tree(uint); int **generate_combinations(int, int, int); int next_combination(int *, int, int); long int num_combinations(int, int); void destroy(Cmb **); public: CombSet(int, int, MinSpanTree *); ~CombSet(); Cmb **next(int map[] = NULL); void reset(); }; #endif // __CMB_H__ stacks-1.35/src/constants.h000644 000765 000024 00000002717 12533677757 016513 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __CONSTANTS_H__ #define __CONSTANTS_H__ // // Pull in the configuration variables from the configure script // #if HAVE_CONFIG_H #include "config.h" #endif // // // const unsigned int fieldw = 4; // // Maximum line length for parsing input files. // const int max_len = 1024; // // Maximum length of idetifiers, such as sequence IDs and chromosome names. // const int id_len = 255; // // Size to use for internal buffer size for gzipped files being read with libz. // const int libz_buffer_size = 1048576; // // Supported file types // enum class FileT {unknown, sql, gzsql, fasta, gzfasta, fastq, gzfastq, bowtie, sam, bam, tsv, bustard, phase, fastphase, beagle}; #endif stacks-1.35/src/cstacks.cc000644 000765 000024 00000123735 12536312241 016245 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // cstacks -- Create a catalog of Stacks. // #include "cstacks.h" // Global variables to hold command-line options. queue > samples; string out_path; string catalog_path; FileT in_file_type = FileT::sql; int batch_id = 0; int ctag_dist = 1; bool set_kmer_len = true; int kmer_len = 0; searcht search_type = sequence; int num_threads = 1; bool mult_matches = false; bool report_mmatches = false; bool require_uniq_haplotypes = false; int main (int argc, char* argv[]) { parse_command_line(argc, argv); uint sample_cnt = samples.size(); cerr << "Number of mismatches allowed between stacks: " << ctag_dist << "\n" << "Loci matched based on " << (search_type == sequence ? "sequence identity" : "genomic location") << ".\n" << "Constructing catalog from " << sample_cnt << " samples.\n"; // // Set the number of OpenMP parallel threads to execute. 
// #ifdef _OPENMP omp_set_num_threads(num_threads); #endif map catalog; map::iterator cat_it; map::iterator query_it; pair s; bool compressed = false; int i; if (catalog_path.length() > 0) { cerr << "Initializing existing catalog...\n"; if (!initialize_existing_catalog(catalog_path, catalog)) { cerr << "Failed to initialize the catalog.\n"; return 1; } i = 1; } else { s = samples.front(); samples.pop(); cerr << "Initializing new catalog...\n"; if (!initialize_new_catalog(s, catalog)) { cerr << "Failed to initialize the catalog.\n"; return 1; } i = 2; } // // Build an index of the catalog // map cat_index; if (search_type == genomic_loc) { cerr << "Building an index of the catalog.\n"; update_catalog_index(catalog, cat_index); } while (!samples.empty()) { map sample; cerr << "Processing sample " << s.second << " [" << i << " of " << sample_cnt << "]\n"; s = samples.front(); samples.pop(); if (!load_loci(s.second, sample, false, false, compressed)) { cerr << "Failed to load sample " << i << "\n"; continue; } // // Assign the ID for this sample data. // s.first = sample.begin()->second->sample_id; //dump_loci(sample); if (search_type == sequence) { cerr << "Searching for sequence matches...\n"; find_kmer_matches_by_sequence(catalog, sample, ctag_dist); } else if (search_type == genomic_loc) { cerr << "Searching for matches by genomic location...\n"; find_matches_by_genomic_loc(cat_index, sample); } cerr << "Merging matches into catalog...\n"; uint mmatches = 0; merge_matches(catalog, sample, s, ctag_dist, mmatches); cerr << " " << mmatches << " loci matched more than one catalog locus and were excluded.\n"; // // Regenerate the alleles for the catalog tags after merging the new sample into the catalog. 
// for (cat_it = catalog.begin(); cat_it != catalog.end(); cat_it++) cat_it->second->populate_alleles(); if (search_type == genomic_loc) { cerr << "  Updating catalog index...\n"; update_catalog_index(catalog, cat_index); } i++; for (query_it = sample.begin(); query_it != sample.end(); query_it++) delete (*query_it).second; sample.clear(); } cerr << "Writing catalog to '" << out_path << "'..."; write_catalog(catalog); cerr << " done.\n"; return 0; } int update_catalog_index(map &catalog, map &cat_index) { map::iterator j; char id[id_len]; for (j = catalog.begin(); j != catalog.end(); j++) { snprintf(id, id_len - 1, "%s|%d|%c", j->second->loc.chr, j->second->loc.bp, j->second->loc.strand == plus ? '+' : '-'); if (cat_index.count(id) == 0) { cat_index[id] = j->first; } else { if (cat_index[id] != j->first) cerr << "Error: Catalog index mismatch, key: '" << id << "'.\n"; } } return 0; } int characterize_mismatch_snps(CLocus *catalog_tag, QLocus *query_tag) { set snp_cols; uint i; for (i = 0; i < catalog_tag->snps.size(); i++) snp_cols.insert(catalog_tag->snps[i]->col); for (i = 0; i < query_tag->snps.size(); i++) snp_cols.insert(query_tag->snps[i]->col); // // For each mismatch found, create a SNP object // const char *c = catalog_tag->con; const char *c_beg = c; const char *c_end = c + strlen(c); const char *q = query_tag->con; const char *q_beg = q; const char *q_end = q + strlen(q); i = 0; while (c < c_end && q < q_end) { if (snp_cols.count(i) == 0 && (*c != *q) && (*c != 'N' && *q != 'N')) { SNP *s = new SNP; s->type = snp_type_het; s->col = c - c_beg; s->lratio = 0; s->rank_1 = *c; s->rank_2 = *q; merge_allele(catalog_tag, s); merge_allele(query_tag, s); catalog_tag->snps.push_back(s); s = new SNP; s->type = snp_type_het; s->col = q - q_beg; s->lratio = 0; s->rank_1 = *q; s->rank_2 = *c; query_tag->snps.push_back(s); } c++; q++; i++; } return 1; } int merge_matches(map &catalog, map &sample, pair &sample_file, int ctag_dist, uint &mmatches) { map::iterator i;
vector::iterator mat_it; CLocus *ctag; QLocus *qtag; for (i = sample.begin(); i != sample.end(); i++) { qtag = i->second; // // If this stack didn't match an existing catalog stack, add this stack to the // catalog as a new stack. // if (qtag->matches.size() == 0) { add_unique_tag(sample_file, catalog, qtag); continue; } // // Check for multiple matches. We will reduce the list of Match objects, which // contain matches to multiple alleles for a single locus, to the smallest distance // for a locus. // map local_matches; map::iterator j; for (mat_it = qtag->matches.begin(); mat_it != qtag->matches.end(); mat_it++) { if (local_matches.count((*mat_it)->cat_id) == 0) local_matches[(*mat_it)->cat_id] = (*mat_it)->dist; else if ((*mat_it)->dist < local_matches[(*mat_it)->cat_id]) local_matches[(*mat_it)->cat_id] = (*mat_it)->dist; } uint min_dist = 1000; uint num_matches = 0; int min_cat_id = -1; // // Find the minimum distance and then check how many matches have that distance. // for (j = local_matches.begin(); j != local_matches.end(); j++) min_dist = j->second < min_dist ? j->second : min_dist; for (j = local_matches.begin(); j != local_matches.end(); j++) if (j->second == min_dist) { num_matches++; min_cat_id = j->first; } // // Emit a warning if the query tag matches more than one tag in the catalog. // if (num_matches > 1) { mmatches++; if (report_mmatches) { cerr << " Warning: sample " << sample_file.second << ", tag " << qtag->id << ", matches more than one tag in the catalog and was excluded: "; for (j = local_matches.begin(); j != local_matches.end(); j++) cerr << j->first << " "; cerr << "\n"; } // // Don't record matches to multiple catalog entries unless instructed // to do so by the command line option. 
// if (!mult_matches) continue; } ctag = catalog[min_cat_id]; if (ctag == NULL) cerr << " Unable to locate catalog tag " << min_cat_id << "\n"; // // If mismatches are allowed between query and catalog tags, identify the // mismatches and convert them into SNP objects to be merged into the catalog tag. // if ((ctag_dist > 0 || search_type == genomic_loc) && !characterize_mismatch_snps(ctag, qtag)) cerr << " Error characterizing mismatch SNPs " << sample_file.second << ", tag " << qtag->id << " with catalog tag " << ctag->id << "\n"; // // Merge the SNPs and alleles from the sample into the catalog tag. // if (!ctag->merge_snps(qtag)) { cerr << "Error merging " << sample_file.second << ", tag " << qtag->id << " with catalog tag " << ctag->id << "\n"; } // // If the catalog consensus tag is shorter than the query tag, replace it. // if (strlen(ctag->con) < strlen(qtag->con)) { ctag->add_consensus(qtag->con); } ctag->sources.push_back(make_pair(sample_file.first, qtag->id)); } return 0; } int add_unique_tag(pair &sample_file, map &catalog, QLocus *qloc) { vector::iterator i; map::iterator j; int cid = catalog.size(); CLocus *c = new CLocus; c->id = cid + 1; c->add_consensus(qloc->con); // // Record the source of this catalog tag. // c->sources.push_back(make_pair(sample_file.first, qloc->id)); // // Add the physical genome location of this locus. 
// c->loc.set(qloc->loc.chr, qloc->loc.bp, qloc->loc.strand); catalog[c->id] = c; // cerr << "Adding sample: " << qloc->id << " to the catalog as ID: " << c->id << "\n"; for (i = qloc->snps.begin(); i != qloc->snps.end(); i++) { SNP *snp = new SNP; snp->col = (*i)->col; snp->type = (*i)->type; snp->lratio = (*i)->lratio; snp->rank_1 = (*i)->rank_1; snp->rank_2 = (*i)->rank_2; c->snps.push_back(snp); } for (j = qloc->alleles.begin(); j != qloc->alleles.end(); j++) { c->alleles[j->first] = j->second; } c->populate_alleles(); return 0; } int find_kmer_matches_by_sequence(map &catalog, map &sample, int ctag_dist) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // CatKmerHashMap kmer_map; vector kmer_map_keys; map::iterator it; vector >::iterator allele; QLocus *tag_1; CLocus *tag_2; int i, j; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (it = sample.begin(); it != sample.end(); it++) keys.push_back(it->first); // // Calculate the number of k-mers we will generate. If kmer_len == 0, // determine the optimal length for k-mers. // int con_len = strlen(sample[keys[0]]->con); if (set_kmer_len) kmer_len = determine_kmer_length(con_len, ctag_dist); int num_kmers = con_len - kmer_len + 1; cerr << " Distance allowed between stacks: " << ctag_dist << "\n" << " Using a k-mer length of " << kmer_len << "\n" << " Number of kmers per sequence: " << num_kmers << "\n"; // // Calculate the minimum number of matching k-mers required for a possible sequence match. // int min_hits = calc_min_kmer_matches(kmer_len, ctag_dist, con_len, set_kmer_len ? 
true : false); populate_kmer_hash(catalog, kmer_map, kmer_map_keys, kmer_len); cerr << "  " << catalog.size() << " loci in the catalog, " << kmer_map.size() << " kmers in the catalog hash.\n"; #pragma omp parallel private(i, j, tag_1, tag_2, allele) { #pragma omp for for (i = 0; i < (int) keys.size(); i++) { tag_1 = sample[keys[i]]; for (allele = tag_1->strings.begin(); allele != tag_1->strings.end(); allele++) { vector kmers; generate_kmers(allele->second.c_str(), kmer_len, num_kmers, kmers); map > hits; vector >::iterator map_it; int d; // // Lookup the occurrences of each k-mer in the kmer_map // for (j = 0; j < num_kmers; j++) { if (kmer_map.count(kmers[j]) > 0) for (map_it = kmer_map[kmers[j]].begin(); map_it != kmer_map[kmers[j]].end(); map_it++) hits[map_it->second].push_back(map_it->first); } // // Free the allocated k-mers. // for (j = 0; j < num_kmers; j++) delete [] kmers[j]; kmers.clear(); //cerr << " Tag " << tag_1->id << " hit " << hits.size() << " kmers.\n"; map >::iterator hit_it; vector::iterator all_it; // // Iterate through the list of hits. For each hit, total up the hits to the various alleles. // Any allele that has more than min_hits check its full length to verify a match.
// for (hit_it = hits.begin(); hit_it != hits.end(); hit_it++) { //cerr << " Tag " << hit_it->first << " has " << hit_it->second << " hits (min hits: " << min_hits << ")\n"; map allele_cnts; map::iterator cnt_it; for (all_it = hit_it->second.begin(); all_it != hit_it->second.end(); all_it++) allele_cnts[*all_it]++; for (cnt_it = allele_cnts.begin(); cnt_it != allele_cnts.end(); cnt_it++) { //cerr << " allele " << cnt_it->first << " has " << cnt_it->second << " hits\n"; if (cnt_it->second < min_hits) continue; //cerr << " Match found, checking full-length match\n"; tag_2 = catalog[hit_it->first]; d = dist(allele->second.c_str(), tag_2, cnt_it->first); if (d < 0) cerr << "Unknown error calculating distance between " << tag_1->id << " and " << tag_2->id << "; query allele: " << allele->first << "\n"; //cerr << " Distance: " << d << " CTAG_DIST: " << ctag_dist << "\n"; // // Add a match to the query sequence: catalog ID, catalog allele, query allele, distance // if (d <= ctag_dist) tag_1->add_match(tag_2->id, cnt_it->first, allele->first, d); } } } // Sort the vector of distances. sort(tag_1->matches.begin(), tag_1->matches.end(), compare_matches); } } free_kmer_hash(kmer_map, kmer_map_keys); return 0; } bool compare_matches(Match *a, Match *b) { return (a->dist < b->dist); } int find_matches_by_sequence(map &catalog, map &sample) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // map::iterator i; map::iterator j; int k; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. 
vector keys; for (i = sample.begin(); i != sample.end(); i++) keys.push_back(i->first); #pragma omp parallel private(i, j, k) { #pragma omp for schedule(dynamic) for (k = 0; k < (int) keys.size(); k++) { i = sample.find(keys[k]); vector >::iterator r, s; // // Iterate through the possible SAMPLE alleles // for (r = i->second->strings.begin(); r != i->second->strings.end(); r++) { for (j = catalog.begin(); j != catalog.end(); j++) { // // Iterate through the possible CATALOG alleles // for (s = j->second->strings.begin(); s != j->second->strings.end(); s++) { if (r->second == s->second) { //cerr << "Found a match between " << i->first << " (" << r->first << ") and " << j->first << " (" << s->first << ")\n"; i->second->add_match(j->second->id, s->first, r->first, 0); } } } } } } return 0; } int find_matches_by_genomic_loc(map &cat_index, map &sample) { map::iterator i; map::iterator j; // // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. // vector keys; for (i = sample.begin(); i != sample.end(); i++) keys.push_back(i->first); #pragma omp parallel private(i, j) { char id[id_len]; #pragma omp for for (int k = 0; k < (int) keys.size(); k++) { i = sample.find(keys[k]); snprintf(id, id_len - 1, "%s|%d|%c", i->second->loc.chr, i->second->loc.bp, i->second->loc.strand == plus ? '+' : '-'); if (cat_index.count(id) > 0) i->second->add_match(cat_index[id], "", "", 0); } } return 0; } int write_catalog(map &catalog) { map::iterator i; CLocus *tag; set matches; bool gzip = (in_file_type == FileT::gzsql) ? true : false; // // Parse the input file names to create the output file // stringstream prefix; prefix << out_path << "batch_" << batch_id; string tag_file = prefix.str() + ".catalog.tags.tsv"; string snp_file = prefix.str() + ".catalog.snps.tsv"; string all_file = prefix.str() + ".catalog.alleles.tsv"; if (gzip) { tag_file += ".gz"; snp_file += ".gz"; all_file += ".gz"; } // // Open the output files for writing. 
// gzFile gz_tags, gz_snps, gz_alle; ofstream tags, snps, alle; if (gzip) { gz_tags = gzopen(tag_file.c_str(), "wb"); if (!gz_tags) { cerr << "Error: Unable to open gzipped catalog tag file '" << tag_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_tags, libz_buffer_size); #endif gz_snps = gzopen(snp_file.c_str(), "wb"); if (!gz_snps) { cerr << "Error: Unable to open gzipped catalog snps file '" << snp_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_snps, libz_buffer_size); #endif gz_alle = gzopen(all_file.c_str(), "wb"); if (!gz_alle) { cerr << "Error: Unable to open gzipped catalog alleles file '" << all_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_alle, libz_buffer_size); #endif } else { tags.open(tag_file.c_str()); if (tags.fail()) { cerr << "Error: Unable to open catalog tag file for writing.\n"; exit(1); } snps.open(snp_file.c_str()); if (snps.fail()) { cerr << "Error: Unable to open catalog SNPs file for writing.\n"; exit(1); } alle.open(all_file.c_str()); if (alle.fail()) { cerr << "Error: Unable to open catalog alleles file for writing.\n"; exit(1); } } // // Record the version of Stacks used and the date generated as a comment in the catalog. // // Obtain the current date. 
// stringstream log; time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); log << "# cstacks version " << VERSION << "; catalog generated on " << date << "\n"; if (gzip) { gzputs(gz_tags, log.str().c_str()); gzputs(gz_snps, log.str().c_str()); gzputs(gz_alle, log.str().c_str()); } else { tags << log.str(); snps << log.str(); alle << log.str(); } for (i = catalog.begin(); i != catalog.end(); i++) { tag = i->second; if (gzip) write_gzip_output(tag, gz_tags, gz_snps, gz_alle); else write_simple_output(tag, tags, snps, alle); } if (gzip) { gzclose(gz_tags); gzclose(gz_snps); gzclose(gz_alle); } else { tags.close(); snps.close(); alle.close(); } return 0; } int merge_allele(Locus *locus, SNP *snp) { map > columns; map >::iterator c; vector::iterator i; SNP *lsnp; for (i = locus->snps.begin(); i != locus->snps.end(); i++) columns[(*i)->col] = make_pair("sample", *i); if (columns.count(snp->col)) { lsnp = columns[snp->col].second; // // If this is a new allele for this nucleotide, add it to the catalog SNP. 
// bool rank_1_exists = false; bool rank_2_exists = false; if (snp->rank_1 == lsnp->rank_1 || snp->rank_1 == lsnp->rank_2 || snp->rank_1 == lsnp->rank_3 || snp->rank_1 == lsnp->rank_4) { rank_1_exists = true; } if (snp->rank_2 == lsnp->rank_1 || snp->rank_2 == lsnp->rank_2 || snp->rank_2 == lsnp->rank_3 || snp->rank_2 == lsnp->rank_4) { rank_2_exists = true; } if (rank_1_exists == false) { if (lsnp->rank_3 == 0) lsnp->rank_3 = snp->rank_1; else lsnp->rank_4 = snp->rank_1; } if (rank_2_exists == false) { if (lsnp->rank_3 == 0) lsnp->rank_3 = snp->rank_2; else lsnp->rank_4 = snp->rank_2; } columns[snp->col] = make_pair("both", lsnp); } else { columns[snp->col] = make_pair("merge", snp); } vector > merged_snps; for (c = columns.begin(); c != columns.end(); c++) merged_snps.push_back((*c).second); // // Sort the SNPs by column // sort(merged_snps.begin(), merged_snps.end(), compare_pair_snp); // // Modify any existing alleles to account for this new SNP. If there are not any alleles, // create new ones. // stringstream sallele; set merged_alleles; string allele, new_allele; int pos; if (locus->alleles.size() == 0) { sallele << locus->con[snp->col]; merged_alleles.insert(sallele.str()); } map::iterator j; vector >::iterator k; for (j = locus->alleles.begin(); j != locus->alleles.end(); j++) { allele = j->first; new_allele = ""; pos = 0; //cerr << "Allele length: " << allele.size() << "\n"; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { // // If we inserted a SNP from the sample, add the proper nucleotide from the consensus // sequence to account for it in the allele string. 
// if ((*k).first == "merge") { new_allele += locus->con[(*k).second->col]; //cerr << " Adding char from consensus position " << (*k).second->col << "\n"; } else { new_allele += allele[pos]; //cerr << " Adding char from allele position " << pos << "\n"; pos++; } } merged_alleles.insert(new_allele); } set::iterator s; locus->alleles.clear(); for (s = merged_alleles.begin(); s != merged_alleles.end(); s++) { locus->alleles[*s] = 0; } return 1; } int CLocus::merge_snps(QLocus *matched_tag) { vector::iterator i; map::iterator j; vector >::iterator k; map > columns; map >::iterator c; vector > merged_snps; set merged_alleles; set::iterator s; SNP *csnp; for (i = this->snps.begin(); i != this->snps.end(); i++) columns[(*i)->col] = make_pair("catalog", *i); for (i = matched_tag->snps.begin(); i != matched_tag->snps.end(); i++) { // // Is this column already represented from the previous sample? // if (columns.count((*i)->col)) { csnp = columns[(*i)->col].second; // // If this is a new allele for this nucleotide, add it to the catalog SNP. 
// bool rank_1_exists = false; bool rank_2_exists = false; if ((*i)->rank_1 == csnp->rank_1 || (*i)->rank_1 == csnp->rank_2 || (*i)->rank_1 == csnp->rank_3 || (*i)->rank_1 == csnp->rank_4) { rank_1_exists = true; } if ((*i)->rank_2 == csnp->rank_1 || (*i)->rank_2 == csnp->rank_2 || (*i)->rank_2 == csnp->rank_3 || (*i)->rank_2 == csnp->rank_4) { rank_2_exists = true; } if (rank_1_exists == false) { if (csnp->rank_3 == 0) csnp->rank_3 = (*i)->rank_1; else csnp->rank_4 = (*i)->rank_1; } if (rank_2_exists == false) { if (csnp->rank_3 == 0) csnp->rank_3 = (*i)->rank_2; else csnp->rank_4 = (*i)->rank_2; } columns[(*i)->col] = make_pair("both", csnp); } else { columns[(*i)->col] = make_pair("sample", *i); } } for (c = columns.begin(); c != columns.end(); c++) merged_snps.push_back((*c).second); // // Sort the SNPs by column // sort(merged_snps.begin(), merged_snps.end(), compare_pair_snp); // // Merge the alleles accounting for any SNPs added from either of the two samples. // string allele, new_allele; int pos; for (j = this->alleles.begin(); j != this->alleles.end(); j++) { allele = j->first; new_allele = ""; pos = 0; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { // // If we inserted a SNP from the sample, add the proper nucleotide from the consensus // sequence to account for it in the allele string. // if (k->first == "sample") { new_allele += k->second->col > this->len - 1 ? 'N' : this->con[k->second->col]; } else { new_allele += allele[pos]; pos++; } } merged_alleles.insert(new_allele); } for (j = matched_tag->alleles.begin(); j != matched_tag->alleles.end(); j++) { allele = j->first; new_allele = ""; pos = 0; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { if (k->first == "catalog") { new_allele += k->second->col > matched_tag->len - 1 ? 
'N' : matched_tag->con[k->second->col]; } else { new_allele += allele[pos]; pos++; } } merged_alleles.insert(new_allele); } // // If the matching tag being merged into the catalog had no called SNPs // create alleles from the consensus sequence and check that catalog SNP // objects contain all the nucleotides. // if (matched_tag->alleles.size() == 0) { char c; new_allele = ""; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { csnp = k->second; c = matched_tag->con[k->second->col]; new_allele += (csnp->col > matched_tag->len - 1) ? 'N' : c; if (csnp->col > matched_tag->len - 1) continue; if (c != csnp->rank_1 && c != csnp->rank_2 && c != csnp->rank_3 && c != csnp->rank_4) { if (csnp->rank_3 == 0) csnp->rank_3 = c; else csnp->rank_4 = c; } } if (new_allele.length() > 0) merged_alleles.insert(new_allele); } // // If the newly merged alleles contain Ns due to different sequence lengths, // check if we can reduce the alleles as one of the longer allele haplotypes // may fully encompass a shorter allele haplotype that has been padded with Ns. // if (require_uniq_haplotypes) this->reduce_alleles(merged_alleles); // // Update the catalog entry's list of SNPs and alleles // this->snps.clear(); for (k = merged_snps.begin(); k != merged_snps.end(); k++) { SNP *snp = new SNP; snp->col = (*k).second->col; snp->type = (*k).second->type; snp->lratio = 0.0; snp->rank_1 = (*k).second->rank_1; snp->rank_2 = (*k).second->rank_2; snp->rank_3 = (*k).second->rank_3; snp->rank_4 = (*k).second->rank_4; this->snps.push_back(snp); if (k->first == "catalog" || k->first == "both") delete k->second; } this->alleles.clear(); for (s = merged_alleles.begin(); s != merged_alleles.end(); s++) { this->alleles[*s] = 0; } return 1; } int CLocus::reduce_alleles(set &alleles) { set::iterator it; uint len, max_len, match, ncnt; vector haplotypes, cur, next; max_len = 0; for (it = alleles.begin(); it != alleles.end(); it++) { max_len = it->length() > max_len ?
it->length() : max_len; haplotypes.push_back(*it); } len = alleles.size(); alleles.clear(); for (uint i = 0; i < len; i++) { //cerr << "Looking at haplotype[" << i << "]: " << haplotypes[i] << "\n"; // // We will only look at strings that contain Ns. // if (haplotypes[i].find('N') == string::npos) { alleles.insert(haplotypes[i]); //cerr << " No Ns, skipping...\n"; continue; } uint k = 0; uint j = i + 1; while (k < len - 1) { cur.push_back(haplotypes[j % len]); k++; j++; } // // Examine the haplotype alleles one SNP at a time. If we are able to uniquely // determine a second haplotype that encompasses the first one, return it. // j = 0; while (cur.size() > 1 && j < max_len) { for (k = 0; k < cur.size(); k++) { cerr << "Comparing haplotypes[" << i << "]: '" << haplotypes[i] << "' to '" << cur[k] << " at position " << j << "'\n"; if (haplotypes[i][j] == cur[k][j] || haplotypes[i][j] == 'N') { cerr << "  Keeping this haplotype.\n"; next.push_back(cur[k]); } else { cerr << "  Discarding this haplotype.\n"; } } cur = next; next.clear(); j++; } // // If there is only one left, make sure what we have of the haplotype does match // and it's not simply an erroneously called haplotype. If so, then this haplotype // is encompassed by another, longer haplotype and we do not need to keep it.
// ncnt = 0; match = 0; if (cur.size() > 1) { cerr << "Discarding " << haplotypes[i] << "\n"; continue; } else if (cur.size() == 1) { for (k = 0; k < max_len; k++) if (haplotypes[i][k] != 'N') ncnt++; for (k = 0; k < max_len; k++) if (cur[0][k] == haplotypes[i][k]) match++; if (match == ncnt) { cerr << "Discarding " << haplotypes[i] << "\n"; continue; } } cerr << "Keeping " << haplotypes[i] << "\n"; alleles.insert(haplotypes[i]); } return 0; } int populate_kmer_hash(map &catalog, CatKmerHashMap &kmer_map, vector &kmer_map_keys, int kmer_len) { map::iterator it; vector >::iterator allele; vector kmers; CLocus *tag; char *hash_key; bool exists; int j; // // Break each stack down into k-mers and create a hash map of those k-mers // recording in which sequences they occur. // int num_kmers = strlen(catalog.begin()->second->con) - kmer_len + 1; for (it = catalog.begin(); it != catalog.end(); it++) { tag = it->second; // // Iterate through the possible Catalog alleles // for (allele = tag->strings.begin(); allele != tag->strings.end(); allele++) { // // Generate and hash the kmers for this allele string // generate_kmers(allele->second.c_str(), kmer_len, num_kmers, kmers); for (j = 0; j < num_kmers; j++) { hash_key = kmers[j]; exists = kmer_map.count(hash_key) == 0 ? 
false : true; kmer_map[hash_key].push_back(make_pair(allele->first, tag->id)); if (exists) delete [] kmers[j]; else kmer_map_keys.push_back(hash_key); } kmers.clear(); } } //dump_kmer_map(kmer_map); return 0; } int write_simple_output(CLocus *tag, ofstream &cat_file, ofstream &snp_file, ofstream &all_file) { vector::iterator snp_it; map::iterator all_it; vector >::iterator src_it; string sources; for (src_it = tag->sources.begin(); src_it != tag->sources.end(); src_it++) { stringstream s; s << (*src_it).first << "_" << (*src_it).second << ","; sources += s.str(); } sources = sources.substr(0, sources.length() - 1); cat_file << "0" << "\t" << batch_id << "\t" << tag->id << "\t" << tag->loc.chr << "\t" << tag->loc.bp << "\t" << (tag->loc.strand == plus ? "+" : "-") << "\t" << "consensus" << "\t" << "0" << "\t" << sources << "\t" << tag->con << "\t" << 0 << "\t" << // These flags are unused in cstacks, but important in ustacks 0 << "\t" << 0 << "\t" << 0 << "\n"; // // Output the SNPs associated with the catalog tag // for (snp_it = tag->snps.begin(); snp_it != tag->snps.end(); snp_it++) { snp_file << "0" << "\t" << batch_id << "\t" << tag->id << "\t" << (*snp_it)->col << "\t"; switch((*snp_it)->type) { case snp_type_het: snp_file << "E\t"; break; case snp_type_hom: snp_file << "O\t"; break; default: snp_file << "U\t"; break; } snp_file << (*snp_it)->lratio << "\t" << (*snp_it)->rank_1 << "\t" << (*snp_it)->rank_2 << "\t" << ((*snp_it)->rank_3 == 0 ? '-' : (*snp_it)->rank_3) << "\t" << ((*snp_it)->rank_4 == 0 ? '-' : (*snp_it)->rank_4) << "\n"; } // // Output the alleles associated with the two matched tags // for (all_it = tag->alleles.begin(); all_it != tag->alleles.end(); all_it++) all_file << "0" << "\t" << batch_id << "\t" << tag->id << "\t" << all_it->first << "\t" << "0" << "\t" << // These two fields are used in the "0" << "\n"; // ustacks/pstacks output, not in cstacks. 
return 0; } int write_gzip_output(CLocus *tag, gzFile &cat_file, gzFile &snp_file, gzFile &all_file) { vector::iterator snp_it; map::iterator all_it; vector >::iterator src_it; string sources; stringstream sstr; for (src_it = tag->sources.begin(); src_it != tag->sources.end(); src_it++) { sstr << (*src_it).first << "_" << (*src_it).second << ","; } sources = sstr.str(); sources = sources.substr(0, sources.length() - 1); sstr.str(""); sstr << "0" << "\t" << batch_id << "\t" << tag->id << "\t" << tag->loc.chr << "\t" << tag->loc.bp << "\t" << (tag->loc.strand == plus ? "+" : "-") << "\t" << "consensus" << "\t" << "0" << "\t" << sources << "\t" << tag->con << "\t" << 0 << "\t" << // These flags are unused in cstacks, but important in ustacks 0 << "\t" << 0 << "\t" << 0 << "\n"; gzputs(cat_file, sstr.str().c_str()); sstr.str(""); // // Output the SNPs associated with the catalog tag // for (snp_it = tag->snps.begin(); snp_it != tag->snps.end(); snp_it++) { sstr << "0" << "\t" << batch_id << "\t" << tag->id << "\t" << (*snp_it)->col << "\t"; switch((*snp_it)->type) { case snp_type_het: sstr << "E\t"; break; case snp_type_hom: sstr << "O\t"; break; default: sstr << "U\t"; break; } sstr << (*snp_it)->lratio << "\t" << (*snp_it)->rank_1 << "\t" << (*snp_it)->rank_2 << "\t" << ((*snp_it)->rank_3 == 0 ? '-' : (*snp_it)->rank_3) << "\t" << ((*snp_it)->rank_4 == 0 ? '-' : (*snp_it)->rank_4) << "\n"; } gzputs(snp_file, sstr.str().c_str()); sstr.str(""); // // Output the alleles associated with the two matched tags // for (all_it = tag->alleles.begin(); all_it != tag->alleles.end(); all_it++) sstr << "0\t" << batch_id << "\t" << tag->id << "\t" << all_it->first << "\t" << 0 << "\t" << 0 << "\n"; gzputs(all_file, sstr.str().c_str()); return 0; } int initialize_new_catalog(pair &sample, map &catalog) { map tmp_catalog; bool compressed = false; // // Parse the input files. 
// if (!load_loci(sample.second, tmp_catalog, false, false, compressed)) return 0; in_file_type = compressed == true ? FileT::gzsql : FileT::sql; sample.first = tmp_catalog.begin()->second->sample_id; // // Iterate over the catalog entries and renumber them after recording the source of // each locus. // map::iterator j; int k = 1; for (j = tmp_catalog.begin(); j != tmp_catalog.end(); j++) { j->second->sources.push_back(make_pair(sample.first, j->second->id)); j->second->id = k; catalog[k] = j->second; k++; } return 1; } int initialize_existing_catalog(string catalog_path, map &catalog) { bool compressed; // // Parse the input files. // if (!load_loci(catalog_path, catalog, false, false, compressed)) return 0; in_file_type = compressed == true ? FileT::gzsql : FileT::sql; // // Iterate over the catalog entries and convert the stack components // into source objects, to record what samples each locus came from. // map::iterator j; CLocus *loc; char *p, *q; int sample_id, locus_id; for (j = catalog.begin(); j != catalog.end(); j++) { loc = j->second; for (uint i = 0; i < loc->comp.size(); i++) { // // Parse the ID into sample ID / locus ID, given 43_1356, parse into // sample ID 43 and locus ID 1356.
// for (p = loc->comp[i]; *p != '_' && *p != '\0'; p++); if (*p != '_') return 0; p++; sample_id = strtol(loc->comp[i], &q, 10); if (*q != '_') return 0; locus_id = strtol(p, &q, 10); if (*q != '\0') return 0; loc->sources.push_back(make_pair(sample_id, locus_id)); } } return 1; } int parse_command_line(int argc, char* argv[]) { int c; string sstr; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"mmatches", no_argument, NULL, 'm'}, {"genomic_loc", no_argument, NULL, 'g'}, {"uniq_haplotypes", no_argument, NULL, 'u'}, {"report_mmatches", no_argument, NULL, 'R'}, {"batch_id", required_argument, NULL, 'b'}, {"ctag_dist", required_argument, NULL, 'n'}, {"k_len", required_argument, NULL, 'k'}, {"catalog", required_argument, NULL, 'c'}, {"sample", required_argument, NULL, 's'}, {"outpath", required_argument, NULL, 'o'}, {"num_threads", required_argument, NULL, 'p'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hgvuRmo:s:c:b:p:n:k:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 1, 2, 3\n"; help(); } break; case 'n': ctag_dist = is_integer(optarg); break; case 'k': set_kmer_len = false; kmer_len = is_integer(optarg); break; case 'm': mult_matches = true; break; case 'R': report_mmatches = true; break; case 'g': search_type = genomic_loc; break; case 's': sstr = optarg; samples.push(make_pair(0, sstr)); break; case 'c': catalog_path = optarg; break; case 'o': out_path = optarg; break; case 'u': require_uniq_haplotypes = true; break; case 'v': version(); break; case 'p': num_threads = is_integer(optarg); break; case '?': // getopt_long already printed an error message. 
help(); break; default: help(); abort(); } } if (set_kmer_len == false && (kmer_len < 5 || kmer_len > 31)) { cerr << "Kmer length must be between 5 and 31bp.\n"; help(); } if (samples.size() == 0) { cerr << "You must specify at least one sample file.\n"; help(); } if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; return 0; } void version() { std::cerr << "cstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "cstacks " << VERSION << "\n" << "cstacks -b batch_id -s sample_file [-s sample_file_2 ...] [-o path] [-g] [-n num] [-p num_threads] [--catalog path] [-h]" << "\n" << " b: MySQL ID of this batch." << "\n" << " s: filename prefix from which to load loci into the catalog." << "\n" << " o: output path to write results." << "\n" << " g: base catalog matching on genomic location, not sequence identity." << "\n" << " m: include tags in the catalog that match to more than one entry (default false)." << "\n" << " n: number of mismatches allowed between sample tags when generating the catalog (default 1)." << "\n" << " p: enable parallel execution with num_threads threads.\n" << " h: display this help message." << "\n\n" << " Catalog editing:\n" << " --catalog : provide the path to an existing catalog. cstacks will add data to this existing catalog.\n\n" << " Advanced options:\n" << " --k_len : specify k-mer size for matching between catalog loci (automatically calculated by default).\n" << " --report_mmatches: report query loci that match more than one catalog locus.\n"; exit(0); } stacks-1.35/src/cstacks.h000644 000765 000024 00000005403 12335173442 016103 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks.
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __CSTACKS_H__ #define __CSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include #include // Support for gzipped output files. #include // Process command-line options #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include #include using std::ifstream; using std::ofstream; using std::stringstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::queue; #include #include "constants.h" #include "stacks.h" #include "kmers.h" #include "locus.h" #include "sql_utilities.h" #include "utils.h" void help( void ); void version( void ); int parse_command_line(int, char**); int initialize_new_catalog(pair &, map &); int initialize_existing_catalog(string, map &); int update_catalog_index(map &, map &); int find_kmer_matches_by_sequence(map &, map &, int); int find_matches_by_sequence(map &, map &); int find_matches_by_genomic_loc(map &, map &); int characterize_mismatch_snps(CLocus *, QLocus *); int merge_allele(Locus *locus, SNP *snp); int merge_matches(map &, map &, pair &, int, uint &); int add_unique_tag(pair &, map &, QLocus *); bool compare_dist(pair, pair); int write_catalog(map &); int write_simple_output(CLocus *, ofstream &, ofstream &, ofstream &); int write_gzip_output(CLocus 
*, gzFile &, gzFile &, gzFile &); bool compare_matches(Match *, Match *); int populate_kmer_hash(map &, CatKmerHashMap &, vector &, int); #endif // __CSTACKS_H__ stacks-1.35/src/DNANSeq.cc000644 000765 000024 00000007002 12335173442 015774 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // DNANSeq.cc // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id: DNANSeq.cc 2133 2011-06-07 04:07:41Z catchen $ // #include "DNANSeq.h" DNANSeq::DNANSeq(int size) { int bytes; this->bits = size * bits_per_nuc; bytes = BITNSLOTS(this->bits); this->s = new unsigned char[bytes]; memset(this->s, 0, bytes); } DNANSeq::DNANSeq(int size, unsigned char *seq) { unsigned int bytes; this->bits = size * bits_per_nuc; bytes = BITNSLOTS(this->bits); this->s = new unsigned char[bytes]; for (unsigned int i = 0; i < bytes; i++) this->s[i] = seq[i]; } DNANSeq::DNANSeq(int size, const char *seq) { this->bits = size * bits_per_nuc; int bytes = BITNSLOTS(this->bits); this->s = new unsigned char[bytes]; memset(this->s, 0, bytes); int bit = 0; for (int i = 0; i < size; i++) { switch (seq[i]) { case 'A': case 'a': // A == 000 bit += 3; break; case 'C': case 'c': // C == 001 bit += 2; BITSET(this->s, bit); bit++; break; case 'G': case 'g': // G == 010 bit++; BITSET(this->s, bit); 
bit++; bit++; break; case 'T': case 't': // T == 011 bit++; BITSET(this->s, bit); bit++; BITSET(this->s, bit); bit++; break; case 'N': case 'n': case '.': // N == 100 BITSET(this->s, bit); bit += 3; break; } } } DNANSeq::~DNANSeq() { delete [] this->s; } char DNANSeq::operator[](int pos) { unsigned char c, base; int bit; if (pos > ((this->bits / bits_per_nuc) - 1)) return '\0'; bit = pos * bits_per_nuc; c = 0; base = 'X'; for (int i = bits_per_nuc - 1; i >= 0; i--) { if (BITTEST(this->s, bit)) c |= 1 << i; bit++; } switch (c) { case 0: base = 'A'; break; case 1: base = 'C'; break; case 2: base = 'G'; break; case 3: base = 'T'; break; case 4: base = 'N'; break; default: cerr << "Unknown character " << (int) c << "\n"; break; } //cerr << " Decoding character " << pos << ", '" << base << "'\n"; return base; } int DNANSeq::size() { return this->bits / bits_per_nuc; } char *DNANSeq::subseq(char *seq, int start, int end) { int i; for (i = start; i <= end; i++) seq[i - start] = this->operator[](i); seq[i - start] = '\0'; return seq; } char *DNANSeq::seq(char *seq) { int i; int end = this->bits / bits_per_nuc; for (i = 0; i < end; i++) seq[i] = this->operator[](i); seq[i] = '\0'; return seq; } char *DNANSeq::seq() { int i; int size = this->bits / bits_per_nuc; char *seq = new char[size + 1]; for (i = 0; i < size; i++) seq[i] = this->operator[](i); seq[i] = '\0'; return seq; } stacks-1.35/src/DNANSeq.h000644 000765 000024 00000005752 12335173442 015650 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __DNANSeq_H__ #define __DNANSeq_H__ #include #include #define BITMASK(b) (1 << ((b) % CHAR_BIT)) #define BITSLOT(b) ((b) / CHAR_BIT) #define BITSET(a, b) ((a)[BITSLOT(b)] |= BITMASK(b)) #define BITCLEAR(a, b) ((a)[BITSLOT(b)] &= ~BITMASK(b)) #define BITTEST(a, b) ((a)[BITSLOT(b)] & BITMASK(b)) #define BITNSLOTS(nb) ((nb + CHAR_BIT - 1) / CHAR_BIT) // // We expect (and C++ defines) an unsigned char as 8 bits. // const unsigned short int bits_per_nuc = 3; const unsigned short int byte_size = 8; // // DNA Sequence Storage Class // // Two-bit compression, four bases per byte of storage: // A == 000 // C == 001 // G == 010 // T == 011 // N == 100 // class DNANSeq { public: // // The number of bits required to store string of DNA string // unsigned short int bits; // // Array of bytes to store DNA sequence. 
// unsigned char *s; DNANSeq(int); DNANSeq(int, const char *); DNANSeq(int, unsigned char *); ~DNANSeq(); char operator[](int); int size(); char *seq(char *); char *seq(); char *subseq(char *, int, int); }; #include #include #include using std::stringstream; using std::cin; using std::cout; using std::cerr; // namespace __gnu_cxx { // template<> // struct hash // { // size_t // operator()(DNANSeq *__s) const { // unsigned long __h = 0; // unsigned int bytes = BITNSLOTS(__s->bits); // for (unsigned int i = 0; i < bytes; i++) // __h = 5 * __h + __s->s[i]; // return size_t(__h); // } // }; // } struct hash_dnanseq { size_t operator()(DNANSeq *__s) const { size_t __result = static_cast(14695981039346656037ULL); unsigned short int __bytes = BITNSLOTS(__s->bits); for (unsigned short int i = 0; i < __bytes; i++) { __result ^= static_cast(__s->s[i]); __result *= static_cast(1099511628211ULL); } return __result; } }; struct dnanseq_eqstr { bool operator()(DNANSeq *s1, DNANSeq *s2) const { unsigned int bytes = BITNSLOTS(s1->bits); for (unsigned int i = 0; i < bytes; i++) if (s1->s[i] != s2->s[i]) return false; return true; } }; #endif // __DNANSeq_H__ stacks-1.35/src/DNASeq.cc000644 000765 000024 00000014045 12335173442 015663 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// // // DNASeq.cc // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id: DNASeq.cc 2133 2011-06-07 04:07:41Z catchen $ // #include "DNASeq.h" DNASeq::DNASeq(int size) { int bytes; this->size = size; bytes = size / bases_per_byte; bytes += size % bases_per_byte > 0 ? 1 : 0; this->s = new unsigned char[bytes]; memset(this->s, 0, bytes); } DNASeq::DNASeq(int size, unsigned char *seq) { unsigned int bytes; this->size = size; bytes = size / bases_per_byte; bytes += size % bases_per_byte > 0 ? 1 : 0; this->s = new unsigned char[bytes]; for (unsigned int i = 0; i < bytes; i++) this->s[i] = seq[i]; } DNASeq::DNASeq(int size, const char *seq) { int bytes, rem; this->size = size; bytes = size / bases_per_byte; rem = size % bases_per_byte; bytes += rem > 0 ? 1 : 0; this->s = new unsigned char[bytes]; int index = 0; for (int i = 0; i < this->size; i++) { //cerr << "Encoding character " << i << ", '" << seq[i] << "'\n"; if (i > 0 && i % bases_per_byte == 0) index++; //cerr << " encoding '" << seq[i] << "' into byte " << index << ".\n"; this->s[index] <<= 2; switch (seq[i]) { case 'A': case 'a': // A == 00 break; case 'C': case 'c': // C == 01 this->s[index] |= 0x1; break; case 'G': case 'g': // G == 10 this->s[index] |= 0x2; break; case 'T': case 't': // T == 11 this->s[index] |= 0x3; break; } //cerr << " s[" << index << "," << i % bases_per_byte << "] == " << (int)this->s[index] << "\n"; } if (rem > 0) this->s[index] <<= (bases_per_byte - rem) * 2; } DNASeq::~DNASeq() { delete [] this->s; } char DNASeq::operator[](int pos) { unsigned char c, base; int index, rem; if (pos > (this->size - 1)) return '\0'; index = pos / bases_per_byte; rem = pos % bases_per_byte; //cerr << "s[" << index << "," << rem << "] == " << (int)this->s[index] << "\n"; switch (rem) { case 0: c = this->s[index] & 0xC0; // 11000000 c >>= 6; break; case 1: c = this->s[index] & 0x30; // 00110000 c >>= 4; break; case 2: c = this->s[index] & 0xC; // 00001100 c >>= 2; break; case 3: c = 
this->s[index] & 0x3; // 00000011 break; } switch (c) { case 0: base = 'A'; break; case 1: base = 'C'; break; case 2: base = 'G'; break; case 3: base = 'T'; break; } //cerr << " Decoding character " << pos << ", '" << base << "'\n"; return base; } char *DNASeq::subseq(char *seq, int start, int end) { unsigned char c; int i, index, rem; i = start; index = i / bases_per_byte; rem = i % bases_per_byte; for (; i <= end; i++) { rem = i % bases_per_byte; if (i > 0 && rem == 0) index++; //cerr << "s[" << index << "," << rem << "] == " << (int)this->s[index] << "\n"; switch (rem) { case 0: c = this->s[index] & 0xC0; // 11000000 c >>= 6; break; case 1: c = this->s[index] & 0x30; // 00110000 c >>= 4; break; case 2: c = this->s[index] & 0xC; // 00001100 c >>= 2; break; case 3: c = this->s[index] & 0x3; // 00000011 break; } switch (c) { case 0: seq[i - start] = 'A'; break; case 1: seq[i - start] = 'C'; break; case 2: seq[i - start] = 'G'; break; case 3: seq[i - start] = 'T'; break; } //cerr << " Decoding character " << i << ", '" << seq[i - start] << "'\n"; } seq[i - start] = '\0'; return seq; } char *DNASeq::seq(char *seq) { unsigned char c; int i; int index = 0; for (i = 0; i < this->size; i++) { if (i > 0 && i % bases_per_byte == 0) index++; //cerr << "s[" << index << "," << i % bases_per_byte << "] == " << (int)this->s[index] << "\n"; switch (i % bases_per_byte) { case 0: c = this->s[index] & 0xC0; // 11000000 c >>= 6; break; case 1: c = this->s[index] & 0x30; // 00110000 c >>= 4; break; case 2: c = this->s[index] & 0xC; // 00001100 c >>= 2; break; case 3: c = this->s[index] & 0x3; // 00000011 break; } switch (c) { case 0: seq[i] = 'A'; break; case 1: seq[i] = 'C'; break; case 2: seq[i] = 'G'; break; case 3: seq[i] = 'T'; break; } //cerr << " Decoding character " << i << ", '" << seq[i] << "'\n"; } seq[i] = '\0'; return seq; } char *DNASeq::seq() { unsigned char c; int i; int index = 0; char *seq = new char[this->size + 1]; for (i = 0; i < this->size; i++) { if (i > 0 && i 
% bases_per_byte == 0) index++; //cerr << "s[" << index << "," << i % bases_per_byte << "] == " << (int)this->s[index] << "\n"; switch (i % bases_per_byte) { case 0: c = this->s[index] & 0xC0; // 11000000 c >>= 6; break; case 1: c = this->s[index] & 0x30; // 00110000 c >>= 4; break; case 2: c = this->s[index] & 0xC; // 00001100 c >>= 2; break; case 3: c = this->s[index] & 0x3; // 00000011 break; } switch (c) { case 0: seq[i] = 'A'; break; case 1: seq[i] = 'C'; break; case 2: seq[i] = 'G'; break; case 3: seq[i] = 'T'; break; } //cerr << " Decoding character " << i << ", '" << seq[i] << "'\n"; } seq[i] = '\0'; return seq; } stacks-1.35/src/DNASeq.h000644 000765 000024 00000005213 12335173442 015522 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __DNASeq_H__ #define __DNASeq_H__ #include #include // #ifdef __GNUC__ // #include // using __gnu_cxx::hash_map; // using __gnu_cxx::hash; // #else // #include // #endif // // We expect (and C++ defines) an unsigned char as 8 bits, so we // should be able to store 4 nucleotide bases per byte of memory. 
// const unsigned short int bases_per_byte = CHAR_BIT / 2; // // DNA Sequence Storage Class // // Two-bit compression, four bases per byte of storage: // A == 00 // C == 01 // G == 10 // T == 11 // class DNASeq { public: // // The number of DNA bases we are storing // unsigned short int size; // // Array of bytes to store DNA sequence, one character per two bits, four per byte. // unsigned char *s; DNASeq(int); DNASeq(int, const char *); DNASeq(int, unsigned char *); ~DNASeq(); char len() { return this->size; } char operator[](int); char *seq(char *); char *seq(); char *subseq(char *, int, int); }; #include #include #include using std::stringstream; using std::cin; using std::cout; using std::cerr; struct hash_dnaseq { size_t operator()(DNASeq *__s) const { size_t __result = static_cast(14695981039346656037ULL); unsigned short int __bytes = (__s->size / bases_per_byte) + (__s->size % bases_per_byte > 0 ? 1 : 0); for (unsigned short int i = 0; i < __bytes; i++) { __result ^= static_cast(__s->s[i]); __result *= static_cast(1099511628211ULL); } return __result; } }; struct dnaseq_eqstr { bool operator()(DNASeq *s1, DNASeq *s2) const { unsigned int bytes = (s1->size / bases_per_byte) + (s1->size % bases_per_byte > 0 ? 1 : 0); for (unsigned int i = 0; i < bytes; i++) if (s1->s[i] != s2->s[i]) return false; return true; } }; #endif // __DNASeq_H__ stacks-1.35/src/estacks.cc000644 000765 000024 00000045370 12533677757 016274 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // estacks -- search an existing set of stacks for polymorphisms // #include "estacks.h" // // Global variables to hold command-line options. // FileT in_file_type; string in_file; string out_path; bool record_hom = false; int sql_id = 0; int min_stack_cov = 1; int num_threads = 1; // // For use with the multinomial model to call fixed nucleotides. // modelt model_type = snp; double bound_low = 0.0; double bound_high = 1.0; double p_freq = 0.5; double barcode_err_freq = 0.0; double heterozygote_limit = -3.84; double homozygote_limit = 3.84; int main (int argc, char* argv[]) { parse_command_line(argc, argv); // // Set the number of OpenMP parallel threads to execute. // #ifdef _OPENMP omp_set_num_threads(num_threads); #endif HashMap radtags; set merge_map; map unique; load_radtags(in_file, radtags); reduce_radtags(radtags, unique); //dump_stacks(unique); map merged; populate_merged_tags(unique, merged); //dump_merged_stacks(merged); // Call the consensus sequence again, now that remainder tags have been merged. 
call_consensus(merged, unique, true); count_raw_reads(unique, merged); cerr << "Writing results\n"; write_sql(merged, unique); return 0; } int call_alleles(MergedStack *mtag, vector &reads) { int row; int height = reads.size(); string allele; char base; vector::iterator snp; DNANSeq *d; if (mtag->snps.size() == 0) return 0; for (row = 0; row < height; row++) { allele.clear(); bool haplotype = true; for (snp = mtag->snps.begin(); snp != mtag->snps.end(); snp++) { d = reads[row]; base = (*d)[(*snp)->col]; // // Check to make sure the nucleotide at the location of this SNP is // of one of the two possible states the multinomial model called. // if (base == (*snp)->rank_1 || base == (*snp)->rank_2) allele += base; else haplotype = false; } if (haplotype && allele.size() == mtag->snps.size()) mtag->alleles[allele]++; } return 0; } int call_consensus(map &merged, map &unique, bool invoke_model) { // // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. // map::iterator it; vector keys; for (it = merged.begin(); it != merged.end(); it++) keys.push_back(it->first); int i; #pragma omp parallel private(i) { #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { MergedStack *mtag; PStack *utag; mtag = merged[keys[i]]; // // Create a two-dimensional array, each row containing one read. For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. // vector::iterator j; vector reads; for (j = mtag->utags.begin(); j != mtag->utags.end(); j++) { utag = unique[*j]; for (uint k = 0; k < utag->count; k++) { reads.push_back(utag->seq); } } // // Iterate over each column of the array and call the consensus base. 
// int row, col; int length = reads[0]->size(); int height = reads.size(); string con; map nuc; map::iterator max, n; DNANSeq *d; for (col = 0; col < length; col++) { nuc['A'] = 0; nuc['C'] = 0; nuc['G'] = 0; nuc['T'] = 0; for (row = 0; row < height; row++) { d = reads[row]; nuc[(*d)[col]]++; } // // Find the base with a plurality of occurances and call it. // max = nuc.end(); for (n = nuc.begin(); n != nuc.end(); n++) { if (max == nuc.end() || n->second > max->second) max = n; } con += max->second == 0 ? 'N' : max->first; // Search this column for the presence of a SNP if (invoke_model) model_type == snp ? call_multinomial_snp(mtag, col, nuc, record_hom) : call_multinomial_fixed(mtag, col, nuc); } if (invoke_model) { call_alleles(mtag, reads); if (model_type == fixed) { // // Mask nucleotides that are not fixed. // vector::iterator s; for (s = mtag->snps.begin(); s != mtag->snps.end(); s++) { con.replace((*s)->col, 1, "N"); } } } mtag->add_consensus(con.c_str()); } } return 0; } int count_raw_reads(map &unique, map &merged) { map::iterator it; vector::iterator k; PStack *tag; long int m = 0; for (it = merged.begin(); it != merged.end(); it++) { for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; m += tag->count; } m += it->second->remtags.size(); } cerr << " Number of utilized reads " << m << "\n"; return 0; } int write_sql(map &m, map &u) { map::iterator i; vector::iterator j; vector::iterator k; vector::iterator s; map::iterator t; MergedStack *tag_1; PStack *tag_2; // // Parse the input file name to create the output files // size_t pos_1 = in_file.find_last_of("/"); size_t pos_2 = in_file.find_last_of("."); string tag_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".tags.tsv"; string snp_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".snps.tsv"; string all_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".alleles.tsv"; string pil_file = out_path + 
in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".pileup.tsv"; // Open the output files for writing. std::ofstream tags(tag_file.c_str()); std::ofstream snps(snp_file.c_str()); std::ofstream alle(all_file.c_str()); std::ofstream pile(pil_file.c_str()); int tag_id, comp_id; tag_id = 0; for (i = m.begin(); i != m.end(); i++) { tag_1 = i->second; // First write the consensus sequence for (s = tag_1->snps.begin(); s != tag_1->snps.end(); s++) { float total = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { //if (u[*k]->seq[(*s)->col] == 'N') continue; total += u[*k]->count; } if (total < min_stack_cov) continue; tags << "0" << "\t" << sql_id << "\t" << tag_id << "\t" << tag_1->loc.chr << "\t" << tag_1->loc.bp + (*s)->col << "\t" << "consensus\t" << "\t\t" << tag_1->con[(*s)->col] << "\t" << tag_1->deleveraged << "\t" << tag_1->blacklisted << "\t" << tag_1->lumberjackstack << "\n"; // Now write out the components of each unique tag merged into this one. comp_id = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { tag_2 = u[*k]; //if (tag_2->seq[(*s)->col] == 'N') continue; for (j = tag_2->map.begin(); j != tag_2->map.end(); j++) { tags << "0" << "\t" << sql_id << "\t" << tag_id << "\t\t\t" << "primary\t" << comp_id << "\t" << *j << "\t" << (*tag_2->seq)[(*s)->col] << "\t\t\t\n"; } comp_id++; } snps << "0" << "\t" << sql_id << "\t" << tag_id << "\t" << 0 << "\t" << (*s)->lratio << "\t" << (*s)->rank_1 << "\t" << (*s)->rank_2 << "\n"; // Write the expressed alleles seen for the recorded SNPs and // the percentage of tags a particular allele occupies. 
map allele; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { if ((*u[*k]->seq)[(*s)->col] != (*s)->rank_1 && (*u[*k]->seq)[(*s)->col] != (*s)->rank_2) continue; allele[(*u[*k]->seq)[(*s)->col]] += u[*k]->count; } char pct[id_len]; map::iterator a; for (a = allele.begin(); a != allele.end(); a++) { sprintf(pct, "%.2f", ((a->second/total) * 100)); alle << "0" << "\t" << sql_id << "\t" << tag_id << "\t" << a->first << "\t" << pct << "\t" << a->second << "\n"; } tag_id++; } } for (i = m.begin(); i != m.end(); i++) { tag_1 = i->second; float total = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) total += u[*k]->count; if (total < min_stack_cov) continue; // First write the consensus sequence pile << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << tag_1->loc.chr << "\t" << tag_1->loc.bp << "\t" << "consensus\t" << "\t\t" << tag_1->con << "\n"; // Now write out the components of each unique tag merged into this one. comp_id = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { tag_2 = u[*k]; for (j = tag_2->map.begin(); j != tag_2->map.end(); j++) { pile << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t\t\t" << "primary\t" << comp_id << "\t" << *j << "\t" << tag_2->seq << "\t\t\t\n"; } comp_id++; } } tags.close(); snps.close(); alle.close(); pile.close(); return 0; } int populate_merged_tags(map &unique, map &merged) { map::iterator i; map::iterator it_new, it_old; map > locations; map >::iterator k; set::iterator s; char id[id_len]; PStack *u; MergedStack *m; int global_id = 0; // // Create a map of each unique Stack that has been aligned to the same genomic location. // for (i = unique.begin(); i != unique.end(); i++) { snprintf(id, id_len - 1, "%s_%d", i->second->loc.chr, i->second->loc.bp); locations[id].insert(i->second->id); } it_old = merged.begin(); for (k = locations.begin(); k != locations.end(); k++) { m = new MergedStack; m->id = global_id; // // Record the consensus and physical location for this stack. 
// s = k->second.begin(); m->add_consensus(unique[*s]->seq); strncpy(m->loc.chr, unique[*s]->loc.chr, id_len - 1); m->loc.chr[id_len] = '\0'; m->loc.bp = unique[*s]->loc.bp; // // Record the individual stacks that were aligned together. // for (; s != k->second.end(); s++) { u = unique[*s]; m->count += u->count; m->utags.push_back(u->id); } // Insert the new MergedStack giving a hint as to which position // to insert it at. it_new = merged.insert(it_old, pair(global_id, m)); it_old = it_new; global_id++; } cerr << " Merged " << unique.size() << " unique Stacks into " << merged.size() << " loci.\n"; return 0; } // // This function assumes that there may be identical reads, mapped to multiple // places in the genome. In this case, reads are broken down by read ID // and split into different Stack objects. // int reduce_radtags(HashMap &radtags, map &unique) { HashMap::iterator it; vector::iterator sit; PStack *u; int global_id = 1; for (it = radtags.begin(); it != radtags.end(); it++) { // // Make sure there aren't any reads of identical sequence that have been mapped to // different genomic locations. // map locations; map::iterator lit; for (sit = (*it).second.begin(); sit != (*it).second.end(); sit++) locations[(*sit)->loc_str]++; for (lit = locations.begin(); lit != locations.end(); lit++) { // // Populate a PStack object for this unique radtag. // u = new PStack; u->id = global_id; u->count = lit->second; u->add_seq(it->first); // // Record the physical location of this stack. // for (sit = (*it).second.begin(); sit != (*it).second.end(); sit++) { if (strcmp((*sit)->loc_str, lit->first.c_str()) == 0) { u->add_id((*sit)->id); u->loc.set((*sit)->loc.chr, (*sit)->loc.bp, (*sit)->loc.strand); } } unique[u->id] = u; global_id++; } } return 0; } // // We expect tags to have already been aligned to a reference genome. Therefore, the tags // are identified by their chromosome and basepair location. 
// int load_radtags(string in_file, HashMap &radtags) { Input *fh = NULL; Seq *c; if (in_file_type == FileT::bowtie) fh = new Bowtie(in_file.c_str()); else if (in_file_type == FileT::sam) fh = new Sam(in_file.c_str()); else if (in_file_type == FileT::tsv) fh = new Tsv(in_file.c_str()); cerr << "Parsing " << in_file.c_str() << "\n"; int i = 1; while ((c = fh->next_seq()) != NULL) { if (i % 10000 == 0) cerr << "Loading aligned sequence " << i << " \r"; radtags[c->seq].push_back(c); i++; } if (i == 0) { cerr << "Error: Unable to load data from '" << in_file.c_str() << "'.\n"; exit(1); } cerr << " " << "Analyzed " << i - 1 << " sequence reads; " << "Identified " << radtags.size() << " unique Stacks from those reads.\n"; // // Close the file and delete the Input object. // delete fh; return 0; } int dump_stacks(map &u) { map::iterator it; vector::iterator fit; vector >::iterator pit; vector::iterator mit; for (it = u.begin(); it != u.end(); it++) { cerr << "Stack ID: " << (*it).second->id << "\n" << " Seq: " << (*it).second->seq << "\n" << " IDs: "; for (fit = (*it).second->map.begin(); fit != (*it).second->map.end(); fit++) cerr << *fit << " "; cerr << "\n\n"; } return 0; } int dump_merged_stacks(map &m) { map::iterator it; vector >::iterator pit; vector::iterator fit; for (it = m.begin(); it != m.end(); it++) { cerr << "MergedStack ID: " << it->second->id << "\n" << " Consensus: "; if (it->second->con != NULL) cerr << it->second->con << "\n"; else cerr << "\n"; cerr << " IDs: "; for (fit = it->second->utags.begin(); fit != it->second->utags.end(); fit++) cerr << (*fit) << " "; cerr << "\n" << " Distances: "; for (pit = it->second->dist.begin(); pit != it->second->dist.end(); pit++) cerr << (*pit).first << ": " << (*pit).second << ", "; cerr << "\n\n"; } return 0; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"rec_hom", no_argument, 
NULL, 'O'}, {"infile_type", required_argument, NULL, 't'}, {"outfile_type", required_argument, NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"outpath", required_argument, NULL, 'o'}, {"id", required_argument, NULL, 'i'}, {"min_cov", required_argument, NULL, 'm'}, {"num_threads", required_argument, NULL, 'p'}, {"bc_err_freq", required_argument, NULL, 'e'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvOf:o:i:e:p:m:s:f:t:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 't': if (strcmp(optarg, "bowtie") == 0) in_file_type = FileT::bowtie; else if (strcmp(optarg, "sam") == 0) in_file_type = FileT::sam; else if (strcmp(optarg, "tsv") == 0) in_file_type = FileT::tsv; else in_file_type = FileT::unknown; break; case 'f': in_file = optarg; break; case 'o': out_path = optarg; break; case 'i': sql_id = atoi(optarg); break; case 'm': min_stack_cov = atoi(optarg); break; case 'e': barcode_err_freq = atof(optarg); break; case 'p': num_threads = atoi(optarg); break; case 'O': record_hom = true; break; case 'v': version(); break; case '?': // getopt_long already printed an error message. 
help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_file.length() == 0 || in_file_type == FileT::unknown) { cerr << "You must specify an input file of a supported type.\n"; help(); } if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (model_type == fixed && barcode_err_freq == 0) { cerr << "You must specify the barcode error frequency.\n"; help(); } return 0; } void version() { std::cerr << "estacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "estacks " << VERSION << "\n" << "estacks -t file_type -f file_path [-o path] [-i id] [-m min_cov] [-r] [-e errfreq] [-p num_threads] [-h]" << "\n" << " p: enable parallel execution with num_threads threads.\n" << " t: input file Type. Supported types: bowtie, sam.\n" << " f: input file path.\n" << " o: output path to write results.\n" << " i: SQL ID to insert into the output to identify this sample.\n" << " O: record homozygotes along with heterozygote SNPs.\n" << " m: minimum depth of coverage to report a stack (default 1).\n" << " e: specify the barcode error frequency (0 < e < 1) if using the 'fixed' model.\n" << " h: display this help messsage." << "\n\n"; exit(0); } stacks-1.35/src/estacks.h000644 000765 000024 00000005547 12335173442 016116 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __PSTACKS_H__ #define __PSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include #include #include #include using std::stringstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include using std::unordered_map; #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "constants.h" #include "stacks.h" // Major data structures for holding stacks #include "kmers.h" #include "mstack.h" #include "utils.h" #include "models.h" // Contains maximum likelihood statistical models. #include "Tsv.h" // Reading input files in Tab-separated values format #include "BowtieI.h" // Reading input files in Bowtie format #include "SamI.h" // Reading input files in SAM format #include "FastaI.h" // Reading input files in FASTA format #include "FastqI.h" // Reading input files in FASTQ format #include "DNANSeq.h" const int barcode_size = 5; #ifdef HAVE_SPARSEHASH typedef sparse_hash_map, hash_charptr, eqstr> HashMap; #else typedef unordered_map, hash_charptr, eqstr> HashMap; #endif void help( void ); void version( void ); int parse_command_line(int, char**); int load_radtags(string, HashMap &); int reduce_radtags(HashMap &, map &); int populate_merged_tags(map &, map &); int call_consensus(map &, map &, bool); int call_alleles(MergedStack *, vector &); int count_raw_reads(map &, map &); int write_sql(map &, map &); int write_sam(map &, map &); // // Debugging // int dump_stacks(map &); int dump_merged_stacks(map &); #endif // __ESTACKS_H__ stacks-1.35/src/FastaI.h000644 000765 000024 00000007215 12335173442 015622 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2013, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __FASTAI_H__ #define __FASTAI_H__ #include "input.h" class Fasta: public Input { string buf; public: Fasta(const char *path) : Input(path) { }; Fasta(string path) : Input(path.c_str()) { }; ~Fasta() {}; Seq *next_seq(); int next_seq(Seq &); }; Seq *Fasta::next_seq() { // // Check the contents of the line buffer. When we finish reading a FASTA record // the buffer will either contain whitespace or the header of the next FAST // record. // while (this->line[0] != '>' && this->fh.good() ) { this->fh.getline(this->line, max_len); } if (!this->fh.good()) { return NULL; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; // // Initialize the Seq structure and store the FASTA ID // Seq *s = new Seq; s->id = new char[len + 1]; strcpy(s->id, this->line + 1); // // Read the sequence from the file -- keep reading lines until we reach the next // record or the end of file. 
// this->fh.getline(this->line, max_len); while (this->line[0] != '>' && this->fh.good()) { len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; this->buf += this->line; this->fh.getline(this->line, max_len); } if (this->fh.eof()) { len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; this->buf += this->line; } s->seq = new char[this->buf.length() + 1]; strcpy(s->seq, this->buf.c_str()); this->buf.clear(); return s; } int Fasta::next_seq(Seq &s) { // // Check the contents of the line buffer. When we finish reading a FASTA record // the buffer will either contain whitespace or the header of the next FAST // record. // while (this->line[0] != '>' && this->fh.good() ) { this->fh.getline(this->line, max_len); } if (!this->fh.good()) { return 0; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; // // Store the FASTA ID // strcpy(s.id, this->line + 1); // // Read the sequence from the file -- keep reading lines until we reach the next // record or the end of file. // this->fh.getline(this->line, max_len); while (this->line[0] != '>' && this->fh.good()) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0'; this->buf += this->line; this->fh.getline(this->line, max_len); } if (this->fh.eof()) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0'; this->buf += this->line; } strcpy(s.seq, this->buf.c_str()); this->buf.clear(); return 1; } #endif // __FASTAI_H__ stacks-1.35/src/FastqI.h000644 000765 000024 00000010525 12335173442 015640 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2013, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __FASTQI_H__ #define __FASTQI_H__ #include "input.h" class Fastq: public Input { public: Fastq(const char *path) : Input(path) { }; Fastq(string path) : Input(path.c_str()) { }; ~Fastq() {}; Seq *next_seq(); int next_seq(Seq &s); }; Seq *Fastq::next_seq() { // // Check the contents of the line buffer. When we finish reading a FASTQ record // the buffer will either contain whitespace or the header of the next FASTQ // record. 
// while (this->line[0] != '@' && this->fh.good() ) { this->fh.getline(this->line, max_len); } if (!this->fh.good()) { return NULL; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0'; // // Initialize the Seq structure and store the FASTQ ID // Seq *s = new Seq; s->id = new char[strlen(this->line) + 1]; strcpy(s->id, this->line + 1); // // Read the sequence from the file // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return NULL; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0'; s->seq = new char[len + 1]; strcpy(s->seq, this->line); // // Read the repeat of the ID // this->fh.getline(this->line, max_len); if (this->line[0] != '+' || !this->fh.good()) { return NULL; } // // Read the quality score from the file // this->fh.getline(this->line, max_len); if (!this->fh.good() && !this->fh.eof()) { return NULL; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0'; s->qual = new char[len + 1]; strcpy(s->qual, this->line); // // Clear the line buffer so it is set up for the next record. If a '@' // appears in the quality scores read, it will break parsing next time // it is called. // this->line[0] = '\0'; return s; } int Fastq::next_seq(Seq &s) { // // Check the contents of the line buffer. When we finish reading a FASTQ record // the buffer will either contain whitespace or the header of the next FASTQ // record. 
// while (this->line[0] != '@' && this->fh.good() ) { this->fh.getline(this->line, max_len); } if (!this->fh.good()) { return 0; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; // // Store the FASTQ ID // strcpy(s.id, this->line + 1); // // Read the sequence from the file // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return 0; } len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; strcpy(s.seq, this->line); // // Read the repeat of the ID // this->fh.getline(this->line, max_len); if (this->line[0] != '+' || !this->fh.good()) { return 0; } // // Read the quality score from the file // this->fh.getline(this->line, max_len); if (!this->fh.good() && !this->fh.eof()) { return 0; } len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; strcpy(s.qual, this->line); // // Clear the line buffer so it is set up for the next record. If a '@' // appears in the quality scores read, it will break parsing next time // it is called. // this->line[0] = '\0'; return 1; } #endif // __FASTQI_H__ stacks-1.35/src/file_io.cc000644 000765 000024 00000061724 12574066143 016230 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2012-2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // file_io.cc -- common routines for opening groups of files and processing barcode lists. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "file_io.h" int open_files(vector > &files, vector &barcodes, map &pair_1_fhs, map &pair_2_fhs, map &rem_1_fhs, map &rem_2_fhs, map > &counters) { string path, suffix_1, suffix_2, filepath, file; if (paired) { suffix_1 = ".1"; suffix_2 = ".2"; } if (out_file_type == FileT::fastq) { suffix_1 += ".fq"; suffix_2 += ".fq"; } else { suffix_1 += ".fa"; suffix_2 += ".fa"; } uint pos; ofstream *fh; BarcodePair bc; // // If the size of the barcodes vector is 0, then no barcodes // were submitted. In this case, we want to open output files // of the same name as input files, but in out_path. // if (barcodes.size() == 0 && merge == false) { struct stat sb_1, sb_2; for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; if (paired) bc.pe = files[i].second; path = out_path + files[i].first; // // Check that the file has a proper suffix for the output type. // pos = path.find_last_of("."); if (path.substr(pos) == ".bam") { path = path.substr(0, pos) + suffix_1; } else if (path.substr(pos) == ".gz") { path = path.substr(0, pos); pos = path.find_last_of("."); path = path.substr(0, pos) + suffix_1; } else { path = path.substr(0, pos) + suffix_1; } if (stat((in_path_1 + files[i].first).c_str(), &sb_1) == -1) { cerr << "Unable to stat input file '" << in_path_1 + files[i].first << "'\n"; exit(1); } if (stat(path.c_str(), &sb_2) == 0 && sb_2.st_dev == sb_1.st_dev && sb_2.st_ino == sb_1.st_ino) { cerr << "Input and output files ('" << path << "') are the same and will cause the input " << "file to be overwritten. 
Please specify a separate output directory using '-o'.\n"; help(); } fh = new ofstream(path.c_str(), ifstream::out); pair_1_fhs[bc] = fh; if (pair_1_fhs[bc]->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } if (paired) { file = interleaved ? files[i].first : files[i].second; path = out_path + file; pos = path.find_last_of("."); if (path.substr(pos) == ".bam") { path = path.substr(0, pos) + suffix_2; } else if (path.substr(pos) == ".gz") { path = path.substr(0, pos); pos = path.find_last_of("."); path = path.substr(0, pos) + suffix_2; } else { path = path.substr(0, pos) + suffix_2; } if (stat((in_path_2 + file).c_str(), &sb_1) == -1) { cerr << "Unable to stat input file '" << in_path_2 + file << "'\n"; exit(1); } if (stat(path.c_str(), &sb_2) == 0 && sb_2.st_dev == sb_1.st_dev && sb_2.st_ino == sb_1.st_ino) { cerr << "Input and output file names ('" << path << "') are the same and will cause the input " << "file to be overwritten. Please specify a separate output directory using '-o'.\n"; help(); } fh = new ofstream(path.c_str(), ifstream::out); pair_2_fhs[bc] = fh; if (pair_2_fhs[bc]->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } filepath = files[i].first; pos = filepath.find_last_of("."); if (filepath.substr(pos) == ".gz") { filepath = filepath.substr(0, pos); pos = filepath.find_last_of("."); filepath = filepath.substr(0, pos); } else if (filepath.substr(pos) == ".bam") { filepath = filepath.substr(0, pos); } path = out_path + filepath + ".rem" + suffix_1; fh = new ofstream(path.c_str(), ifstream::out); rem_1_fhs[bc] = fh; if (rem_1_fhs[bc]->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } filepath = file; pos = filepath.find_last_of("."); if (filepath.substr(pos) == ".gz") { filepath = filepath.substr(0, pos); pos = filepath.find_last_of("."); filepath = filepath.substr(0, pos); } else if (filepath.substr(pos) == ".bam") { filepath = filepath.substr(0, pos); } path = 
out_path + filepath + ".rem" + suffix_2; fh = new ofstream(path.c_str(), ifstream::out); rem_2_fhs[bc] = fh; if (rem_2_fhs[bc]->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } } } return 0; } else if (barcodes.size() == 0 && merge == true) { path = out_path + "sample_unbarcoded" + suffix_1; fh = new ofstream(path.c_str(), ifstream::out); if (fh->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; if (paired) bc.pe = files[i].second; pair_1_fhs[bc] = fh; } if (paired) { path = out_path + "sample_unbarcoded" + suffix_2; fh = new ofstream(path.c_str(), ifstream::out); if (fh->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; pair_2_fhs[bc] = fh; } path = out_path + "sample_unbarcoded.rem" + suffix_1; fh = new ofstream(path.c_str(), ifstream::out); if (fh->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; rem_1_fhs[bc] = fh; } path = out_path + "sample_unbarcoded.rem" + suffix_2; fh = new ofstream(path.c_str(), ifstream::out); if (fh->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; rem_2_fhs[bc] = fh; } } return 0; } string filename; for (uint i = 0; i < barcodes.size(); i++) { filename = barcodes[i].name_exists() ? 
barcodes[i].name : "sample_" + barcodes[i].str(); path = out_path + filename + suffix_1; fh = new ofstream(path.c_str(), ifstream::out); pair_1_fhs[barcodes[i]] = fh; if (pair_1_fhs[barcodes[i]]->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } if (paired) { path = out_path + filename + suffix_2; fh = new ofstream(path.c_str(), ifstream::out); pair_2_fhs[barcodes[i]] = fh; if (pair_2_fhs[barcodes[i]]->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } path = out_path + filename + ".rem" + suffix_1; fh = new ofstream(path.c_str(), ifstream::out); rem_1_fhs[barcodes[i]] = fh; if (rem_1_fhs[barcodes[i]]->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } path = out_path + filename + ".rem" + suffix_2; fh = new ofstream(path.c_str(), ifstream::out); rem_2_fhs[barcodes[i]] = fh; if (rem_2_fhs[barcodes[i]]->fail()) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } } } return 0; } int open_files(vector > &files, vector &barcodes, map &pair_1_fhs, map &pair_2_fhs, map &rem_1_fhs, map &rem_2_fhs, map > &counters) { string path, suffix_1, suffix_2, filepath, file; if (paired) { suffix_1 = ".1"; suffix_2 = ".2"; } if (out_file_type == FileT::gzfastq) { suffix_1 += ".fq.gz"; suffix_2 += ".fq.gz"; } else { suffix_1 += ".fa.gz"; suffix_2 += ".fa.gz"; } uint pos; gzFile *fh; BarcodePair bc; // // If the size of the barcodes vector is 0, then no barcodes // were submitted. In this case, we want to open output files // of the same name as input files, but in out_path. // if (barcodes.size() == 0 && merge == false) { struct stat sb_1, sb_2; for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; if (paired) bc.pe = files[i].second; path = out_path + files[i].first; // // Check that the file has a proper suffix for the output type. 
// pos = path.find_last_of("."); if (path.substr(pos) == ".bam") { path = path.substr(0, pos) + suffix_1; } else if (path.substr(pos) == ".gz") { path = path.substr(0, pos); pos = path.find_last_of("."); path = path.substr(0, pos) + suffix_1; } else { path = path.substr(0, pos) + suffix_1; } if (stat((in_path_1 + files[i].first).c_str(), &sb_1) == -1) { cerr << "Unable to stat input file '" << in_path_1 + files[i].first << "'\n"; exit(1); } if (stat(path.c_str(), &sb_2) == 0 && sb_2.st_dev == sb_1.st_dev && sb_2.st_ino == sb_1.st_ino) { cerr << "Input and output files ('" << path << "') are the same and will cause the input " << "file to be overwritten. Please specify a separate output directory using '-o'.\n"; help(); } fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); pair_1_fhs[bc] = fh; if (!(*fh)) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } if (paired) { file = interleaved ? files[i].first : files[i].second; path = out_path + file; pos = path.find_last_of("."); if (path.substr(pos) == ".bam") { path.replace(pos, 4, suffix_2); } else if (path.substr(pos) == ".gz") { path = path.substr(0, pos); pos = path.find_last_of("."); path = path.substr(0, pos) + suffix_2; } else { path = path.substr(0, pos) + suffix_2; } if (stat((in_path_2 + file).c_str(), &sb_1) == -1) { cerr << "Unable to stat input file '" << in_path_2 + file << "'\n"; exit(1); } if (stat(path.c_str(), &sb_2) == 0 && sb_2.st_dev == sb_1.st_dev && sb_2.st_ino == sb_1.st_ino) { cerr << "Input and output file names ('" << path << "') are the same and will cause the input " << "file to be overwritten. 
Please specify a separate output directory using '-o'.\n"; help(); } fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); pair_2_fhs[bc] = fh; if (!(*fh)) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } filepath = files[i].first; pos = filepath.find_last_of("."); if (filepath.substr(pos) == ".gz") { filepath = filepath.substr(0, pos); pos = filepath.find_last_of("."); filepath = filepath.substr(0, pos); } else if (filepath.substr(pos) == ".bam") { filepath = filepath.substr(0, pos); } path = out_path + filepath + ".rem" + suffix_1; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); rem_1_fhs[bc] = fh; if (!*(fh)) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } filepath = file; pos = filepath.find_last_of("."); if (filepath.substr(pos) == ".gz") { filepath = filepath.substr(0, pos); pos = filepath.find_last_of("."); filepath = filepath.substr(0, pos); } else if (filepath.substr(pos) == ".bam") { filepath = filepath.substr(0, pos); } path = out_path + filepath + ".rem" + suffix_2; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); rem_2_fhs[bc] = fh; if (!(*fh)) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } } } return 0; } else if (barcodes.size() == 0 && merge == true) { path = out_path + "sample_unbarcoded" + suffix_1; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); if (!(*fh)) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; if (paired) bc.pe = files[i].second; pair_1_fhs[bc] = fh; } if (paired) { path = out_path + "sample_unbarcoded" + suffix_2; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); if (!(*fh)) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; pair_2_fhs[bc] = fh; } path = out_path + "sample_unbarcoded.rem" + suffix_1; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); if 
(!(*fh)) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; rem_1_fhs[bc] = fh; } path = out_path + "sample_unbarcoded.rem" + suffix_2; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); if (!(*fh)) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } for (uint i = 0; i < files.size(); i++) { bc.se = files[i].first; bc.pe = files[i].second; rem_2_fhs[bc] = fh; } } return 0; } string filename; for (uint i = 0; i < barcodes.size(); i++) { filename = barcodes[i].name_exists() ? barcodes[i].name : "sample_" + barcodes[i].str(); path = out_path + filename + suffix_1; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); pair_1_fhs[barcodes[i]] = fh; if (!(*pair_1_fhs[barcodes[i]])) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } if (paired) { path = out_path + filename + suffix_2; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); pair_2_fhs[barcodes[i]] = fh; if (!(*pair_2_fhs[barcodes[i]])) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } path = out_path + filename + ".rem" + suffix_1; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); rem_1_fhs[barcodes[i]] = fh; if (!(*rem_1_fhs[barcodes[i]])) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } path = out_path + filename + ".rem" + suffix_2; fh = new gzFile; *fh = gzopen(path.c_str(), "wb"); rem_2_fhs[barcodes[i]] = fh; if (!(*rem_2_fhs[barcodes[i]])) { cerr << "Error opening remainder output file '" << path << "'\n"; exit(1); } } } return 0; } int close_file_handles(map &fhs) { map::iterator i; set ptrs; set::iterator j; for (i = fhs.begin(); i != fhs.end(); i++) { i->second->close(); ptrs.insert(i->second); } for (j = ptrs.begin(); j != ptrs.end(); j++) { delete *j; } return 0; } int close_file_handles(map &fhs) { map::iterator i; set ptrs; set::iterator j; for (i = fhs.begin(); i != fhs.end(); i++) { 
gzclose(*(i->second)); ptrs.insert(i->second); } for (j = ptrs.begin(); j != ptrs.end(); j++) { delete *j; } return 0; } int load_barcodes(string barcode_file, vector &barcodes, set &se_bc, set &pe_bc, uint &min_se_len, uint &max_se_len, uint &min_pe_len, uint &max_pe_len) { switch(barcode_type) { case null_null: cerr << "Barcode type unspecified, assuming unbarcoded data.\n"; break; case null_index: cerr << "Searching for single, index barcode.\n"; break; case index_null: cerr << "Searching for single-end, indexed barcodes.\n"; break; case inline_null: cerr << "Searching for single-end, inlined barcodes.\n"; break; case index_index: cerr << "Searching for single and paired-end, indexed barcodes.\n"; break; case inline_inline: cerr << "Searching for single and paired-end, inlined barcodes.\n"; break; case inline_index: if (paired) cerr << "Searching for single-end, inlined and paired-end, indexed barcodes.\n"; else cerr << "Searching for single-end inlined and indexed barcodes.\n"; break; case index_inline: if (paired) cerr << "Searching for single-end, indexed and paired-end, inlined barcodes.\n"; else cerr << "Searching for single-end, indexed and inlined barcodes.\n"; break; } if (barcode_file.length() == 0) return 0; char line[id_len]; ifstream fh(barcode_file.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening barcode file '" << barcode_file << "'\n"; exit(1); } char *p, *q, *r, *s; uint cols, line_num = 0; while (fh.good()) { memset(line, 0, id_len); fh.getline(line, id_len); line_num++; if (strlen(line) == 0) continue; // // Check that the proper number of columns exist. 
// cols = 1; for (p = line; *p != '\0'; p++) if (*p == '\t') cols++; if (cols > 2 && (barcode_type == inline_null || barcode_type == index_null)) { cerr << "Too many columns (" << cols << ") specified in '" << barcode_file << "' for single-end barcodes on line " << line_num << ".\n"; exit(1); } else if (cols > 3) { cerr << "Too many columns (" << cols << ") specified in '" << barcode_file << "' on line " << line_num << ".\n"; exit(1); } // // Identify the first barcode and check that it's legitimate. // p = line; q = p; while (*q != '\0') { switch (*q) { case 'A': case 'C': case 'G': case 'T': break; case 'a': *q = 'A'; break; case 'c': *q = 'C'; break; case 'g': *q = 'G'; break; case 't': *q = 'T'; break; case '\r': case '\t': *q = '\0'; break; default: cerr << "Invalid barcode on line " << line_num << ": '" << p << "'\n"; exit(1); } if (*q != '\0') q++; } // // If a second barcode was specified on the command line, identify it and check that it's legitimate. // r = NULL; if (barcode_type == inline_inline || barcode_type == inline_index || barcode_type == index_inline || barcode_type == index_index) { if (q - p < id_len) q++; r = q; while (*q != '\0') { switch (*q) { case 'A': case 'C': case 'G': case 'T': break; case 'a': *q = 'A'; break; case 'c': *q = 'C'; break; case 'g': *q = 'G'; break; case 't': *q = 'T'; break; case '\r': case '\t': *q = '\0'; break; default: cerr << "Invalid barcode on line " << line_num << ": '" << r << "'\n"; exit(1); } if (*q != '\0') q++; } } // // Check for the existence of a file name to associate with this barcode set. 
// if (q - p < id_len) q++; s = q; while (*q != '\0') { if (!isalnum(*q)) { switch (*q) { case '-': case '_': break; case '\r': case '\t': *q = '\0'; break; default: cerr << "Invalid filename on line " << line_num << ": '" << s << "' (filenames can consist of letters, numbers, '-' and '_').\n"; exit(1); } } if (*q != '\0') q++; } barcodes.push_back(BarcodePair(p, r, s)); if (p != NULL && strlen(p) > 0) se_bc.insert(string(p)); if (r != NULL && strlen(r) > 0) pe_bc.insert(string(r)); } fh.close(); if (barcodes.size() == 0) { cerr << "Unable to load any barcodes from '" << barcode_file << "'\n"; help(); } // // Make sure barcodes are properly paired up. // int pe_cnt = 0; int se_cnt = 0; for (uint i = 0; i < barcodes.size(); i++) { se_cnt += (barcodes[i].se.length() > 0) ? 1 : 0; pe_cnt += (barcodes[i].pe.length() > 0) ? 1 : 0; } if (pe_cnt > 0 && se_cnt != pe_cnt) { cerr << "Single and paired-end barcodes must be properly paired.\n"; help(); } // // Determine the minimum and maximum barcode lengths for the single-end barcodes. // min_se_len = barcodes[0].se.length(); max_se_len = min_se_len; for (uint i = 1; i < barcodes.size(); i++) { if (barcodes[i].se.length() < min_se_len) min_se_len = barcodes[i].se.length(); else if (barcodes[i].se.length() > max_se_len) max_se_len = barcodes[i].se.length(); } // // Determine the minimum and maximum barcode lengths for the paired-end barcodes. // min_pe_len = barcodes[0].pe.length(); max_pe_len = min_pe_len; for (uint i = 0; i < barcodes.size(); i++) { if (barcodes[i].pe.length() < min_pe_len) min_pe_len = barcodes[i].pe.length(); else if (barcodes[i].pe.length() > max_pe_len) max_pe_len = barcodes[i].pe.length(); } // // If paired barcodes were supplied check that a paired barcode type was // specified and vice versa. 
// if (se_bc.size() > 0 && pe_bc.size() > 0) { if (barcode_type != inline_inline && barcode_type != index_index && barcode_type != inline_index && barcode_type != index_inline) { cerr << "You provided paried barcodes but did not specify a paired barcode type.\n"; help(); } } else { if (barcode_type != inline_null && barcode_type != index_null) { cerr << "You provided single-end barcodes but did not specify a single-end barcode type.\n"; help(); } } cerr << "Loaded " << barcodes.size() << " barcodes "; if (pe_bc.size() > 0) { if (min_se_len != max_se_len) cerr << "(" << min_se_len << "-" << max_se_len << "bp / "; else cerr << "(" << max_se_len << "bp / "; if (min_pe_len != max_pe_len) cerr << min_pe_len << "-" << max_pe_len << "bp).\n"; else cerr << max_pe_len << "bp).\n"; } else { if (min_se_len != max_se_len) cerr << "(" << min_se_len << "-" << max_se_len << "bp).\n"; else cerr << "(" << max_se_len << "bp).\n"; } return 0; } int build_file_list(vector > &files) { // // Scan a directory for a list of files. // if (in_path_1.length() > 0) { string file, paired_file; const char *p, *q, *end; struct dirent *direntry; DIR *dir = opendir(in_path_1.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path_1 << "' for reading.\n"; exit(1); } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file.substr(0, 1) == ".") continue; // // Check the file suffix to make sure we should process it. // p = file.c_str(); q = p + file.length() + 1; end = q; while (q >= p && *q != '.') q--; if (strcmp(q, ".gz") == 0) { end = q; while (q >= p && *q != '.') q--; } if (strncmp(q, ".fq", end - q) != 0 && strncmp(q, ".fa", end - q) != 0 && strncmp(q, ".fastq", end - q) != 0 && strncmp(q, ".fasta", end - q) != 0 && strncmp(q, ".bam", end - q) != 0) continue; // // If paired-end specified, parse file names to sort out which is which. 
// if (paired && interleaved == false) { int res; if ((res = parse_illumina_v1(file.c_str())) > 0 || (res = parse_illumina_v2(file.c_str())) > 0) { paired_file = file; paired_file.replace(res, 1, "2"); files.push_back(make_pair(file, paired_file)); } } else { files.push_back(make_pair(file, "")); } } if (files.size() == 0) { cerr << "Unable to locate any input files to process within '" << in_path_1 << "'\n"; } } else { // // Files specified directly: // Break off file path and store path and file name. // if (paired && interleaved == false) { int pos_1 = in_file_p1.find_last_of("/"); in_path_1 = in_file_p1.substr(0, pos_1 + 1); int pos_2 = in_file_p2.find_last_of("/"); in_path_2 = in_file_p2.substr(0, pos_2 + 1); files.push_back(make_pair(in_file_p1.substr(pos_1+1), in_file_p2.substr(pos_2+1))); } else if (paired && interleaved == true) { int pos = in_file.find_last_of("/"); in_path_1 = in_file.substr(0, pos + 1); in_path_2 = in_path_1; files.push_back(make_pair(in_file.substr(pos+1), "")); } else { int pos = in_file.find_last_of("/"); in_path_1 = in_file.substr(0, pos + 1); files.push_back(make_pair(in_file.substr(pos+1), "")); } } cerr << "Found " << files.size(); if (paired && interleaved) cerr << " interleaved, paired input file(s).\n"; else if (paired) cerr << " paired input file(s).\n"; else cerr << " input file(s).\n"; return 0; } stacks-1.35/src/file_io.h000644 000765 000024 00000005300 12533677757 016074 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2012, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __FILE_IO_H__ #define __FILE_IO_H__ #include #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include using std::string; #include using std::map; #include using std::vector; #include using std::pair; #include #include using std::istream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include #include #include "constants.h" #include "clean.h" #include "input.h" // // Command line options defined in process_radtags and process_shortreads. // extern FileT in_file_type; extern FileT out_file_type; extern barcodet barcode_type; extern bool paired; extern bool interleaved; extern bool merge; extern string out_path; extern string in_file; extern string in_file_p1; extern string in_file_p2; extern string in_path_1; extern string in_path_2; // // Defined externally in process_radtags and process_shortreads. // void help( void ); int build_file_list(vector > &); int load_barcodes(string, vector &, set &, set &, uint &, uint &, uint &, uint &); int open_files(vector > &, vector &, map &, map &, map &, map &, map > &); int open_files(vector > &, vector &, map &, map &, map &, map &, map > &); int close_file_handles(map &); int close_file_handles(map &); #endif // __FILE_IO_H__ stacks-1.35/src/genotype_dictionaries.h000644 000765 000024 00000040070 12335173442 021036 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2014, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __GENOTYPE_DICTIONARIES_H__ #define __GENOTYPE_DICTIONARIES_H__ enum map_types {unk, none, gen, dh, cp, bc1, f2}; enum out_types {rqtl, joinmap, onemap, genomic}; void initialize_dictionaries(map > &global_dictionary) { global_dictionary["ab/--"]["a"] = "aa"; global_dictionary["ab/--"]["b"] = "bb"; global_dictionary["--/ab"]["a"] = "aa"; global_dictionary["--/ab"]["b"] = "bb"; global_dictionary["aa/bb"]["a"] = "aa"; global_dictionary["aa/bb"]["ab"] = "ab"; global_dictionary["aa/bb"]["b"] = "bb"; global_dictionary["ab/ac"]["a"] = "aa"; global_dictionary["ab/ac"]["ab"] = "ab"; global_dictionary["ab/ac"]["b"] = "bb"; global_dictionary["ab/ac"]["ac"] = "ac"; global_dictionary["ab/ac"]["c"] = "cc"; global_dictionary["ab/ac"]["bc"] = "bc"; global_dictionary["ab/cd"]["a"] = "aa"; global_dictionary["ab/cd"]["ab"] = "ab"; global_dictionary["ab/cd"]["b"] = "bb"; global_dictionary["ab/cd"]["c"] = "cc"; global_dictionary["ab/cd"]["cd"] = "cd"; global_dictionary["ab/cd"]["d"] = "dd"; global_dictionary["ab/cd"]["ac"] = "ac"; global_dictionary["ab/cd"]["ad"] = "ad"; global_dictionary["ab/cd"]["bc"] = "bc"; global_dictionary["ab/cd"]["bd"] = "bd"; global_dictionary["ab/aa"]["a"] = "aa"; global_dictionary["ab/aa"]["ab"] = "ab"; global_dictionary["ab/aa"]["b"] = "bb"; global_dictionary["aa/ab"]["a"] = "aa"; global_dictionary["aa/ab"]["ab"] = "ab"; global_dictionary["aa/ab"]["b"] = 
"bb"; global_dictionary["ab/cc"]["a"] = "aa"; global_dictionary["ab/cc"]["ab"] = "ab"; global_dictionary["ab/cc"]["b"] = "bb"; global_dictionary["ab/cc"]["c"] = "cc"; global_dictionary["ab/cc"]["ac"] = "ac"; global_dictionary["ab/cc"]["bc"] = "bc"; global_dictionary["cc/ab"]["a"] = "aa"; global_dictionary["cc/ab"]["ab"] = "ab"; global_dictionary["cc/ab"]["b"] = "bb"; global_dictionary["cc/ab"]["c"] = "cc"; global_dictionary["cc/ab"]["ac"] = "ac"; global_dictionary["cc/ab"]["bc"] = "bc"; global_dictionary["ab/ab"]["a"] = "aa"; global_dictionary["ab/ab"]["b"] = "bb"; global_dictionary["ab/ab"]["ab"] = "ab"; } void load_cp_dictionary(map &types, map > &dictionary) { types["ab/--"] = "ab/--"; types["--/ab"] = "--/ab"; types["ab/aa"] = "ab/aa"; types["aa/ab"] = "aa/ab"; types["ab/a-"] = "ab/--"; types["-a/ab"] = "--/ab"; types["ab/c-"] = "ab/cd"; types["-c/ab"] = "ab/cd"; types["ab/cc"] = "ab/--"; types["cc/ab"] = "--/ab"; types["ab/ab"] = "ab/ab"; types["ab/ac"] = "ab/ac"; types["ab/cd"] = "ab/cd"; types["-a/bb"] = "ab/--"; types["aa/b-"] = "--/ab"; dictionary["ab/--"]["--"] = "--"; dictionary["ab/--"]["aa"] = "aa"; dictionary["ab/--"]["ab"] = "ab"; dictionary["ab/--"]["bb"] = "ab"; dictionary["ab/--"]["ac"] = "aa"; dictionary["ab/--"]["bc"] = "ab"; dictionary["--/ab"]["--"] = "--"; dictionary["--/ab"]["aa"] = "aa"; dictionary["--/ab"]["ab"] = "ab"; dictionary["--/ab"]["bb"] = "ab"; dictionary["--/ab"]["ac"] = "aa"; dictionary["--/ab"]["bc"] = "ab"; dictionary["ab/aa"]["--"] = "--"; dictionary["ab/aa"]["aa"] = "aa"; dictionary["ab/aa"]["ab"] = "ab"; dictionary["aa/ab"]["--"] = "--"; dictionary["aa/ab"]["aa"] = "aa"; dictionary["aa/ab"]["ab"] = "ab"; dictionary["ab/ab"]["--"] = "--"; dictionary["ab/ab"]["ab"] = "ab"; dictionary["ab/ab"]["aa"] = "aa"; dictionary["ab/ab"]["bb"] = "bb"; dictionary["ab/ac"]["--"] = "--"; dictionary["ab/ac"]["ab"] = "ab"; dictionary["ab/ac"]["ac"] = "ac"; dictionary["ab/ac"]["bc"] = "bc"; dictionary["ab/ac"]["aa"] = "aa"; 
dictionary["ab/cd"]["--"] = "--"; dictionary["ab/cd"]["ac"] = "ac"; dictionary["ab/cd"]["ad"] = "ad"; dictionary["ab/cd"]["bc"] = "bc"; dictionary["ab/cd"]["bd"] = "bd"; return; } void load_joinmap_cp_dictionary(map &types, map > &dictionary) { types["ab/--"] = "lmx--"; types["--/ab"] = "--xnp"; types["ab/aa"] = "lmxll"; types["aa/ab"] = "nnxnp"; types["ab/ab"] = "hkxhk"; types["ab/ac"] = "efxeg"; types["ab/cd"] = "abxcd"; dictionary["lmx--"]["--"] = "--"; dictionary["lmx--"]["aa"] = "ll"; dictionary["lmx--"]["ab"] = "lm"; dictionary["lmx--"]["bb"] = "lm"; dictionary["lmx--"]["ac"] = "ll"; dictionary["lmx--"]["bc"] = "lm"; dictionary["--xnp"]["--"] = "--"; dictionary["--xnp"]["aa"] = "nn"; dictionary["--xnp"]["ab"] = "np"; dictionary["--xnp"]["bb"] = "np"; dictionary["--xnp"]["ac"] = "nn"; dictionary["--xnp"]["bc"] = "np"; dictionary["lmxll"]["--"] = "--"; dictionary["lmxll"]["aa"] = "ll"; dictionary["lmxll"]["ab"] = "lm"; dictionary["nnxnp"]["--"] = "--"; dictionary["nnxnp"]["aa"] = "nn"; dictionary["nnxnp"]["ab"] = "np"; dictionary["hkxhk"]["--"] = "--"; dictionary["hkxhk"]["ab"] = "hk"; dictionary["hkxhk"]["aa"] = "hh"; dictionary["hkxhk"]["bb"] = "kk"; dictionary["efxeg"]["--"] = "--"; dictionary["efxeg"]["ab"] = "ef"; dictionary["efxeg"]["ac"] = "eg"; dictionary["efxeg"]["bc"] = "fg"; dictionary["efxeg"]["aa"] = "ee"; dictionary["abxcd"]["--"] = "--"; dictionary["abxcd"]["ac"] = "ac"; dictionary["abxcd"]["ad"] = "ad"; dictionary["abxcd"]["bc"] = "bc"; dictionary["abxcd"]["bd"] = "bd"; return; } void load_onemap_cp_dictionary(map &types, map > &dictionary) { types["ab/--"] = "abxoo"; types["--/ab"] = "ooxab"; types["ab/aa"] = "abxaa"; types["aa/ab"] = "aaxab"; types["ab/ab"] = "abxab"; types["ab/ac"] = "abxac"; types["ab/cd"] = "abxcd"; // D1.11 dictionary["abxoo"]["--"] = "-"; dictionary["abxoo"]["aa"] = "a"; dictionary["abxoo"]["bb"] = "b"; // D2.16 dictionary["ooxab"]["--"] = "-"; dictionary["ooxab"]["aa"] = "a"; dictionary["ooxab"]["bb"] = "b"; // D1.10 
dictionary["abxaa"]["--"] = "-"; dictionary["abxaa"]["aa"] = "a"; dictionary["abxaa"]["ab"] = "ab"; // D2.15 dictionary["aaxab"]["--"] = "-"; dictionary["aaxab"]["aa"] = "a"; dictionary["aaxab"]["ab"] = "ab"; // B3.7 dictionary["abxab"]["--"] = "-"; dictionary["abxab"]["ab"] = "ab"; dictionary["abxab"]["aa"] = "a"; dictionary["abxab"]["bb"] = "b"; // A.2 dictionary["abxac"]["--"] = "-"; dictionary["abxac"]["ab"] = "ba"; dictionary["abxac"]["ac"] = "ac"; dictionary["abxac"]["bc"] = "bc"; dictionary["abxac"]["aa"] = "a"; // A.1 dictionary["abxcd"]["--"] = "-"; dictionary["abxcd"]["ac"] = "ac"; dictionary["abxcd"]["ad"] = "ad"; dictionary["abxcd"]["bc"] = "bc"; dictionary["abxcd"]["bd"] = "bd"; return; } void load_bc_dictionary(map &types, map > &dictionary) { types["aa/bb"] = "aa/bb"; types["bb/aa"] = "bb/aa"; types["ab/cc"] = "ab/cc"; types["cc/ab"] = "cc/ab"; dictionary["aa/bb"]["--"] = "--"; dictionary["aa/bb"]["aa"] = "aa"; dictionary["aa/bb"]["ab"] = "ab"; dictionary["aa/bb"]["bb"] = "bb"; dictionary["bb/aa"]["--"] = "--"; dictionary["bb/aa"]["aa"] = "aa"; dictionary["bb/aa"]["ab"] = "ab"; dictionary["bb/aa"]["bb"] = "bb"; dictionary["ab/cc"]["--"] = "--"; dictionary["ab/cc"]["ac"] = "ac"; dictionary["ab/cc"]["bc"] = "bc"; dictionary["ab/cc"]["ab"] = "ab"; dictionary["ab/cc"]["aa"] = "aa"; dictionary["ab/cc"]["bb"] = "bb"; dictionary["cc/ab"]["--"] = "--"; dictionary["cc/ab"]["ac"] = "ac"; dictionary["cc/ab"]["bc"] = "bc"; dictionary["cc/ab"]["ab"] = "ab"; dictionary["cc/ab"]["aa"] = "aa"; dictionary["cc/ab"]["bb"] = "bb"; } void load_f2_dictionary(map &types, map > &dictionary) { types["aa/bb"] = "aa/bb"; types["ab/cd"] = "ab/cd"; types["ab/aa"] = "ab/aa"; types["aa/ab"] = "aa/ab"; types["ab/cc"] = "ab/cc"; types["cc/ab"] = "cc/ab"; dictionary["aa/bb"]["aa"] = "aa"; dictionary["aa/bb"]["ab"] = "ab"; dictionary["aa/bb"]["bb"] = "bb"; dictionary["aa/bb"]["--"] = "--"; dictionary["ab/cd"]["aa"] = "aa"; dictionary["ab/cd"]["ab"] = "ab"; dictionary["ab/cd"]["bb"] = 
"bb"; dictionary["ab/cd"]["cc"] = "cc"; dictionary["ab/cd"]["cd"] = "cd"; dictionary["ab/cd"]["dd"] = "dd"; dictionary["ab/cd"]["ac"] = "ac"; dictionary["ab/cd"]["ad"] = "ad"; dictionary["ab/cd"]["bc"] = "bc"; dictionary["ab/cd"]["bd"] = "bd"; dictionary["ab/cd"]["--"] = "--"; dictionary["ab/aa"]["aa"] = "--"; dictionary["ab/aa"]["ab"] = "--"; dictionary["ab/aa"]["bb"] = "bb"; dictionary["ab/aa"]["--"] = "--"; dictionary["aa/ab"]["aa"] = "--"; dictionary["aa/ab"]["ab"] = "--"; dictionary["aa/ab"]["bb"] = "bb"; dictionary["aa/ab"]["--"] = "--"; dictionary["ab/cc"]["aa"] = "aa"; dictionary["ab/cc"]["ab"] = "ab"; dictionary["ab/cc"]["bb"] = "bb"; dictionary["ab/cc"]["cc"] = "cc"; dictionary["ab/cc"]["ac"] = "--"; dictionary["ab/cc"]["bc"] = "--"; dictionary["ab/cc"]["--"] = "--"; dictionary["cc/ab"]["aa"] = "aa"; dictionary["cc/ab"]["ab"] = "ab"; dictionary["cc/ab"]["bb"] = "bb"; dictionary["cc/ab"]["cc"] = "cc"; dictionary["cc/ab"]["ac"] = "--"; dictionary["cc/ab"]["bc"] = "--"; dictionary["cc/ab"]["--"] = "--"; } void load_mm_bc_dictionary(map &types, map > &dictionary) { types["aa/bb"] = "aaxbb"; types["bb/aa"] = "bbxaa"; types["ab/cc"] = "abxcc"; types["cc/ab"] = "ccxab"; dictionary["aaxbb"]["--"] = "-"; dictionary["aaxbb"]["aa"] = "b"; dictionary["aaxbb"]["ab"] = "h"; dictionary["aaxbb"]["bb"] = "h"; dictionary["bbxaa"]["--"] = "-"; dictionary["bbxaa"]["aa"] = "h"; dictionary["bbxaa"]["ab"] = "h"; dictionary["bbxaa"]["bb"] = "a"; dictionary["abxcc"]["--"] = "-"; dictionary["abxcc"]["ac"] = "h"; dictionary["abxcc"]["bc"] = "h"; dictionary["abxcc"]["ab"] = "b"; dictionary["abxcc"]["aa"] = "b"; dictionary["abxcc"]["bb"] = "b"; dictionary["ccxab"]["--"] = "-"; dictionary["ccxab"]["ac"] = "h"; dictionary["ccxab"]["bc"] = "h"; dictionary["ccxab"]["ab"] = "a"; dictionary["ccxab"]["aa"] = "a"; dictionary["ccxab"]["bb"] = "a"; } void load_mm_f2_dictionary(map &types, map > &dictionary) { types["aa/bb"] = "aaxbb"; types["ab/cd"] = "abxcd"; types["ab/aa"] = "abxaa"; 
types["aa/ab"] = "aaxab"; types["ab/cc"] = "abxcc"; types["cc/ab"] = "ccxab"; dictionary["aaxbb"]["aa"] = "a"; dictionary["aaxbb"]["ab"] = "h"; dictionary["aaxbb"]["bb"] = "b"; dictionary["aaxbb"]["--"] = "-"; dictionary["abxcd"]["aa"] = "a"; dictionary["abxcd"]["ab"] = "a"; dictionary["abxcd"]["bb"] = "a"; dictionary["abxcd"]["cc"] = "b"; dictionary["abxcd"]["cd"] = "b"; dictionary["abxcd"]["dd"] = "b"; dictionary["abxcd"]["ac"] = "h"; dictionary["abxcd"]["ad"] = "h"; dictionary["abxcd"]["bc"] = "h"; dictionary["abxcd"]["bd"] = "h"; dictionary["abxcd"]["--"] = "-"; dictionary["abxaa"]["aa"] = "-"; dictionary["abxaa"]["ab"] = "-"; dictionary["abxaa"]["bb"] = "a"; dictionary["abxaa"]["--"] = "-"; dictionary["aaxab"]["aa"] = "-"; dictionary["aaxab"]["ab"] = "-"; dictionary["aaxab"]["bb"] = "b"; dictionary["aaxab"]["--"] = "-"; dictionary["abxcc"]["aa"] = "a"; dictionary["abxcc"]["ab"] = "a"; dictionary["abxcc"]["bb"] = "a"; dictionary["abxcc"]["cc"] = "b"; dictionary["abxcc"]["ac"] = "-"; dictionary["abxcc"]["bc"] = "-"; dictionary["abxcc"]["--"] = "-"; dictionary["ccxab"]["aa"] = "b"; dictionary["ccxab"]["ab"] = "b"; dictionary["ccxab"]["bb"] = "b"; dictionary["ccxab"]["cc"] = "a"; dictionary["ccxab"]["ac"] = "-"; dictionary["ccxab"]["bc"] = "-"; dictionary["ccxab"]["--"] = "-"; } void load_dh_dictionary(map &types, map > &dictionary) { types["ab/--"] = "ab/--"; types["--/ab"] = "--/ab"; dictionary["ab/--"]["aa"] = "aa"; dictionary["ab/--"]["bb"] = "bb"; dictionary["ab/--"]["--"] = "--"; dictionary["--/ab"]["aa"] = "aa"; dictionary["--/ab"]["bb"] = "bb"; dictionary["--/ab"]["--"] = "--"; } void load_mm_dh_dictionary(map &types, map > &dictionary) { types["ab/--"] = "abx--"; types["--/ab"] = "--xab"; dictionary["abx--"]["aa"] = "a"; dictionary["abx--"]["bb"] = "b"; dictionary["abx--"]["--"] = "-"; dictionary["--xab"]["aa"] = "a"; dictionary["--xab"]["bb"] = "b"; dictionary["--xab"]["--"] = "-"; } void load_segregation_ratios(map_types type, map > &segregation_ratios) 
{ switch(type) { case cp: segregation_ratios["ab/--"]["aa"] = 0.50; segregation_ratios["ab/--"]["ab"] = 0.50; segregation_ratios["--/ab"]["aa"] = 0.50; segregation_ratios["--/ab"]["ab"] = 0.50; segregation_ratios["ab/aa"]["aa"] = 0.50; segregation_ratios["ab/aa"]["ab"] = 0.50; segregation_ratios["aa/ab"]["aa"] = 0.50; segregation_ratios["aa/ab"]["ab"] = 0.50; segregation_ratios["ab/ab"]["ab"] = 0.50; segregation_ratios["ab/ab"]["aa"] = 0.25; segregation_ratios["ab/ab"]["bb"] = 0.25; segregation_ratios["ab/ac"]["ab"] = 0.25; segregation_ratios["ab/ac"]["ac"] = 0.25; segregation_ratios["ab/ac"]["bc"] = 0.25; segregation_ratios["ab/ac"]["aa"] = 0.25; segregation_ratios["ab/cd"]["ac"] = 0.25; segregation_ratios["ab/cd"]["ad"] = 0.25; segregation_ratios["ab/cd"]["bc"] = 0.25; segregation_ratios["ab/cd"]["bd"] = 0.25; break; case f2: segregation_ratios["aaxbb"]["a"] = 0.25; segregation_ratios["aaxbb"]["b"] = 0.25; segregation_ratios["aaxbb"]["h"] = 0.50; segregation_ratios["abxcd"]["a"] = 0.25; segregation_ratios["abxcd"]["b"] = 0.25; segregation_ratios["abxcd"]["h"] = 0.50; segregation_ratios["abxaa"]["a"] = 1.00; segregation_ratios["aaxab"]["b"] = 1.00; segregation_ratios["abxcc"]["a"] = 0.50; segregation_ratios["abxcc"]["b"] = 0.50; segregation_ratios["ccxab"]["b"] = 0.50; segregation_ratios["ccxab"]["a"] = 0.50; break; case bc1: segregation_ratios["aaxbb"]["h"] = 0.50; segregation_ratios["aaxbb"]["b"] = 0.50; segregation_ratios["bbxaa"]["h"] = 0.50; segregation_ratios["bbxaa"]["a"] = 0.50; segregation_ratios["abxcc"]["h"] = 0.50; segregation_ratios["abxcc"]["b"] = 0.50; segregation_ratios["ccxab"]["h"] = 0.50; segregation_ratios["ccxab"]["a"] = 0.50; break; case dh: segregation_ratios["ab/--"]["a"] = 0.50; segregation_ratios["ab/--"]["b"] = 0.50; segregation_ratios["--/ab"]["a"] = 0.50; segregation_ratios["--/ab"]["b"] = 0.50; break; case gen: case none: case unk: break; } return; } inline int encode_gtype(char a) { switch (a) { case 'A': return 0; case 'C': return 
1; case 'G': return 2; case 'T': return 3; } return -1; } int encoded_gtypes[4][4] = { // A C G T {1, 2, 3, 4}, // A {2, 5, 6, 7}, // C {3, 6, 8, 9}, // G {4, 7, 9, 10} // T }; #endif // __GENOTYPE_DICTIONARIES_H__ stacks-1.35/src/genotypes.cc000644 000765 000024 00000233372 12441417455 016636 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // genotypes -- genotype a mapping cross. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "genotypes.h" // Global variables to hold command-line options. int num_threads = 1; int batch_id = -1; map_types map_type = gen; out_types out_type = joinmap; string in_path; string out_path; string cor_path; string out_file; string bl_file; string wl_file; string enz; int progeny_limit = 1; bool man_corrections = false; bool corrections = false; bool expand_id = false; bool sql_out = false; bool filter_lnl = false; double lnl_limit = 0.0; double chisq_pval_limit = 0.05; int min_stack_depth = 0; int min_hom_seqs = 5; double min_het_seqs = 0.05; double max_het_seqs = 0.1; set whitelist, blacklist; // // Hold information about restriction enzymes // map renz; map renz_cnt; map renz_len; // // Dictionaries to hold legal genotypes for different map types. 
// map > global_dictionary; int main (int argc, char* argv[]) { initialize_renz(renz, renz_cnt, renz_len); parse_command_line(argc, argv); // // Set the number of OpenMP parallel threads to execute. // #ifdef _OPENMP omp_set_num_threads(num_threads); #endif initialize_dictionaries(global_dictionary); vector files; build_file_list(files); if (wl_file.length() > 0) { load_marker_list(wl_file, whitelist); cerr << "Loaded " << whitelist.size() << " whitelisted markers.\n"; } if (bl_file.length() > 0) { load_marker_list(bl_file, blacklist); cerr << "Loaded " << blacklist.size() << " blacklisted markers.\n"; } // // Load the catalog // stringstream catalog_file; map catalog; bool compressed = false; int res; catalog_file << in_path << "batch_" << batch_id << ".catalog"; if ((res = load_loci(catalog_file.str(), catalog, false, false, compressed)) == 0) { cerr << "Unable to load the catalog '" << catalog_file.str() << "'\n"; return 0; } // // Implement the black/white list // reduce_catalog(catalog, whitelist, blacklist); // // Load matches to the catalog // vector > catalog_matches; map samples; vector sample_ids; for (uint i = 0; i < files.size(); i++) { vector m; load_catalog_matches(in_path + files[i], m); if (m.size() == 0) { cerr << "Warning: unable to find any matches in file '" << files[i] << "', excluding this sample from genotypes analysis.\n"; continue; } catalog_matches.push_back(m); samples[m[0]->sample_id] = files[i]; sample_ids.push_back(m[0]->sample_id); } sort(sample_ids.begin(), sample_ids.end()); set parent_ids; identify_parental_ids(catalog, sample_ids, parent_ids); // // Create the population map // cerr << "Populating observed haplotypes for " << sample_ids.size() << " samples, " << catalog.size() << " loci.\n"; PopMap *pmap = new PopMap(sample_ids.size(), catalog.size()); pmap->populate(sample_ids, catalog, catalog_matches); apply_locus_constraints(catalog, pmap); // // Identify mappable markers in the parents // find_markers(catalog, pmap, 
parent_ids); // // Calculate F, inbreeding coefficient // calculate_f(catalog, pmap, parent_ids); // // Create genotypes maps, calculate mean log likelihood for each locus. // map::iterator it; Datum **d; CSLocus *loc; double mean, cnt; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); if (loc->marker.length() > 0) { create_genotype_map(loc, pmap, parent_ids); call_population_genotypes(loc, pmap, global_dictionary); } mean = 0.0; cnt = 0.0; for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; mean += d[i]->lnl; cnt++; } loc->lnl = mean / cnt; } // // Make automated corrections // if (corrections) automated_corrections(samples, parent_ids, catalog, catalog_matches, pmap); // // Check markers for potentially missing alleles. // switch(map_type) { case cp: correct_cp_markers_missing_alleles(parent_ids, catalog, pmap); break; case dh: case bc1: case f2: case gen: case none: case unk: break; } // // Reassign genotypes according to specific map type, record any // marker corrections made by detecting missing alleles. // if (map_type != gen) map_specific_genotypes(catalog, pmap, parent_ids); // // Incorporate manual corrections exported from a Stacks SQL database. // if (man_corrections) manual_corrections(cor_path, pmap); // // Calculate segregation distortion using chi-squared test. // map > seg_ratios; if (map_type != gen) { load_segregation_ratios(map_type, seg_ratios); calc_segregation_distortion(seg_ratios, catalog, pmap, parent_ids); } // // If a mapping type was specified, output it. 
// switch(map_type) { case dh: export_dh_map(catalog, pmap, parent_ids, samples); break; case cp: export_cp_map(catalog, pmap, parent_ids, samples); break; case bc1: export_bc1_map(catalog, pmap, parent_ids, samples); break; case f2: export_f2_map(catalog, pmap, parent_ids, samples); break; case gen: export_gen_map(catalog, pmap, parent_ids, samples); break; case none: case unk: break; } if (sql_out) write_sql(catalog, pmap, parent_ids); // // Output the observed haplotypes. // write_generic(catalog, pmap, samples, parent_ids, false); if (out_type == genomic) write_genomic(catalog, pmap); return 0; } int apply_locus_constraints(map &catalog, PopMap *pmap) { CSLocus *loc; Datum **d; uint below_stack_dep = 0; uint below_lnl_thresh = 0; if (min_stack_depth == 0) return 0; map::iterator it; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { // // Check that each sample is over the minimum stack depth for this locus. // if (d[i] != NULL && d[i]->tot_depth < min_stack_depth) { below_stack_dep++; delete d[i]; d[i] = NULL; } // // Check that each sample is over the log likelihood threshold. // if (d[i] != NULL && filter_lnl && d[i]->lnl < lnl_limit) { below_lnl_thresh++; delete d[i]; d[i] = NULL; } } } if (min_stack_depth > 0) cerr << "Removed " << below_stack_dep << " samples from loci that are below the minimum stack depth of " << min_stack_depth << "x\n"; if (filter_lnl) cerr << "Removed " << below_lnl_thresh << " samples from loci that are below the log likelihood threshold of " << lnl_limit << "\n"; return 0; } int identify_parental_ids(map &catalog, vector &sample_ids, set &parents) { set catalog_parents; map::iterator it; CSLocus *loc; int sample_id; // // We assume the catalog was constructed from one or more parents of one // or more crosses. These are listed in the catalog.tags.tsv file, column 8. 
// for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; for (uint i = 0; i < loc->comp.size(); i++) { sample_id = (int) strtol(loc->comp[i], NULL, 10); catalog_parents.insert(sample_id); } } // // Now we want to iterate over those individuals genotypes found when // searching the Stacks directory and crosscheck those found in the catalog. // for (uint i = 0; i < sample_ids.size(); i++) { if (catalog_parents.count(sample_ids[i]) > 0) parents.insert(sample_ids[i]); } set::iterator sit; cerr << "Identified parent IDs: "; for (sit = parents.begin(); sit != parents.end(); sit++) cerr << *sit << " "; cerr << "\n"; return 0; } int find_markers(map &catalog, PopMap *pmap, set &parent_ids) { map::iterator it; vector::iterator hit; set::iterator p, q; int pid_1, pid_2, parent_count, allele_cnt_1, allele_cnt_2; Datum *d_1, *d_2; CSLocus *loc; if (parent_ids.size() > 2) return 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; // // Count the number of parental tags matching this catalog tag. A proper marker should // contain a single representative from each parent; multiple alleles must be called from // a single tag from a single parent. // if (parent_ids.size() == 1) { p = parent_ids.begin(); pid_1 = *p; pid_2 = -1; d_1 = pmap->blacklisted(loc->id, pid_1) ? NULL : pmap->datum(loc->id, pid_1); d_2 = NULL; } else { p = parent_ids.begin(); q = p++; pid_1 = *p < *q ? *p : *q; pid_2 = *p < *q ? *q : *p; if (pmap->blacklisted(loc->id, pid_1) || pmap->blacklisted(loc->id, pid_2)) { d_1 = NULL; d_2 = NULL; } else { d_1 = pmap->datum(loc->id, pid_1); d_2 = pmap->datum(loc->id, pid_2); } } parent_count = 0; if (d_1 != NULL) parent_count++; if (d_2 != NULL) parent_count++; // // Locus is present in both parents. 
// if (parent_count == 2) { allele_cnt_1 = d_1->obshap.size(); allele_cnt_2 = d_2->obshap.size(); // // Determine the number of unique alleles // set unique_alleles; for (hit = d_1->obshap.begin(); hit != d_1->obshap.end(); hit++) unique_alleles.insert(*hit); for (hit = d_2->obshap.begin(); hit != d_2->obshap.end(); hit++) unique_alleles.insert(*hit); int num_unique_alleles = unique_alleles.size(); // // Locus is heterozygous in both parents. However, the number of alleles present distinguishes // what type of marker it is. Four unique alleles requries an ab/cd marker, while four // alleles that are the same in both parents requires an ab/ab marker. Finally, three unique // alleles requires either an ab/ac marker. // if (allele_cnt_1 == 2 && allele_cnt_2 == 2) { if (num_unique_alleles == 3) loc->marker = "ab/ac"; else if (num_unique_alleles == 2) loc->marker = "ab/ab"; else loc->marker = "ab/cd"; // // Locus is homozygous in one parent and heterozygous in the other. // } else if (allele_cnt_1 == 2 && allele_cnt_2 == 1) { if (num_unique_alleles == 3) loc->marker = "ab/cc"; else if (num_unique_alleles == 2) loc->marker = "ab/aa"; // // Locus is homozygous in one parent and heterozygous in the other. // } else if (allele_cnt_1 == 1 && allele_cnt_2 == 2) { if (num_unique_alleles == 3) loc->marker = "cc/ab"; else if (num_unique_alleles == 2) loc->marker = "aa/ab"; // // Locus is homozygous in both parents, but heterozygous between parents. // } else if (allele_cnt_1 == 1 && allele_cnt_2 == 1) { if (strcmp(d_1->obshap[0], d_2->obshap[0]) != 0) loc->marker = "aa/bb"; } // // Locus only exists in one parent. 
// } else if (parent_count == 1) { if (d_1 != NULL && d_1->obshap.size() == 2) loc->marker = "ab/--"; else if (d_2 != NULL && d_2->obshap.size() == 2) loc->marker = "--/ab"; } } return 0; } int calculate_f(map &catalog, PopMap *pmap, set &parent_ids) { map::iterator it; map::iterator j; Datum **d; CSLocus *loc; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); if (loc->snps.size() == 0) continue; double tot = 0.0; double hets = 0; double p, q, h, h0; map alle; for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; if (parent_ids.count(pmap->rev_sample_index(i))) continue; tot++; if (d[i]->obshap.size() > 1) hets++; // // We are measuring the first SNP in the haplotype // for (uint j = 0; j < d[i]->obshap.size(); j++) alle[d[i]->obshap[j][0]]++; } if (alle.size() > 2 || tot == 0) continue; j = alle.begin(); p = j->second; j++; q = j->second; h = hets / tot; // Observed frequency of heterozygotes in the population h0 = 2 * (p/tot) * (q/tot); // 2PQ, expected frequency of hets under Hardy-Weinberg if (h0 > 0) loc->f = (h0 - h) / h0; //cerr << "P: " << p << "; Q: " << q << "; Hets: " << hets << "; total: " << tot << "; f: " << loc->f << "\n"; } return 0; } int create_genotype_map(CSLocus *locus, PopMap *pmap, set &parent_ids) { // // Create a genotype map. For any set of alleles, this routine will // assign each allele to one of the constituent genotypes, e.g. given the // marker type 'aaxbb' and the alleles 'A' from the male, and 'G' // from the female, will assign 'G' == 'bb' and 'A'== 'aa'. It assumes that // recombination may have occurred as with an F2, F3 or later cross. // //cerr << "Creating genotype map for catalog ID " << locus->id << ", marker: " << locus->marker << ".\n"; // // Get the parent IDs ordered // set::iterator p = parent_ids.begin(); set::iterator q = p++; int pid_1 = *p < *q ? *p : *q; int pid_2 = *p < *q ? 
*q : *p; set p1_gtypes, p2_gtypes; set::iterator i; map legal_gtypes, com_gtypes; // // First, identify any alleles that are common between the two parents. // p1_gtypes.insert(locus->marker[0]); p1_gtypes.insert(locus->marker[1]); p2_gtypes.insert(locus->marker[3]); p2_gtypes.insert(locus->marker[4]); for (i = p1_gtypes.begin(); i != p1_gtypes.end(); i++) if (*i != '-') legal_gtypes[*i]++; for (i = p2_gtypes.begin(); i != p2_gtypes.end(); i++) if (*i != '-') legal_gtypes[*i]++; // // Find the common genotypes // vector types; map::iterator j; for (j = legal_gtypes.begin(); j != legal_gtypes.end(); j++) if (j->second > 1) types.push_back(j->first); sort(types.begin(), types.end()); Datum *d_1, *d_2; map haplotypes; map::iterator k; vector > sorted_haplotypes; d_1 = pmap->datum(locus->id, pid_1); d_2 = pmap->datum(locus->id, pid_2); if (d_1 != NULL) { for (uint n = 0; n < d_1->obshap.size(); n++) haplotypes[d_1->obshap[n]]++; } if (d_2 != NULL) { for (uint n = 0; n < d_2->obshap.size(); n++) haplotypes[d_2->obshap[n]]++; } // // Sort the haplotypes map by value // for (k = haplotypes.begin(); k != haplotypes.end(); k++) sorted_haplotypes.push_back(*k); sort(sorted_haplotypes.begin(), sorted_haplotypes.end(), hap_compare); for (uint n = 0, index = 0; n < sorted_haplotypes.size() && index < types.size(); n++, index++) { if (sorted_haplotypes[n].second > 1) { locus->gmap[sorted_haplotypes[n].first] = types[index]; com_gtypes[types[index]]++; // cerr << " Assigning common allele " << sorted_haplotypes[n].first << " to genotype '" << locus->gmap[sorted_haplotypes[n].first] << "'\n"; } } // // Now, examine the remaining first parent alleles. 
//
// NOTE(review): template arguments appear to have been stripped from this file
// by text extraction (e.g. "map >", "PopMap *", "vector gtypes"); the code
// tokens are preserved exactly as found — only formatting and comments were
// restored. Verify against the original source before compiling.
//
// Tail of the genotype-map builder (the function header is above this chunk):
// assign the remaining, non-shared genotype letters to the first parent's
// haplotypes, skipping haplotypes that already have a mapping.
//
if (d_1 != NULL) {
    legal_gtypes.clear();
    for (i = p1_gtypes.begin(); i != p1_gtypes.end(); i++)
        if (*i != '-' && com_gtypes.count(*i) == 0) {
            // cerr << " Adding " << *i << " to first parent genotypes\n";
            legal_gtypes[*i]++;
        }
    types.clear();
    for (j = legal_gtypes.begin(); j != legal_gtypes.end(); j++)
        types.push_back(j->first);
    sort(types.begin(), types.end());

    // index is decremented when a haplotype is already mapped, so genotype
    // letters are consumed only by unmapped haplotypes.
    for (uint n = 0, index = 0; n < d_1->obshap.size() && index < types.size(); n++, index++) {
        if (locus->gmap.count(d_1->obshap[n])) {
            index--;
            continue;
        }
        locus->gmap[d_1->obshap[n]] = types[index];
        // cerr << " Assinging '" << d_1->obshap[n] << "' to first parent genotype '" << locus->gmap[d_1->obshap[n]] << "'\n";
    }
}

//
// Finally, repeat in the second parent.
//
if (d_2 != NULL) {
    legal_gtypes.clear();
    for (i = p2_gtypes.begin(); i != p2_gtypes.end(); i++)
        if (*i != '-' && com_gtypes.count(*i) == 0) {
            // cerr << " Adding " << *i << " to second genotypes\n";
            legal_gtypes[*i]++;
        }
    types.clear();
    for (j = legal_gtypes.begin(); j != legal_gtypes.end(); j++)
        types.push_back(j->first);
    sort(types.begin(), types.end());

    for (uint n = 0, index = 0; n < d_2->obshap.size() && index < types.size(); n++, index++) {
        if (locus->gmap.count(d_2->obshap[n])) {
            index--;
            continue;
        }
        locus->gmap[d_2->obshap[n]] = types[index];
        // cerr << " Assinging '" << d_2->obshap[n] << "' to second parent genotype '" << locus->gmap[d_2->obshap[n]] << "'\n";
    }
}

return 0;
}

//
// Translate each sample's observed haplotypes at this locus into a genotype
// string via locus->gmap, then map that string through the marker-type
// dictionary. Writes the result into each Datum's gtype and counts called
// (non "--") genotypes in locus->gcnt.
//
int call_population_genotypes(CSLocus *locus, PopMap *pmap, map > &dictionary) {
    //
    // Fetch the array of observed haplotypes from the population
    //
    Datum **d = pmap->locus(locus->id);

    for (int i = 0; i < pmap->sample_cnt(); i++) {
        if (d[i] == NULL)
            continue;

        vector gtypes;
        string gtype;

        //cerr << "Sample Id: " << pmap->rev_sample_index(i) << "\n";

        for (uint j = 0; j < d[i]->obshap.size(); j++) {
            //
            // Impossible allele encountered.
            //
            if (locus->gmap.count(d[i]->obshap[j]) == 0) {
                gtypes.clear();
                gtypes.push_back("-");
                goto impossible;
            }

            gtypes.push_back(locus->gmap[d[i]->obshap[j]]);
            //cerr << " Observed Haplotype: " << d[i]->obshap[j] << ", Genotype: " << locus->gmap[d[i]->obshap[j]] << "\n";
        }

    impossible:
        sort(gtypes.begin(), gtypes.end());
        for (uint j = 0; j < gtypes.size(); j++) {
            gtype += gtypes[j];
            //cerr << " Adding genotype to string: " << gtypes[j] << "; " << gtype << "\n";
        }

        string m = dictionary[locus->marker].count(gtype) ? dictionary[locus->marker][gtype] : "--";

        // NOTE(review): gtype is allocated with new[], so this should arguably
        // be "delete []" as done elsewhere in this file — verify.
        if (d[i]->gtype != NULL)
            delete d[i]->gtype;

        d[i]->gtype = new char[m.length() + 1];
        strcpy(d[i]->gtype, m.c_str());

        if (m != "--")
            locus->gcnt++;
        //cerr << "Assigning datum, marker: " << locus->marker << ", string: " << m << ", haplotype: " << d[i]->obshap[0] << ", gtype: " << gtype << "\n";
    }

    return 0;
}

//
// Detect CP-map markers where one mapping parent has a dropped allele by
// testing progeny genotype frequencies against the segregation ratios such a
// dropout would produce; rewrites the marker type and re-codes affected
// progeny genotypes in place.
//
int correct_cp_markers_missing_alleles(set &parent_ids, map &catalog, PopMap *pmap) {
    map::iterator it;
    CSLocus *loc;
    Datum **d;
    map > seg_ratios;

    //
    // The following segregation ratios will occur when one of the parents
    // in the cross is missing an allele. We will not see these ratios
    // in one of these markers with no missing alleles.
    //
    seg_ratios["ab/aa"]["aa"] = 0.50;
    seg_ratios["ab/aa"]["ab"] = 0.25;
    seg_ratios["ab/aa"]["bb"] = 0.25;
    seg_ratios["aa/ab"]["aa"] = 0.50;
    seg_ratios["aa/ab"]["ab"] = 0.25;
    seg_ratios["aa/ab"]["bb"] = 0.25;
    seg_ratios["ab/cc"]["ac"] = 0.25;
    seg_ratios["ab/cc"]["bc"] = 0.25;
    seg_ratios["ab/cc"]["aa"] = 0.25;
    seg_ratios["ab/cc"]["bb"] = 0.25;
    seg_ratios["cc/ab"]["ac"] = 0.25;
    seg_ratios["cc/ab"]["bc"] = 0.25;
    seg_ratios["cc/ab"]["aa"] = 0.25;
    seg_ratios["cc/ab"]["bb"] = 0.25;

    cerr << "Testing catalog loci for mapping parents with missing alleles...";
    int corrected = 0;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        loc = it->second;

        //
        // We only want to examine markers where one parent is homozygous.
        //
        if (loc->marker != "ab/aa" && loc->marker != "aa/ab" && loc->marker != "ab/cc" && loc->marker != "cc/ab")
            continue;

        map cnts;

        //
        // Calculate initial segregation distortion.
        //
        double n          = tally_generic_gtypes(loc->id, pmap, parent_ids, cnts);
        double chisq_pval = chisq_test(seg_ratios, cnts, loc->marker, n);

        //
        // Check if our genotype ratios match the segregation ratios specified above. If so,
        // we have a dropped allele in one of the parents.
        //
        if (n == 0 || chisq_pval < chisq_pval_limit)
            continue;

        corrected++;

        if (loc->marker == "ab/aa")
            loc->marker = "ab/a-";
        else if (loc->marker == "aa/ab")
            loc->marker = "-a/ab";
        else if (loc->marker == "ab/cc")
            loc->marker = "ab/c-";
        else if (loc->marker == "cc/ab")
            loc->marker = "-c/ab";

        d = pmap->locus(loc->id);

        // Re-code progeny genotypes to reflect the now-known missing allele.
        if (loc->marker == "ab/a-" || loc->marker == "-a/ab") {
            for (int i = 0; i < pmap->sample_cnt(); i++) {
                if (d[i] == NULL) continue;
                if (parent_ids.count(pmap->rev_sample_index(i))) continue;
                if (strcmp(d[i]->gtype, "bb") == 0) strcpy(d[i]->gtype, "ab");
            }
        } else if (loc->marker == "ab/c-") {
            for (int i = 0; i < pmap->sample_cnt(); i++) {
                if (d[i] == NULL) continue;
                if (parent_ids.count(pmap->rev_sample_index(i))) continue;
                if (strcmp(d[i]->gtype, "bb") == 0)
                    strcpy(d[i]->gtype, "bd");
                else if (strcmp(d[i]->gtype, "aa") == 0)
                    strcpy(d[i]->gtype, "ad");
            }
        } else if (loc->marker == "-c/ab") {
            for (int i = 0; i < pmap->sample_cnt(); i++) {
                if (d[i] == NULL) continue;
                if (parent_ids.count(pmap->rev_sample_index(i))) continue;
                if (strcmp(d[i]->gtype, "bb") == 0)
                    strcpy(d[i]->gtype, "ad");
                else if (strcmp(d[i]->gtype, "aa") == 0)
                    strcpy(d[i]->gtype, "ac");
                else if (strcmp(d[i]->gtype, "bc") == 0)
                    strcpy(d[i]->gtype, "bd");
                else if (strcmp(d[i]->gtype, "ac") == 0)
                    strcpy(d[i]->gtype, "bc");
            }
        }
    }

    //
    // Now we will deal with aa/bb markers separately, since there can be three possible
    // missing allele situations:
    //   aa: 50%, ab: 50%          - we have an aa/b- marker, which should be mapped as an --/ab
    //   bb: 50%, ab: 50%          - we have an -a/bb marker, which should be mapped as an ab/--
    //   aa: 33%, ab: 33%, bb: 33% - we have an -a/b- maker, which should be mapped as an ab/ab, but
    //                               we can't disambiguate the aa bb genotypes so it can't be mapped.
    //
    map > seg_ratio_1, seg_ratio_2;
    seg_ratio_1["aa/bb"]["aa"] = 0.50;
    seg_ratio_1["aa/bb"]["ab"] = 0.50;
    seg_ratio_2["aa/bb"]["bb"] = 0.50;
    seg_ratio_2["aa/bb"]["ab"] = 0.50;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        loc = it->second;

        if (loc->marker != "aa/bb") continue;

        map cnts;

        double n          = tally_generic_gtypes(loc->id, pmap, parent_ids, cnts);
        double chisq_pval = chisq_test(seg_ratio_1, cnts, loc->marker, n);

        if (n == 0) continue;

        if (chisq_pval >= chisq_pval_limit) {
            corrected++;
            loc->marker = "aa/b-";
            d = pmap->locus(loc->id);
            for (int i = 0; i < pmap->sample_cnt(); i++) {
                if (d[i] == NULL) continue;
                if (parent_ids.count(pmap->rev_sample_index(i))) continue;
                if (strcmp(d[i]->gtype, "ab") == 0) strcpy(d[i]->gtype, "bb");
            }
        } else {
            chisq_pval = chisq_test(seg_ratio_2, cnts, loc->marker, n);

            if (chisq_pval >= chisq_pval_limit) {
                corrected++;
                loc->marker = "-a/bb";
                d = pmap->locus(loc->id);
                for (int i = 0; i < pmap->sample_cnt(); i++) {
                    if (d[i] == NULL) continue;
                    if (parent_ids.count(pmap->rev_sample_index(i))) continue;
                    if (strcmp(d[i]->gtype, "ab") == 0) strcpy(d[i]->gtype, "aa");
                }
            }
        }
    }

    cerr << "corrected " << corrected << " catalog loci.\n";

    return 0;
}

//
// For each non-parent sample, reload its stacks from disk and re-examine the
// genotype call at every marker locus, verifying uncalled SNPs directly
// against the raw reads (see check_uncalled_snps); prints summary statistics.
//
int automated_corrections(map &samples, set &parent_ids, map &catalog, vector > &matches, PopMap *pmap) {
    int sample_id, catalog_id, tag_id;
    Datum *d;
    Locus *s;
    string file;

    cerr << "Performing automated corrections...\n";

    for (uint i = 0; i < matches.size(); i++) {
        sample_id = matches[i][0]->sample_id;
        file = samples[sample_id];

        //if (sample_id != 29) continue;

        if (parent_ids.count(sample_id)) continue;

        map stacks;
        bool compressed = false;
        int res;
        // NOTE(review): on load failure this reports the error but returns 0,
        // the same value as success — verify callers expect this.
        if ((res = load_loci(in_path + file, stacks, true, false, compressed)) == 0) {
            cerr << "Unable to load sample file '" << file << "'\n";
// (continuation of automated_corrections)
            return 0;
        }

        set > processed;

        for (uint j = 0; j < matches[i].size(); j++) {
            catalog_id = matches[i][j]->cat_id;
            sample_id  = matches[i][j]->sample_id;
            tag_id     = matches[i][j]->tag_id;

            if (catalog.count(catalog_id) == 0) continue;

            //
            // There are multiple matches per stack, but we only need to process
            // each stack once to make corrections.
            //
            if (processed.count(make_pair(catalog_id, tag_id)) == 0 &&
                catalog[catalog_id]->marker.length() > 0) {
                d = pmap->datum(catalog_id, sample_id);
                //cerr << "Accessing catalog ID " << catalog_id << "; sample: " << sample_id << "; marker: " << catalog[catalog_id]->marker << ": d: " << d << "; gtype: " << d->gtype << "\n";
                if (d != NULL && strcmp(d->gtype, "--") != 0) {
                    s = stacks[tag_id];
                    check_uncalled_snps(catalog[catalog_id], s, d);
                }
                processed.insert(make_pair(catalog_id, tag_id));
            }
        }

        //
        // Free up memory
        //
        map::iterator it;
        for (it = stacks.begin(); it != stacks.end(); it++)
            delete it->second;
    }

    //
    // Summarize correction results
    //
    long pot_gen = 0;
    long tot_gen = 0;
    long tot_cor = 0;
    long het_cor = 0;
    long rem_cor = 0;
    int markers = 0;
    map::iterator it;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        if (it->second->marker.length() == 0) continue;
        markers++;

        for (int j = 0; j < pmap->sample_cnt(); j++) {
            sample_id = pmap->rev_sample_index(j);
            if (parent_ids.count(sample_id)) continue;

            d = pmap->datum(it->first, sample_id);
            pot_gen++;

            if (d == NULL) continue;

            tot_gen++;

            if (d->corrected == true) {
                tot_cor++;
                // A corrected genotype of "--" means the call was removed;
                // anything else was converted to a heterozygote.
                if (strcmp(d->gtype, "--") == 0)
                    rem_cor++;
                else
                    het_cor++;
            }
        }
    }
    cerr << pot_gen << " potential genotypes in " << markers << " markers, "
         << tot_gen << " populated; "
         << tot_cor << " corrected, "
         << het_cor << " converted to heterozygotes, "
         << rem_cor << " unsupported homozygotes removed.\n";

    return 0;
}

//
// Re-examine one sample's raw reads at each catalog SNP position: if any SNP
// column is ambiguous the genotype is cleared to "-"; if every SNP is verified
// as heterozygous in the reads, the genotype is rebuilt from the observed
// haplotypes and flagged as corrected.
//
int check_uncalled_snps(CSLocus *clocus, Locus *stack, Datum *d) {
    //
    // If this locus is already known to be multi-allele, return, we only want
    // to verify uncalled SNPs.
    //
    if (strlen(d->gtype) > 1 && d->gtype[0] != d->gtype[1])
        return 0;

    //cerr << "Catalog locus: " << clocus->id << ", marker: " << clocus->marker << ", tag_id: " << stack->id << "; Starting gtype: " << d->gtype << "\n";

    vector verified_snps;
    string status = "false";
    string homozygous;

    for (uint i = 0; i < clocus->snps.size(); i++) {
        check_homozygosity(stack->reads, clocus->snps[i]->col, clocus->snps[i]->rank_1, clocus->snps[i]->rank_2, homozygous);

        if (homozygous == "unknown")
            status = "true";
        else if (homozygous == "false")
            verified_snps.push_back(clocus->snps[i]);
    }

    // Any ambiguous column invalidates the whole call.
    if (status == "true") {
        d->corrected = true;
        delete [] d->gtype;
        d->gtype = new char[2];
        strcpy(d->gtype, "-");
        return 0;
    } else if (verified_snps.size() < clocus->snps.size()) {
        return 0;
    }

    //
    // Detect the alleles present from the verified SNPs
    //
    vector haplotypes;
    call_alleles(verified_snps, stack->reads, haplotypes);

    vector types;
    for (uint i = 0; i < haplotypes.size(); i++) {
        if (clocus->gmap.count(haplotypes[i])) {
            //cerr << " Adding haplotype '" << haplotypes[i] << "', which maps to '" << clocus->gmap[haplotypes[i]] << "' to the genotype\n";
            types.push_back(clocus->gmap[haplotypes[i]]);
        } else {
            //cerr << " Adding haplotype '-' for " << haplotypes[i] << "\n";
            types.push_back("-");
        }
    }

    sort(types.begin(), types.end());

    string genotype;
    for (uint i = 0; i < types.size(); i++)
        genotype += types[i];

    //cerr << "Final genotype: " << genotype << "\n";

    genotype = global_dictionary[clocus->marker].count(genotype) ?
        global_dictionary[clocus->marker][genotype] : "--";

    //cerr << "Final translated genotype: " << genotype << "\n";

    if (strcmp(genotype.c_str(), d->gtype) != 0) {
        d->corrected = true;
        delete [] d->gtype;
        d->gtype = new char[genotype.length() + 1];
        strcpy(d->gtype, genotype.c_str());
    }
    //cerr << " Catalog: " << clocus->id << ", stack: " << stack->id << ", Ending Genotype: " << d->gtype << "\n\n";

    return 0;
}

//
// Enumerate the distinct haplotypes present in the raw reads over the verified
// SNP columns; reads carrying a nucleotide outside the two model-called states
// at any SNP are discarded.
//
int call_alleles(vector &snps, vector &reads, vector &haplotypes) {
    map a;
    int height = reads.size();
    char base;

    for (int i = 0; i < height; i++) {
        string haplotype;

        for (uint j = 0; j < snps.size(); j++) {
            base = reads[i][snps[j]->col];

            //
            // Check to make sure the nucleotide at the location of this SNP is
            // of one of the two possible states the multinomial model called.
            //
            if (base == snps[j]->rank_1 || base == snps[j]->rank_2)
                haplotype += base;
            else
                break;
        }

        if (haplotype.length() == snps.size())
            a[haplotype]++;
    }

    map::iterator it;
    for (it = a.begin(); it != a.end(); it++) {
        //cerr << " Calling haplotype: " << it->first << "\n";
        haplotypes.push_back(it->first);
    }

    return 0;
}

//
// Classify one read column as homozygous ("true"), heterozygous ("false") or
// ambiguous ("unknown") by tallying nucleotide counts across the reads.
//
int check_homozygosity(vector &reads, int col, char rank_1, char rank_2, string &homozygous) {
    //cerr << " Examining col " << col << ", rank 1: " << rank_1 << "; rank 2: " << rank_2 << "\n";

    int height = reads.size();
    homozygous = "true";

    // Too few reads to judge.
    if (height < min_hom_seqs) {
        homozygous = "unknown";
        return 0;
    }

    map nuc;
    vector > sorted_nuc;

    nuc['A'] = 0;
    nuc['C'] = 0;
    nuc['G'] = 0;
    nuc['T'] = 0;

    for (int j = 0; j < height; j++)
        nuc[reads[j][col]]++;

    map::iterator i;
    for (i = nuc.begin(); i != nuc.end(); i++)
        sorted_nuc.push_back(make_pair(i->first, i->second));

    sort(sorted_nuc.begin(), sorted_nuc.end(), compare_pair);

    //
    // Check if more than a single nucleotide occurs in this column. Only
    // count nucleotides that are part of the called SNP, do not count
    // error-generated nucleotides. Also, check that the sorting was successful
    // by ensuring that sorted_nuc[0] > sorted_nuc[1] > sorted_nuc[2].
// if (sorted_nuc[2].second > 0 && sorted_nuc[1].second <= sorted_nuc[2].second) { homozygous = "unknown"; return 0; } // cerr << "Sorted_nuc[0], '" << sorted_nuc[0].first << "', count: " << sorted_nuc[0].second // << "; Sorted_nuc[1], '" << sorted_nuc[1].first << "', count: " << sorted_nuc[1].second // << "; Sorted_nuc[2], '" << sorted_nuc[2].first << "', count: " << sorted_nuc[2].second << "\n"; if ((sorted_nuc[0].second > 0) && (sorted_nuc[0].first == rank_1 || sorted_nuc[0].first == rank_2) && (sorted_nuc[1].second > 0) && (sorted_nuc[1].first == rank_1 || sorted_nuc[1].first == rank_2)) { homozygous = "false"; } // // If we find a second nucleotide present, check its prevelence. If it is // less than 1/20 of the total reads, don't count a heterozygote. If it is // less than 1/10 report that we can't tell if its homozygous or not. Otherwise, // report this tag as a heterozygote. // double frac = (double) sorted_nuc[1].second / (double) height; //cerr << " Frac: " << frac << "; Second-most Prominent Nuc count: " << sorted_nuc[1].second << "; Depth: " << height << "\n"; if (homozygous == "false" && frac < min_het_seqs) homozygous = "true"; else if (homozygous == "false" && frac < max_het_seqs) homozygous = "unknown"; //cerr << " Homozygous: " << homozygous << "\n"; return 0; } int manual_corrections(string cor_path, PopMap *pmap) { // // Load manual corrections from a tab-seprated file, as exported from a Stacks SQL // dataabse. Has the format: // idbatch_idcatalog_idsample_idgenotype // char line[max_len]; ifstream fh(cor_path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening manual corrections file '" << cor_path << "'\n"; exit(1); } vector parts; int catalog_id, sample_id, len; char gtype[id_len]; char *e; int line_num = 0; int total = 0; int skipped = 0; int success = 0; int i; while (fh.good()) { fh.getline(line, id_len); line_num++; len = strlen(line); if (len == 0) continue; // // Check that there is no carraige return in the buffer. 
// if (line[len - 1] == '\r') line[len - 1] = '\0'; // // Ignore comments // if (line[0] == '#') continue; parse_tsv(line, parts); if (parts.size() != 5) { cerr << "Error parsing '" << line << "' at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } catalog_id = (int) strtol(parts[2].c_str(), &e, 10); if (*e != '\0') { cerr << "Error parsing '" << parts[2].c_str() << "' at line: " << line_num << ".\n"; return 0; } sample_id = (int) strtol(parts[3].c_str(), &e, 10); if (*e != '\0') { cerr << "Error parsing '" << parts[3].c_str() << "' at line: " << line_num << ".\n"; return 0; } strcpy(gtype, parts[4].c_str()); // // Overwrite the genotype in the PopMap. // Datum **d = pmap->locus(catalog_id); total++; if (d == NULL) { skipped++; continue; } if ((i = pmap->sample_index(sample_id)) < 0) { skipped++; continue; } if (d[i] == NULL) { skipped++; continue; } for (uint k = 0; k < strlen(gtype); k++) gtype[k] = tolower(gtype[k]); if (strcmp(gtype, "--") == 0) strcpy(gtype, "-"); if (d[i]->gtype != NULL) delete [] d[i]->gtype; d[i]->gtype = new char[strlen(gtype) + 1]; strcpy(d[i]->gtype, gtype); d[i]->corrected = true; success++; } fh.close(); cerr << "Successfully imported " << success << " manually corrected genotypes. Skipped " << skipped << " genotypes due to invalid catalog or sample IDs, " << total << " genotypes read from file.\n"; return 0; } int export_gen_map(map &catalog, PopMap *pmap, set &parent_ids, map &samples) { // // We wish to export, a set of generic genotypes, not specific to any mapping type. // // // Mark those genotypes that have been corrected in uppercase letters. 
// map::iterator it; CSLocus *loc; uint len; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; Datum **d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; if (parent_ids.count(pmap->rev_sample_index(i))) continue; if (d[i]->corrected) { len = strlen(d[i]->gtype); for (uint k = 0; k < len; k++) d[i]->gtype[k] = toupper(d[i]->gtype[k]); } } } // // Output the results // write_generic(catalog, pmap, samples, parent_ids, true); return 0; } int export_f2_map(map &catalog, PopMap *pmap, set &parent_ids, map &samples) { // // We wish to export, according to the JoinMap manual, a locus genotype file (loc-file), // which contains the information of all the loci for a single segregating population. // // We are exporting an F2 population type: // The result of selfing the F1 of a cross between two fully homozygous diploid parents. // // Genotype codes for an F2 population, depending on the locus segregation type. // // Seg. type Possible genotypes // --------- ------------------ // a, b, h, – // a, b, h, – // a, – // b, – // a, b, – // b, – // map types; map > dictionary; load_mm_f2_dictionary(types, dictionary); // // Translate the genotypes for this particular map type. // translate_genotypes(types, dictionary, catalog, pmap, samples, parent_ids); // // Output the results // switch(out_type) { case joinmap: write_joinmap(catalog, pmap, types, samples, parent_ids); break; case rqtl: write_rqtl(catalog, pmap, types, samples, parent_ids); break; case onemap: write_onemap_mapmaker(catalog, pmap, types, samples, parent_ids); break; default: break; } return 0; } int export_dh_map(map &catalog, PopMap *pmap, set &parent_ids, map &samples) { // // We wish to export, according to the JoinMap manual, a locus genotype file (loc-file), // which contains the information of all the loci for a single segregating population. 
// (continuation of export_dh_map)
    //
    // We are exporting a DH population type:
    //   a doubled haploid population: the result of doubling the gametes of a single heterozygous
    //   diploid individual.
    //
    // Segregation type codes for population type DH, from Joinmap manual:
    // (NOTE(review): the code column appears to have been lost in extraction.)
    //
    //   Code    Description
    //   ------- -----------
    //           locus homozygous in both parents, heterozygous between the parents
    //
    // Genotype codes for a CP population, depending on the locus segregation type.
    //
    //   Seg. type    Possible genotypes
    //   ---------    ------------------
    //                a  the one genotype
    //                b  the other genotype
    //
    map types;
    map > dictionary;

    load_mm_dh_dictionary(types, dictionary);

    //
    // Translate the genotypes for this particular map type.
    //
    translate_genotypes(types, dictionary, catalog, pmap, samples, parent_ids);

    //
    // Output the results
    //
    switch(out_type) {
    case joinmap:
        write_joinmap(catalog, pmap, types, samples, parent_ids);
        break;
    case rqtl:
        write_rqtl(catalog, pmap, types, samples, parent_ids);
        break;
    default:
        break;
    }

    return 0;
}

//
// Translate and export genotypes for a BC1 (first-generation backcross)
// population in the selected output format.
//
int export_bc1_map(map &catalog, PopMap *pmap, set &parent_ids, map &samples) {
    //
    // We wish to export, according to the JoinMap manual, a locus genotype file (loc-file),
    // which contains the information of all the loci for a single segregating population.
    //
    // We are exporting a BC1 population type:
    //   a first generation backcross population: the result of crossing the F1 of a cross between
    //   two fully homozygous diploid parents to one of the parents.
    //
    // Segregation type codes for population type BC1, from Joinmap manual:
    //
    //   Code    Description
    //   ------- -----------
    //           locus homozygous in both parents, heterozygous between the parents
    //
    // Genotype codes for a BC1 population, depending on the locus segregation type.
    //
    //   Seg. type    Possible genotypes
    //   ---------    ------------------
    //                a  homozygote or haploid as the first parent
    //                b  homozygote or haploid as the second parent
    //                h  heterozygote (as the F1)
    //
    map types;
    map > dictionary;

    load_mm_bc_dictionary(types, dictionary);

    //
    // Translate the genotypes for this particular map type.
    //
    translate_genotypes(types, dictionary, catalog, pmap, samples, parent_ids);

    //
    // Output the results
    //
    switch(out_type) {
    case joinmap:
        write_joinmap(catalog, pmap, types, samples, parent_ids);
        break;
    case rqtl:
        write_rqtl(catalog, pmap, types, samples, parent_ids);
        break;
    case onemap:
        write_onemap_mapmaker(catalog, pmap, types, samples, parent_ids);
        break;
    default:
        break;
    }

    return 0;
}

//
// Translate and export genotypes for a CP (outbred, cross-pollinated)
// population, using the JoinMap or OneMap CP dictionaries.
//
int export_cp_map(map &catalog, PopMap *pmap, set &parent_ids, map &samples) {
    //
    // We wish to export, according to the JoinMap manual, a locus genotype file (loc-file),
    // which contains the information of all the loci for a single segregating population.
    //
    // We are exporting a CP population type:
    //   a population resulting from a cross between two heterogeneously
    //   heterozygous and homozygous diploid parents, linkage phases originally
    //   (possibly) unknown.
    //
    // Segregation type codes for population type CP, from Joinmap manual:
    //
    //   Code    Description
    //   ------- -----------
    //           locus heterozygous in both parents, four alleles
    //           locus heterozygous in both parents, three alleles
    //           locus heterozygous in both parents, two alleles
    //           locus heterozygous in the first parent
    //           locus heterozygous in the second parent
    //
    // Genotype codes for a CP population, depending on the locus segregation type.
    //
    //   Seg. type    Possible genotypes
    //   ---------    ------------------
    //                ac, ad, bc, bd, ––
    //                ee, ef, eg, fg, ––
    //                hh, hk, kk, h-, k-, ––
    //                ll, lm, ––
    //                nn, np, ––
    //
    map types;
    map > dictionary;

    switch(out_type) {
    case joinmap:
        load_joinmap_cp_dictionary(types, dictionary);
        break;
    case onemap:
        load_onemap_cp_dictionary(types, dictionary);
        break;
    default:
        break;
    }

    //
    // Translate the genotypes for this particular map type.
    //
    translate_genotypes(types, dictionary, catalog, pmap, samples, parent_ids);

    //
    // Output the results
    //
    switch(out_type) {
    case joinmap:
        write_joinmap(catalog, pmap, types, samples, parent_ids);
        break;
    case onemap:
        write_onemap(catalog, pmap, types, samples, parent_ids);
        break;
    default:
        break;
    }

    return 0;
}

//
// Compute a chi-square segregation-distortion p-value for every catalog locus
// whose marker type has an expected segregation ratio; stores it in loc->chisq.
//
int calc_segregation_distortion(map > &seg_ratios, map &catalog, PopMap *pmap, set &parent_ids) {
    map types;
    map > dictionary;

    switch(map_type) {
    case dh:
        load_dh_dictionary(types, dictionary);
        break;
    case cp:
        load_cp_dictionary(types, dictionary);
        break;
    case bc1:
        load_mm_bc_dictionary(types, dictionary);
        break;
    case f2:
        load_mm_f2_dictionary(types, dictionary);
        break;
    case gen:
    case none:
    case unk:
        break;
    }

    map::iterator it;
    CSLocus *loc;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        loc = it->second;

        if (seg_ratios.count(loc->marker) == 0) continue;

        map cnts;
        double n = tally_translated_gtypes(loc->id, pmap, parent_ids, dictionary[loc->marker], cnts);

        if (n == 0) continue;

        // cerr << "ID: " << loc->id << "; marker: " << loc->marker << "\n";

        loc->chisq = chisq_test(seg_ratios, cnts, loc->marker, n);
    }

    return 0;
}

//
// Tally dictionary-translated genotype counts for one locus across non-parent
// samples; returns the number of samples carrying a called genotype.
//
double tally_translated_gtypes(int loc_id, PopMap *pmap, set &parent_ids, map &dictionary, map &cnts) {
    Datum **d = pmap->locus(loc_id);
    double n = 0.0;

    for (int i = 0; i < pmap->sample_cnt(); i++) {
        if (d[i] == NULL) continue;
        if (parent_ids.count(pmap->rev_sample_index(i))) continue;
        if (strcmp(d[i]->gtype, "--") == 0) continue;

        n++;

        if (cnts.count(dictionary[d[i]->gtype]) > 0)
            cnts[dictionary[d[i]->gtype]]++;
        else
// (continuation of tally_translated_gtypes)
            cnts[dictionary[d[i]->gtype]] = 1;
    }

    return n;
}

//
// Tally raw (untranslated) genotype counts for one locus across non-parent
// samples; returns the number of samples carrying a called genotype.
//
double tally_generic_gtypes(int loc_id, PopMap *pmap, set &parent_ids, map &cnts) {
    Datum **d = pmap->locus(loc_id);
    double n = 0.0;

    for (int i = 0; i < pmap->sample_cnt(); i++) {
        if (d[i] == NULL) continue;
        if (parent_ids.count(pmap->rev_sample_index(i))) continue;
        if (strcmp(d[i]->gtype, "--") == 0) continue;

        n++;

        if (cnts.count(d[i]->gtype) > 0)
            cnts[d[i]->gtype]++;
        else
            cnts[d[i]->gtype] = 1;
    }

    return n;
}

//
// Chi-square goodness-of-fit of observed genotype counts against the expected
// segregation ratios for this marker type; returns the p-value.
//
double chisq_test(map > &seg_ratios, map &cnts, string marker, double n) {
    //
    // Calculate chi-square value.
    //   sit->second * n == the expected value for this genotype
    //
    double chisq = 0.0;
    double exp = 0.0;
    double obs = 0.0;
    double df = seg_ratios[marker].size() - 1;

    map::iterator sit;

    for (sit = seg_ratios[marker].begin(); sit != seg_ratios[marker].end(); sit++) {
        obs = cnts.count(sit->first) == 0 ? 0 : cnts[sit->first];
        exp = sit->second * n;
        // cerr << " category: " << sit->first << "; obs: " << obs << "; exp: " << exp << "\n";

        // NOTE(review): divides by exp; exp can only be zero when n == 0 or a
        // zero ratio is supplied — callers appear to guard n == 0 upstream.
        chisq += ((obs - exp) * (obs - exp)) / exp;
    }
    // cerr << " df: " << df << "; Chisq value: " << chisq << "; pvalue: " << chisq_pvalue(df, chisq) << "\n";

    //
    // Determine p-value
    //
    return chisq_pvalue(df, chisq);
}

//
// Look up the p-value bucket for a chi-square statistic with df degrees of
// freedom from the precomputed critical-value table.
//
double chisq_pvalue(int df, double chisq) {
    int i = 0;
    // NOTE(review): the table element chisq_crit_values[df][i] is read before
    // the bound i < chisq_crit_values_size is checked, so the final iteration
    // can read one past the end of the row; the two conditions should be
    // swapped. Verify against the table's declared size.
    while (chisq > chisq_crit_values[df][i] &&
           i < chisq_crit_values_size) {
        i++;
    }

    if (i == chisq_crit_values_size)
        return chisq_crit_values[0][chisq_crit_values_size - 1];

    return chisq_crit_values[0][i];
}

//
// Convert each locus's generic marker into the map-type-specific marker code
// (recording the original in uncor_marker) and rewrite every sample's genotype
// through the corresponding dictionary; counts called genotypes in loc->gcnt.
//
int map_specific_genotypes(map &catalog, PopMap *pmap, set &parent_ids) {
    map types;
    map > dictionary;

    switch(map_type) {
    case dh:
        load_dh_dictionary(types, dictionary);
        break;
    case cp:
        load_cp_dictionary(types, dictionary);
        break;
    case bc1:
        load_bc_dictionary(types, dictionary);
        break;
    case f2:
        load_f2_dictionary(types, dictionary);
        break;
    case gen:
    case none:
    case unk:
        break;
    }

    map::iterator it;
    string marker, m;
    Datum **d;
    CSLocus *loc;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        loc = it->second;
        loc->gcnt = 0;

        if (loc->marker.length() == 0) continue;

        if (types.count(loc->marker)) {
            loc->uncor_marker = loc->marker;
            loc->marker = types[loc->marker];
            marker = loc->marker;
        } else {
            marker = "";
        }

        d = pmap->locus(loc->id);

        for (int i = 0; i < pmap->sample_cnt(); i++) {
            if (d[i] == NULL) continue;
            if (parent_ids.count(pmap->rev_sample_index(i))) continue;

            if (marker.length() == 0) {
                m = "--";
            } else {
                m = dictionary[marker].count(d[i]->gtype) ?
                    dictionary[marker][d[i]->gtype] :
                    dictionary[marker]["--"];
            }

            strcpy(d[i]->gtype, m.c_str());

            if (m != dictionary[marker]["--"])
                loc->gcnt++;
        }
    }

    return 0;
}

//
// Translate each sample's genotype into the output dictionary's encoding,
// storing the result in Datum::trans_gtype; corrected genotypes are written in
// uppercase, and called genotypes are counted in loc->trans_gcnt.
//
int translate_genotypes(map &types, map > &dictionary, map &catalog, PopMap *pmap, map &samples, set &parent_ids) {
    map::iterator it;
    CSLocus *loc;

    for (it = catalog.begin(); it != catalog.end(); it++) {
        loc = it->second;
        string marker = types.count(loc->marker) ? types[loc->marker] : "";

        Datum **d = pmap->locus(loc->id);

        for (int i = 0; i < pmap->sample_cnt(); i++) {
            if (d[i] == NULL) continue;
            if (parent_ids.count(pmap->rev_sample_index(i))) continue;

            //cerr << "Examining progeny " << samples[pmap->rev_sample_index(i)] << "; marker: " << loc->marker << "\n";

            string m;
            if (marker.length() == 0) {
                m = dictionary[marker]["--"];
            } else {
                m = dictionary[marker].count(d[i]->gtype) ?
                    dictionary[marker][d[i]->gtype] :
                    dictionary[marker]["--"];
            }

            d[i]->trans_gtype = new char[m.length() + 1];

            //
            // If the genotype was corrected, output it in uppercase letters.
            //
            if (d[i]->corrected) {
                for (uint k = 0; k < m.length(); k++)
                    d[i]->trans_gtype[k] = toupper(m[k]);
                d[i]->trans_gtype[m.length()] = '\0';
            } else {
                strcpy(d[i]->trans_gtype, m.c_str());
            }

            if (m != dictionary[marker]["--"])
                loc->trans_gcnt++;
            //cerr << " allele: " << d[i]->trans_gtype << "; trans_gcnt: " << loc->trans_gcnt << "\n";
        }
    }

    return 0;
}

//
// Tally genotype frequencies for one locus across progeny samples and render
// them as a "gtype:count(pct%);" string; the sample total and the maximum
// frequency are returned through the out-parameters.
//
int tally_progeny_haplotypes(CSLocus *locus, PopMap *pmap, set &parent_ids, int &total, double &max, string &freq_str) {
    char gtype[id_len];
    map freq;
    Datum **d = pmap->locus(locus->id);

    total = 0;
    max = 0;

    //cerr << "Examining marker: " << locus->id << "\n";

    for (int i = 0; i < pmap->sample_cnt(); i++) {
        if (parent_ids.count(pmap->rev_sample_index(i))) continue;
        if (d[i] == NULL) continue;

        //cerr << " Sample: " << i << "; Haplotype: " << d[i]->obshap[0] << "; Genotype: " << d[i]->gtype << "\n";

        if (strcmp(d[i]->gtype, "--") != 0) {
            //
            // Automated corrections will uppercase genotypes, convert them back to lowercase
            // in order to tally them properly.
// int j = 0; while (d[i]->gtype[j] != '\0') { gtype[j] = tolower(d[i]->gtype[j]); j++; } gtype[j] = '\0'; freq[gtype]++; total++; } } if (total == 0) return 0; double frac; stringstream s; char f[id_len]; map::iterator it; for (it = freq.begin(); it != freq.end(); it++) { frac = (double) it->second / (double) total * 100; if (frac > max) max = frac; sprintf(f, "(%0.1f%%);", frac); s << it->first << ":" << it->second << f; } freq_str = s.str().substr(0, s.str().length() - 1); return 0; } int write_sql(map &catalog, PopMap *pmap, set &parent_ids) { if (map_type == none) return 0; stringstream pop_name; pop_name << "batch_" << batch_id << ".markers.tsv"; string file = in_path + pop_name.str(); cerr << "Writing SQL markers file to '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening markers SQL file '" << file << "'\n"; exit(1); } fh << "# SQL ID" << "\t" << "Batch ID" << "\t" << "Catalog Locus ID" << "\t" << "Marker Type" << "\t" << "Total Genotypes" << "\t" << "Max" << "\t" << "Genotype Freqs" << "\t" << "Segregation Distortion" << "\t" << "Mean Log Likelihood" << "\t" << "Genotype Map" << "\t" << "Uncorrected Marker" << "\n"; map::iterator it; map::iterator j; CSLocus *loc; char max_str[id_len]; stringstream gtype_map; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->marker.length() == 0) continue; double max = 0.0; int total = 0; string freq, map; tally_progeny_haplotypes(loc, pmap, parent_ids, total, max, freq); sprintf(max_str, "%0.2f", max); // // Record the haplotype to genotype map. 
// gtype_map.str(""); for (j = loc->gmap.begin(); j != loc->gmap.end(); j++) gtype_map << j->first << ":" << j->second << ";"; map = gtype_map.str().substr(0, gtype_map.str().length() - 1); fh << 0 << "\t" << batch_id << "\t" << loc->id << "\t" << loc->marker << "\t" << total << "\t" << max_str << "\t" << freq << "\t" << loc->chisq << "\t" << loc->lnl << "\t" << map << "\t" << (loc->uncor_marker.length() == 0 ? loc->marker : loc->uncor_marker) << "\n"; } fh.close(); pop_name.str(""); pop_name << "batch_" << batch_id << ".genotypes_" << progeny_limit << ".txt"; file = in_path + pop_name.str(); cerr << "Writing SQL genotypes file to '" << file << "'\n"; fh.open(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening genotypes SQL file '" << file << "'\n"; exit(1); } fh << "# SQL ID" << "\t" << "Batch ID" << "\t" << "Catalog Locus ID" << "\t" << "Sample ID" << "\t" << "Genotype" << "\n"; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->gcnt < progeny_limit) continue; Datum **d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (parent_ids.count(pmap->rev_sample_index(i))) continue; fh << 0 << "\t" << batch_id << "\t" << loc->id << "\t" << pmap->rev_sample_index(i) << "\t"; if (d[i] == NULL) map_type == cp ? fh << "--\n" : fh << "-\n"; else fh << d[i]->gtype << "\n"; } } fh.close(); return 0; } int write_genomic(map &catalog, PopMap *pmap) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genomic_" << progeny_limit << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening genomic output file '" << file << "'\n"; exit(1); } // // Count the number of markers that have enough samples to output. 
// map::iterator cit; CSLocus *loc; int num_loci = 0; for (cit = catalog.begin(); cit != catalog.end(); cit++) { loc = cit->second; if (loc->hcnt < progeny_limit) continue; num_loci += loc->len - renz_len[enz]; } cerr << "Writing " << num_loci << " nucleotide positions to genomic file, '" << file << "'\n"; // // Write the header // fh << num_loci << "\t" << pmap->sample_cnt() << "\n"; // // Output each locus. // map >::iterator it; int a, b; uint rcnt = renz_cnt[enz]; uint rlen = renz_len[enz]; char *p; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint i = 0; i < it->second.size(); i++) { loc = it->second[i]; if (loc->hcnt < progeny_limit) continue; Datum **d = pmap->locus(loc->id); set snp_locs; string obshap; for (uint i = 0; i < loc->snps.size(); i++) snp_locs.insert(loc->snps[i]->col); uint start = 0; uint end = loc->len; // // Check for the existence of the restriction enzyme cut site, mask off // its output. // for (uint n = 0; n < rcnt; n++) if (strncmp(loc->con, renz[enz][n], rlen) == 0) start += renz_len[enz]; if (start == 0) { p = loc->con + (loc->len - rlen); for (uint n = rcnt; n < rcnt + rcnt; n++) if (strncmp(p, renz[enz][n], rlen) == 0) end -= renz_len[enz]; } uint k = 0; for (uint n = start; n < end; n++) { fh << loc->id << "\t" << loc->loc.chr << "\t" << loc->loc.bp + n; if (snp_locs.count(n) == 0) { for (int j = 0; j < pmap->sample_cnt(); j++) { a = encode_gtype(loc->con[n]); fh << "\t" << encoded_gtypes[a][a]; } } else { for (int j = 0; j < pmap->sample_cnt(); j++) { fh << "\t"; if (d[j] == NULL) fh << "0"; else switch (d[j]->obshap.size()) { case 1: a = encode_gtype(d[j]->obshap[0][k]); fh << encoded_gtypes[a][a]; break; case 2: a = encode_gtype(d[j]->obshap[0][k]); b = encode_gtype(d[j]->obshap[1][k]); fh << encoded_gtypes[a][b]; break; default: fh << "0"; break; } } k++; } fh << "\n"; } } } fh.close(); return 0; } int write_generic(map &catalog, PopMap *pmap, map &samples, set &parent_ids, bool 
write_gtypes) { stringstream pop_name; pop_name << "batch_" << batch_id; if (write_gtypes) pop_name << ".genotypes_" << progeny_limit << ".tsv"; else pop_name << ".haplotypes_" << progeny_limit << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening generic output file '" << file << "'\n"; exit(1); } // // Count the number of markers that have enough samples to output. // map::iterator it; CSLocus *loc; int num_loci = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (write_gtypes == false && loc->hcnt < progeny_limit) continue; if (write_gtypes == true && loc->gcnt < progeny_limit) continue; num_loci++; } cerr << "Writing " << num_loci << " loci to " << (write_gtypes ? "genotype" : "observed haplotype") << " file, '" << file << "'\n"; // // Write the header // fh << "# Catalog ID\t"; if (expand_id) fh << "\t"; if (write_gtypes) fh << "Marker\t"; fh << "Cnt\t" << "Seg Dist\t"; map::iterator s; for (int i = 0; i < pmap->sample_cnt(); i++) { if (write_gtypes && parent_ids.count(pmap->rev_sample_index(i))) continue; fh << samples[pmap->rev_sample_index(i)]; if (i < pmap->sample_cnt() - 1) fh << "\t"; } fh << "\n"; // // Output each locus. // for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (write_gtypes == false && loc->hcnt < progeny_limit) continue; if (write_gtypes == true && loc->gcnt < progeny_limit) continue; stringstream id; loc->annotation.length() > 0 ? id << loc->id << "|" << loc->annotation : id << loc->id; fh << id.str(); if (expand_id) { if (loc->annotation.length() > 0) id << "\t" << loc->id << "\t" << loc->annotation; else if (strlen(loc->loc.chr) > 0) id << "\t" << loc->id << "\t" << loc->loc.chr << "_" << loc->loc.bp; else id << "\t" << loc->id << "\t"; } if (write_gtypes) fh << "\t" << loc->marker; write_gtypes ? 
fh << "\t" << loc->gcnt : fh << "\t" << loc->hcnt; fh << "\t" << loc->chisq; Datum **d = pmap->locus(loc->id); string obshap; for (int i = 0; i < pmap->sample_cnt(); i++) { if (write_gtypes && parent_ids.count(pmap->rev_sample_index(i))) continue; fh << "\t"; if (d[i] == NULL) fh << "-"; else if (write_gtypes) { fh << d[i]->gtype; } else { obshap = ""; for (uint j = 0; j < d[i]->obshap.size(); j++) obshap += string(d[i]->obshap[j]) + "/"; obshap = obshap.substr(0, obshap.length()-1); fh << obshap; } } fh << "\n"; } fh.close(); return 0; } int write_joinmap(map &catalog, PopMap *pmap, map &types, map &samples, set &parent_ids) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genotypes_" << progeny_limit; string file = in_path + pop_name.str() + ".loc"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening joinmap output file '" << file << "'\n"; exit(1); } // // Count the number of mappable progeny // map::iterator it; CSLocus *loc; int num_loci = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; num_loci++; } cerr << "Writing " << num_loci << " loci to JoinMap file, '" << file << "'\n"; map map_types; map_types[cp] = "CP"; map_types[dh] = "DH"; map_types[bc1] = "BC1"; map_types[f2] = "F2"; // // Output the header of the file // fh << "name = " << pop_name.str() << "\n" << "popt = " << map_types[map_type] << "\n" << "nloc = " << num_loci << "\n" << "nind = " << pmap->sample_cnt() - parent_ids.size() << "\n\n"; // // Output each locus. // for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; stringstream id; loc->annotation.length() > 0 ? 
id << loc->id << "|" << loc->annotation : id << loc->id; fh << id.str() << "\t"; if (expand_id) { id.str(""); if (loc->annotation.length() > 0) id << loc->id << "\t" << loc->annotation; else if (strlen(loc->loc.chr) > 0) id << loc->id << "\t" << loc->loc.chr << "_" << loc->loc.bp; else id << loc->id << "\t"; fh << id.str() << "\t"; } if (types[loc->marker] == "lmx--") fh << ""; else if (types[loc->marker] == "--xnp") fh << ""; else fh << "<" << types[loc->marker] << ">"; Datum **d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (parent_ids.count(pmap->rev_sample_index(i))) continue; fh << "\t"; if (d[i] == NULL) map_type == cp ? fh << "--" : fh << "-"; else fh << d[i]->trans_gtype; } fh << "\n"; } fh << "\nindividual names:\n"; map::iterator s; for (s = samples.begin(); s != samples.end(); s++) { if (parent_ids.count(s->first)) continue; fh << s->second << "\n"; } fh.close(); return 0; } int write_onemap_mapmaker(map &catalog, PopMap *pmap, map &types, map &samples, set &parent_ids) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genotypes_" << progeny_limit; string file = in_path + pop_name.str() + ".onemap.txt"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening joinmap output file '" << file << "'\n"; exit(1); } // // Count the number of mappable progeny // map::iterator it; CSLocus *loc; int num_loci = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; num_loci++; } cerr << "Writing " << num_loci << " loci to OneMap file, '" << file << "'\n"; // // Output map type. // if (map_type == f2 ) fh << "data type f2 intercross\n"; else if (map_type == bc1) fh << "data type f2 backcross\n"; // // Output the header: number of individuals, number of markers, number of // quantitative traits (none). // fh << pmap->sample_cnt() - parent_ids.size() << " " << num_loci << " " << "0\n\n"; // // Output each locus. 
// for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; fh << "*" << loc->id; Datum **d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (parent_ids.count(pmap->rev_sample_index(i))) continue; fh << " "; if (d[i] == NULL) fh << "-"; else fh << d[i]->trans_gtype; } fh << "\n"; } fh.close(); return 0; } int write_onemap(map &catalog, PopMap *pmap, map &types, map &samples, set &parent_ids) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genotypes_" << progeny_limit; string file = in_path + pop_name.str() + "onemap.tsv"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening joinmap output file '" << file << "'\n"; exit(1); } // // Count the number of mappable progeny // map::iterator it; CSLocus *loc; int num_loci = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; num_loci++; } cerr << "Writing " << num_loci << " loci to OneMap file, '" << file << "'\n"; map marker_types; marker_types["abxoo"] = "D1.11"; marker_types["ooxab"] = "D2.16"; marker_types["abxaa"] = "D1.10"; marker_types["aaxab"] = "D2.15"; marker_types["abxab"] = "B3.7"; marker_types["abxac"] = "A.2"; marker_types["abxcd"] = "A.1"; // // Output the header: number of individuals followed by number of markers. // fh << pmap->sample_cnt() - parent_ids.size() << "\t" << num_loci << "\n"; // // Output each locus. 
// for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; fh << "*" << loc->id << " " << marker_types[types[loc->marker]] << "\t"; Datum **d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (parent_ids.count(pmap->rev_sample_index(i))) continue; if (d[i] == NULL) fh << "-"; else fh << d[i]->trans_gtype; if (i < pmap->sample_cnt() - 1) fh << ","; } fh << "\n"; } fh.close(); return 0; } int write_rqtl(map &catalog, PopMap *pmap, map &types, map &samples, set &parent_ids) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genotypes_" << progeny_limit; string file = in_path + pop_name.str() + ".rqtl.tsv"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening R/QTL output file '" << file << "'\n"; exit(1); } // // Count the number of mappable progeny // map::iterator it; CSLocus *loc; int num_loci = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->trans_gcnt < progeny_limit) continue; num_loci++; } cerr << "Writing " << num_loci << " loci to R/QTL file, '" << file << "'\n"; map map_types; map_types[cp] = "CP"; map_types[dh] = "DH"; map_types[bc1] = "BC1"; map_types[f2] = "F2"; // // Output the header of the file, followed by the list of markers, one per column // fh << "# Exported: " << pop_name.str() << "\n" << "# Map Type: " << map_types[map_type] << "\n" << "# Num Loci: " << num_loci << "\n" << "# Num Samples " << pmap->sample_cnt() - parent_ids.size() << "\n"; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->gcnt < progeny_limit) continue; fh << ","; stringstream id; loc->annotation.length() > 0 ? 
id << loc->id << "|" << loc->annotation : id << loc->id; fh << id.str(); } fh << "\n"; // // Output the chromosome (if available) for each marker and then the location // for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->gcnt < progeny_limit) continue; fh << ","; string chr; chr = strlen(loc->loc.chr) > 0 ? loc->loc.chr : "1"; fh << chr; } fh << "\n"; int i = 1; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (loc->gcnt < progeny_limit) continue; fh << ","; int bp = loc->loc.bp > 0 ? loc->loc.bp : i; fh << bp; i++; } fh << "\n"; // // For each sample, print out the genotypes for each marker // Datum *d; for (int i = 0; i < pmap->sample_cnt(); i++) { if (parent_ids.count(pmap->rev_sample_index(i))) continue; fh << samples[pmap->rev_sample_index(i)]; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; //if (loc->gcnt < progeny_limit) continue; d = pmap->datum(loc->id, pmap->rev_sample_index(i)); fh << ","; if (d == NULL) map_type == cp ? fh << "--" : fh << "-"; else fh << d->trans_gtype; } fh << "\n"; } fh.close(); return 0; } // sub create_imputed_genotype_map { // my ($order, $marker, $tag_id, $parents, $progeny, $map) = @_; // my (@keys, $key, $m, $type, $allele, $uall); // my (%gtypes, %allcnt, %uniqall); // // // // Count up the number of each type of observed allele in the progeny, // // record whether those genotypes are heterozygous or homozygous. // // // foreach $key (keys %{$progeny}) { // my $alleles; // print STDERR "Examining progeny $key\n" if ($debug); // // // // Discard progeny with more than one locus matched to this catalog tag. 
// // // @keys = keys %{$progeny->{$key}}; // next if (scalar(@keys) > 1); // $alleles = join("|", sort @{$progeny->{$key}->{$keys[0]}}); // if (!defined($allcnt{$alleles})) { // $allcnt{$alleles} = scalar(@{$progeny->{$key}->{$keys[0]}}); // } // //print STDERR "Adding genotype $alleles\n"; // $gtypes{$alleles}++; // foreach $allele (@{$progeny->{$key}->{$keys[0]}}) { // $uniqall{$allele}++; // } // } // // // // Examine the first parent alleles (the only alleles we have, since // // we are imputing the second parent. // // // my @parents = keys %{$parents}; // my %legal_genotypes = (); // $key = $order->{$parents[0]} eq "first" ? $parents[0] : $parents[1]; // $m = substr($marker, 0, 2); // foreach $type (split(//, $m)) { // //print STDERR " Adding $type to genotypes\n" if ($debug); // $legal_genotypes{$type}++; // } // my @types = sort keys %legal_genotypes; // if ($marker eq "lmxll") { // @keys = sort {$gtypes{$b} <=> $gtypes{$a}} keys %gtypes; // // // // Discard heterozygous alleles and find the highest frequency homozygote, // // this is the "l" in the "lmxll" marker. 
// // // while ($allcnt{$keys[0]} == 2) { // shift @keys; // } // $map->{$keys[0]} = shift @types; // print STDERR " Assinging '$keys[0]' to first parent genotype '", $map->{$keys[0]}, "'\n" if ($debug); // foreach $uall (sort {$uniqall{$b} <=> $uniqall{$a}} keys %uniqall) { // if ($uall ne $keys[0]) { // $allele = $uall; // last; // } // } // $map->{$allele} = shift @types; // print STDERR " Assinging '$allele' to first parent genotype '", $map->{$allele}, "'\n" if ($debug); // } // } int load_marker_list(string path, set &list) { char line[id_len]; ifstream fh(path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening white/black list file '" << path << "'\n"; exit(1); } int marker; char *e; while (fh.good()) { fh.getline(line, id_len); if (strlen(line) == 0) continue; marker = (int) strtol(line, &e, 10); if (*e == '\0') list.insert(marker); } fh.close(); if (list.size() == 0) { cerr << "Unable to load any markers from '" << path << "'\n"; help(); } return 0; } int build_file_list(vector &files) { uint pos; string file; struct dirent *direntry; DIR *dir = opendir(in_path.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path << "' for reading.\n"; exit(1); } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file == "." 
|| file == "..") continue; if (file.substr(0, 6) == "batch_") continue; pos = file.rfind(".tags.tsv"); if (pos < file.length()) files.push_back(file.substr(0, pos)); } closedir(dir); sort(files.begin(), files.end()); if (files.size() == 0) { cerr << "Unable to locate any input files to process within '" << in_path << "'\n"; } cerr << "Found " << files.size() << " input file(s).\n"; return 0; } bool hap_compare(pair a, pair b) { return (a.second > b.second); } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"corr", no_argument, NULL, 'c'}, {"sql", no_argument, NULL, 's'}, {"num_threads", required_argument, NULL, 'p'}, {"batch_id", required_argument, NULL, 'b'}, {"in_path", required_argument, NULL, 'P'}, {"map_type", required_argument, NULL, 't'}, {"out_type", required_argument, NULL, 'o'}, {"progeny", required_argument, NULL, 'r'}, {"min_depth", required_argument, NULL, 'm'}, {"min_hom_seqs", required_argument, NULL, 'H'}, {"min_het_seqs", required_argument, NULL, 'N'}, {"max_het_seqs", required_argument, NULL, 'X'}, {"renz", required_argument, NULL, 'e'}, {"whitelist", required_argument, NULL, 'W'}, {"blacklist", required_argument, NULL, 'B'}, {"man_corr", required_argument, NULL, 'C'}, {"lnl_lim", required_argument, NULL, 'L'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvcsib:p:t:o:r:P:m:e:H:N:X:W:B:C:L:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'P': in_path = optarg; break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 
1, 2, 3\n"; help(); } break; case 't': if (strcasecmp(optarg, "cp") == 0) map_type = cp; else if (strcasecmp(optarg, "bc1") == 0) map_type = bc1; else if (strcasecmp(optarg, "f2") == 0) map_type = f2; else if (strcasecmp(optarg, "dh") == 0) map_type = dh; else if (strcasecmp(optarg, "gen") == 0) map_type = gen; else map_type = unk; break; case 'o': if (strcasecmp(optarg, "joinmap") == 0) out_type = joinmap; else if (strcasecmp(optarg, "rqtl") == 0) out_type = rqtl; else if (strcasecmp(optarg, "onemap") == 0) out_type = onemap; else if (strcasecmp(optarg, "genomic") == 0) out_type = genomic; break; case 'r': progeny_limit = atoi(optarg); break; case 'c': corrections = true; break; case 'L': lnl_limit = is_double(optarg); filter_lnl = true; break; case 'i': expand_id = true; break; case 's': sql_out = true; break; case 'W': wl_file = optarg; break; case 'B': bl_file = optarg; break; case 'C': man_corrections = true; cor_path = optarg; break; case 'm': min_stack_depth = is_integer(optarg); break; case 'H': min_hom_seqs = is_integer(optarg); break; case 'N': min_het_seqs = is_double(optarg); break; case 'X': max_het_seqs = is_double(optarg); break; case 'e': enz = optarg; break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: help(); abort(); } } if (in_path.length() == 0) { cerr << "You must specify a path to the directory containing Stacks output files.\n"; help(); } if (in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (batch_id < 0) { cerr << "You must specify a batch ID.\n"; help(); } if (map_type != cp && map_type != dh && map_type != bc1 && map_type != f2 && map_type != gen && map_type != none) { cerr << "You must specify a valid map type. 
'CP', 'DH', 'F2', 'BC1' and 'GEN' are the currently supported map types.\n"; help(); } if (map_type != none && min_stack_depth > 0) cerr << "Warning: using a minimum stack depth when building genetic markers is not recommended.\n"; if (out_type == genomic && enz.length() == 0) { cerr << "You must specify the restriction enzyme used with 'genomic' output.\n"; help(); } if (out_type == genomic && renz.count(enz) == 0) { cerr << "Unrecognized restriction enzyme specified: '" << enz.c_str() << "'.\n"; help(); } return 0; } void version() { std::cerr << "genotypes " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "genotypes " << VERSION << "\n" << "genotypes -b batch_id -P path [-r min] [-m min] [-t map_type -o type] [-B blacklist] [-W whitelist] [-c] [-s] [-e renz] [-v] [-h]" << "\n" << " b: Batch ID to examine when exporting from the catalog.\n" << " r: minimum number of progeny required to print a marker.\n" << " c: make automated corrections to the data.\n" << " P: path to the Stacks output files.\n" << " t: map type to write. 'CP', 'DH', 'F2', 'BC1' and 'GEN' are the currently supported map types.\n" << " o: output file type to write, 'joinmap', 'onemap', 'rqtl', and 'genomic' are currently supported.\n" << " m: specify a minimum stack depth required before exporting a locus in a particular individual.\n" << " s: output a file to import results into an SQL database.\n" << " B: specify a file containing Blacklisted markers to be excluded from the export.\n" << " W: specify a file containign Whitelisted markers to include in the export.\n" << " e: restriction enzyme, required if generating 'genomic' output.\n" << " v: print program version." << "\n" << " h: display this help messsage." 
<< "\n" << " Filtering options:\n" << " --lnl_lim [num]: filter loci with log likelihood values below this threshold.\n" << " Automated corrections options:\n" << " --min_hom_seqs: minimum number of reads required at a stack to call a homozygous genotype (default 5).\n" << " --min_het_seqs: below this minor allele frequency a stack is called a homozygote, above it (but below --max_het_seqs) it is called unknown (default 0.05).\n" << " --max_het_seqs: minimum frequency of minor allele to call a heterozygote (default 0.1).\n" << " Manual corrections options:\n" << " --cor_path : path to file containing manual genotype corrections from a Stacks SQL database to incorporate into output.\n"; exit(0); } stacks-1.35/src/genotypes.h000644 000765 000024 00000012475 12335173442 016474 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __GENOTYPES_H__ #define __GENOTYPES_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::stringstream; #include using std::vector; #include using std::map; #include using std::set; #include "constants.h" #include "input.h" #include "stacks.h" #include "locus.h" #include "renz.h" #include "PopMap.h" #include "sql_utilities.h" #include "catalog_utils.h" #include "utils.h" #include "genotype_dictionaries.h" // // Chi-squared distribution critical values // [0] => p-values // [1] => one degree of freedom // [2] => two degrees of freedom // [3] => three degrees of freedom // const int chisq_crit_values_size = 10; const double chisq_crit_values[4][10] = { {0.50, 0.25, 0.20, 0.15, 0.10, 0.05, 0.01, 0.005, 0.001, 0.0005}, {0.46, 1.32, 1.64, 2.07, 2.71, 3.84, 6.63, 7.880, 10.830, 12.1200}, {1.39, 2.77, 3.22, 3.79, 4.61, 5.99, 9.21, 10.600, 13.820, 15.2000}, {2.37, 4.11, 4.64, 5.32, 6.25, 7.81, 11.34, 12.840, 16.270, 17.7300} }; void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(vector &); int load_marker_list(string, set &); int identify_parental_ids(map &, vector &, set &); int find_markers(map &, PopMap *, set &); int calculate_f(map &, PopMap *, set &); int create_genotype_map(CSLocus *, PopMap *, set &); int apply_locus_constraints(map &, PopMap *); int call_population_genotypes(CSLocus *, PopMap *, map > &); int tally_progeny_haplotypes(CSLocus *, PopMap *, set &, int &, double &, string &); int map_specific_genotypes(map &, PopMap *, set &); int translate_genotypes(map &, map > &, map &, PopMap *, map &, set &); int automated_corrections(map &, set &, map &, vector > &, PopMap *); 
int check_uncalled_snps(CSLocus *, Locus *, Datum *); int call_alleles(vector &, vector &, vector &); int check_homozygosity(vector &, int, char, char, string &); int manual_corrections(string, PopMap *); int correct_cp_markers_missing_alleles(set &, map &, PopMap *); int calc_segregation_distortion(map > &, map &, PopMap *, set &); double tally_generic_gtypes(int, PopMap *, set &, map &); double tally_translated_gtypes(int, PopMap *, set &, map &, map &); double chisq_test(map > &, map &, string, double); double chisq_pvalue(int, double); int export_gen_map(map &, PopMap *, set &, map &); int export_cp_map(map &, PopMap *, set &, map &); int export_bc1_map(map &, PopMap *, set &, map &); int export_dh_map(map &, PopMap *, set &, map &); int export_f2_map(map &, PopMap *, set &, map &); int write_generic(map &, PopMap *, map &, set &, bool); int write_sql(map &, PopMap *, set &); int write_joinmap(map &, PopMap *, map &, map &, set &); int write_onemap(map &, PopMap *, map &, map &, set &); int write_onemap_mapmaker(map &, PopMap *, map &, map &, set &); int write_rqtl(map &, PopMap *, map &, map &, set &); int write_genomic(map &, PopMap *); bool hap_compare(pair, pair); #endif // __GENOTYPES_H__ stacks-1.35/src/gzFasta.h000644 000765 000024 00000012211 12335173442 016042 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __GZFASTA_H__ #define __GZFASTA_H__ #ifdef HAVE_LIBZ #include #include #include "input.h" class GzFasta: public Input { gzFile gz_fh; string buf; public: GzFasta(const char *path) : Input() { this->gz_fh = gzopen(path, "rb"); if (!this->gz_fh) { cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n"; exit(EXIT_FAILURE); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(this->gz_fh, libz_buffer_size); #endif }; GzFasta(string path) : Input() { this->gz_fh = gzopen(path.c_str(), "rb"); if (!this->gz_fh) { cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n"; exit(EXIT_FAILURE); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(this->gz_fh, libz_buffer_size); #endif }; ~GzFasta() { gzclose(this->gz_fh); }; Seq *next_seq(); int next_seq(Seq &); }; Seq *GzFasta::next_seq() { // // Check the contents of the line buffer. When we finish reading a FASTA record // the buffer will either contain whitespace or the header of the next FAST // record. // while (this->line[0] != '>' && !gzeof(this->gz_fh)) { gzgets(this->gz_fh, this->line, max_len); } if (gzeof(this->gz_fh)) { return NULL; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; // // Initialize the Seq structure and store the FASTA ID // Seq *s = new Seq; s->id = new char[len + 1]; strcpy(s->id, this->line + 1); // // Read the sequence from the file -- keep reading lines until we reach the next // record or the end of file. 
// gzgets(this->gz_fh, this->line, max_len); while (this->line[0] != '>' && !gzeof(this->gz_fh)) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; this->buf += this->line; gzgets(this->gz_fh, this->line, max_len); } if (gzeof(this->gz_fh)) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; this->buf += this->line; } s->seq = new char[this->buf.length() + 1]; strcpy(s->seq, this->buf.c_str()); this->buf.clear(); return s; } int GzFasta::next_seq(Seq &s) { // // Check the contents of the line buffer. When we finish reading a FASTA record // the buffer will either contain whitespace or the header of the next FAST // record. // while (this->line[0] != '>' && !gzeof(this->gz_fh)) { gzgets(this->gz_fh, this->line, max_len); } if (gzeof(this->gz_fh)) { return 0; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; // // Store the FASTA ID // strcpy(s.id, this->line + 1); // // Read the sequence from the file -- keep reading lines until we reach the next // record or the end of file. 
// gzgets(this->gz_fh, this->line, max_len); while (this->line[0] != '>' && !gzeof(this->gz_fh)) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; this->buf += this->line; gzgets(this->gz_fh, this->line, max_len); } if (gzeof(this->gz_fh)) { len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; this->buf += this->line; } strcpy(s.seq, this->buf.c_str()); this->buf.clear(); return 1; } #else // If HAVE_LIBZ is undefined and zlib library is not present. #include "input.h" class GzFasta: public Input { public: GzFasta(const char *path) : Input() { cerr << "Gzip support was not enabled when Stacks was compiled.\n"; }; ~GzFasta() {}; Seq *next_seq() { return NULL; }; int next_seq(Seq &) { return 0; }; }; #endif // HAVE_LIBZ #endif // __GZFASTA_H__ stacks-1.35/src/gzFastq.h000644 000765 000024 00000014227 12335173442 016073 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __GZFASTQ_H__ #define __GZFASTQ_H__ #ifdef HAVE_LIBZ #include #include #include "input.h" class GzFastq: public Input { gzFile gz_fh; public: GzFastq(string path) : Input() { this->gz_fh = gzopen(path.c_str(), "rb"); if (!this->gz_fh) { cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n"; exit(EXIT_FAILURE); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(this->gz_fh, libz_buffer_size); #endif }; GzFastq(const char *path) : Input() { this->gz_fh = gzopen(path, "rb"); if (!this->gz_fh) { cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n"; exit(EXIT_FAILURE); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(this->gz_fh, libz_buffer_size); #endif }; ~GzFastq() { gzclose(this->gz_fh); }; Seq *next_seq(); int next_seq(Seq &s); }; Seq *GzFastq::next_seq() { char *res = NULL; // // Check the contents of the line buffer. When we finish reading a FASTQ record // the buffer will either contain whitespace or the header of the next FASTQ // record. 
// this->line[0] = '\0'; do { res = gzgets(this->gz_fh, this->line, max_len); } while (this->line[0] != '@' && res != NULL); if (res == NULL) { return NULL; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; // // Initialize the Seq structure and store the FASTQ ID // Seq *s = new Seq; s->id = new char[strlen(this->line) + 1]; strcpy(s->id, this->line + 1); // // Read the sequence from the file // gzgets(this->gz_fh, this->line, max_len); if (gzeof(this->gz_fh)) { return NULL; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; s->seq = new char[len + 1]; strcpy(s->seq, this->line); // // Read the repeat of the ID // this->line[0] = '\0'; res = gzgets(this->gz_fh, this->line, max_len); if (this->line[0] != '+' || res == NULL) { return NULL; } // // Read the quality score from the file // this->line[0] = '\0'; res = gzgets(this->gz_fh, this->line, max_len); if (res == NULL && strlen(this->line) == 0) { return NULL; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; s->qual = new char[len + 1]; strcpy(s->qual, this->line); // // Clear the line buffer so it is set up for the next record. If a '@' // appears in the quality scores read, it will break parsing next time // it is called. // this->line[0] = '\0'; return s; } int GzFastq::next_seq(Seq &s) { char *res = NULL; // // Check the contents of the line buffer. When we finish reading a FASTQ record // the buffer will either contain whitespace or the header of the next FASTQ // record. 
// this->line[0] = '\0'; do { res = gzgets(this->gz_fh, this->line, max_len); } while (this->line[0] != '@' && res != NULL); if (res == NULL) { return 0; } // // Check if there is a carraige return in the buffer // uint len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; // // Store the FASTQ ID // strcpy(s.id, this->line + 1); // // Read the sequence from the file // this->line[0] = '\0'; res = gzgets(this->gz_fh, this->line, max_len); if (res == NULL) { return 0; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; strcpy(s.seq, this->line); // // Read the repeat of the ID // this->line[0] = '\0'; res = gzgets(this->gz_fh, this->line, max_len); if (this->line[0] != '+' || res == NULL) { return 0; } // // Read the quality score from the file // this->line[0] = '\0'; res = gzgets(this->gz_fh, this->line, max_len); if (res == NULL && strlen(this->line) == 0) { return 0; } len = strlen(this->line); if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0'; if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0'; strcpy(s.qual, this->line); // // Clear the line buffer so it is set up for the next record. If a '@' // appears in the quality scores read, it will break parsing next time // it is called. // this->line[0] = '\0'; return 1; } #else // If HAVE_LIBZ is undefined and zlib library is not present. 
#include "input.h" class GzFastq: public Input { public: GzFastq(const char *path) : Input() { cerr << "Gzip support was not enabled when Stacks was compiled.\n"; }; GzFastq(string path) : Input() { cerr << "Gzip support was not enabled when Stacks was compiled.\n"; }; ~GzFastq() {}; Seq *next_seq() { return NULL; }; int next_seq(Seq &) { return 0; }; }; #endif // HAVE_LIBZ #endif // __GZFASTQ_H__ stacks-1.35/src/hstacks.cc000644 000765 000024 00000064320 12441417455 016254 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // hstacks -- find homologous stacks among a set of samples // // Match stacks between samples to identify homologous loci. Stacks // may contain masked sites (N's), resulting from using the fixed-model in // the ustacks program, or an explicit number of mismatches per tag may be // specified, allowing non-exact matching homologus sites to be identified // across a set of samples. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id$ // #include "hstacks.h" // Global variables to hold command-line options. 
string in_path; string out_path; int batch_id = 0; int num_threads = 1; int stack_depth_min = 1; int stack_dist = 0; int n_limit = 4; int main (int argc, char* argv[]) { parse_command_line(argc, argv); // // Set the number of OpenMP parallel threads to execute. // #ifdef _OPENMP omp_set_num_threads(num_threads); #endif vector input_files; vector::iterator in_file; map samples; build_file_list(in_path, input_files); int id = 0; for (in_file = input_files.begin(); in_file != input_files.end(); in_file++) { map sample; map::iterator it; size_t pos_1 = (*in_file).find_last_of("/"); size_t pos_2 = (*in_file).find_last_of("."); string sample_id = (*in_file).substr(pos_1 + 1, (pos_2 - pos_1 - 1)); bool compressed = false; load_loci(*in_file, sample, false, false, compressed); // // Give each locus a unique ID among all samples // for (it = sample.begin(); it != sample.end(); it++) { it->second->uniq_id = id; samples[id] = (*it).second; id++; } } // // Calculate distance between tags in different samples. // cerr << "Calculating distance between stacks...\n"; calc_kmer_distance(samples, stack_dist); // // Write out tags matched between samples. // write_homologous_loci(samples); return 0; } int calc_kmer_distance(map &loci, int stack_dist) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // CatKmerHashMap kmer_map; map::iterator it; vector >::iterator allele; HLocus *tag_1, *tag_2; int i, j; // // Calculate the number of k-mers we will generate. If kmer_len == 0, // determine the optimal length for k-mers. // int con_len = strlen(loci.begin()->second->con); int kmer_len = determine_kmer_length(con_len, stack_dist); int num_kmers = con_len - kmer_len + 1; // // Calculate the minimum number of matching k-mers required for a possible sequence match. 
// int min_hits = calc_min_kmer_matches(kmer_len, stack_dist, con_len, false); // // If more mismatches are allowed than can be handled by the k-mer algorithm, revert // to the simple, slow matching algorithm. // if (min_hits <= 0) { cerr << " Unable to use k-mer matching due to the number of allowed mismatches. Switching to slower algorithm...\n"; calc_distance(loci, stack_dist); return 0; } cerr << " Number of kmers per sequence: " << num_kmers << "\n"; populate_kmer_hash(loci, kmer_map, kmer_len); cerr << " " << loci.size() << " loci, " << kmer_map.size() << " elements in the kmer hash.\n"; // // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. // vector keys; for (it = loci.begin(); it != loci.end(); it++) keys.push_back(it->first); #pragma omp parallel private(i, j, tag_1, tag_2, allele) { #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { tag_1 = loci[keys[i]]; for (allele = tag_1->strings.begin(); allele != tag_1->strings.end(); allele++) { vector kmers; generate_kmers(allele->second.c_str(), kmer_len, num_kmers, kmers); map > hits; vector >::iterator map_it; int d; // // Lookup the occurances of each k-mer in the kmer_map // for (j = 0; j < num_kmers; j++) { if (kmer_map.count(kmers[j]) > 0) for (map_it = kmer_map[kmers[j]].begin(); map_it != kmer_map[kmers[j]].end(); map_it++) hits[map_it->second].push_back(map_it->first); } // // Free the allocated k-mers. // for (j = 0; j < num_kmers; j++) delete [] kmers[j]; kmers.clear(); //cerr << " Tag " << tag_1->id << " hit " << hits.size() << " kmers.\n"; map >::iterator hit_it; vector::iterator all_it; // // Iterate through the list of hits. For each hit, total up the hits to the various alleles. // Any allele that has more than min_hits check its full length to verify a match. 
// for (hit_it = hits.begin(); hit_it != hits.end(); hit_it++) { //cerr << " Tag " << hit_it->first << " has " << hit_it->second << " hits (min hits: " << min_hits << ")\n"; map allele_cnts; map::iterator cnt_it; for (all_it = hit_it->second.begin(); all_it != hit_it->second.end(); all_it++) allele_cnts[*all_it]++; for (cnt_it = allele_cnts.begin(); cnt_it != allele_cnts.end(); cnt_it++) { //cerr << " allele " << cnt_it->first << " has " << cnt_it->second << " hits\n"; if (cnt_it->second < min_hits) continue; //cerr << " Match found, checking full-length match\n"; tag_2 = loci[hit_it->first]; d = dist(allele->second.c_str(), tag_2, cnt_it->first); if (d < 0) cerr << "Unknown error calculating distance between " << tag_1->id << " and " << tag_2->id << "; query allele: " << allele->first << "\n"; //cerr << " Distance: " << d << " CTAG_DIST: " << ctag_dist << "\n"; // // Add a match to the query sequence: catalog ID, catalog allele, query allele, distance // if (d <= stack_dist) { if (tag_1->depth < stack_depth_min || tag_2->depth < stack_depth_min) continue; tag_1->add_match(tag_2->uniq_id, d); } } } } // Sort the vector of distances. sort(tag_1->matches.begin(), tag_1->matches.end(), compare_mdist); } } return 0; } int populate_kmer_hash(map &loci, CatKmerHashMap &kmer_map, int kmer_len) { map::iterator it; vector >::iterator allele; vector kmers; HLocus *tag; char *hash_key; bool exists; int j; // // Break each stack down into k-mers and create a hash map of those k-mers // recording in which sequences they occur. 
// int num_kmers = strlen(loci.begin()->second->con) - kmer_len + 1; for (it = loci.begin(); it != loci.end(); it++) { tag = it->second; // // Iterate through the possible Loci alleles // for (allele = tag->strings.begin(); allele != tag->strings.end(); allele++) { // // Generate and Hash the kmers for this allele string // generate_kmers(allele->second.c_str(), kmer_len, num_kmers, kmers); for (j = 0; j < num_kmers; j++) { exists = kmer_map.count(kmers[j]) == 0 ? false : true; if (exists) { hash_key = kmers[j]; } else { hash_key = new char [strlen(kmers[j]) + 1]; strcpy(hash_key, kmers[j]); } kmer_map[hash_key].push_back(make_pair(allele->first, tag->uniq_id)); } for (j = 0; j < num_kmers; j++) delete [] kmers[j]; kmers.clear(); } } //dump_kmer_map(kmer_map); return 0; } int calc_distance(map &loci, int utag_dist) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // map::iterator it; HLocus *tag_1, *tag_2; int i, j; cerr << "Calculating distance between stacks...\n"; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (it = loci.begin(); it != loci.end(); it++) keys.push_back(it->first); #pragma omp parallel private(i, j, tag_1, tag_2) { #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { tag_1 = loci[keys[i]]; int d; for (j = 0; j < (int) keys.size(); j++) { tag_2 = loci[keys[j]]; // Don't compare tag_1 against itself. if (tag_1 == tag_2) continue; d = dist(tag_1, tag_2); // // Store the distance between these two sequences if it is // below the maximum distance. // if (d == utag_dist) { if (tag_1->depth < stack_depth_min || tag_2->depth < stack_depth_min) continue; tag_1->add_match(tag_2->uniq_id, d); } } // Sort the vector of distances. 
sort(tag_1->matches.begin(), tag_1->matches.end(), compare_mdist); } } return 0; } int dist(HLocus *tag_1, HLocus *tag_2) { int dist = 0; char *p = tag_1->con; char *q = tag_2->con; char *end = p + strlen(p); // Count the number of characters that are different // between the two sequences. Don't count wildcard 'N' // nucleotides. while (p < end) { dist += ((*p == *q) || (*q == 'N' || *p == 'N')) ? 0 : 1; p++; q++; } return dist; } bool compare_mdist(Match *a, Match *b) { return (a->dist < b->dist); } int call_consensus(map &loci, set &merge_list, string &consensus, vector &snps, vector &alleles) { // // Create a two-dimensional array, each row containing one read. For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. // HLocus *tag; set::iterator j; vector reads; for (j = merge_list.begin(); j != merge_list.end(); j++) { tag = loci[*j]; reads.push_back(tag->con); } // // Iterate over each column of the array and call the consensus base. // int row, col; int length = strlen(reads[0]); int height = reads.size(); char *base; for (col = 0; col < length; col++) { vector > nuc; nuc.push_back(make_pair('A', 0)); nuc.push_back(make_pair('C', 0)); nuc.push_back(make_pair('G', 0)); nuc.push_back(make_pair('T', 0)); for (row = 0; row < height; row++) { base = reads[row]; base = base + col; //cerr << " Row: " << row << " Col: " << col << " Base: " << *base << "\n"; switch(*base) { case 'A': nuc[0].second++; break; case 'C': nuc[1].second++; break; case 'G': nuc[2].second++; break; case 'T': nuc[3].second++; break; default: break; } } //cerr << "A: " << nuc[0].second << " C: " << nuc[1].second << " G: " << nuc[2].second << " T: " << nuc[3].second << "\n"; // // Find the base with a plurality of occurances and call it. // sort(nuc.begin(), nuc.end(), compare_pair); consensus += nuc[0].second > 0 ? nuc[0].first : 'N'; // // If the nucleotides are not fixed record a SNP. 
// if (nuc[0].second > 0 && nuc[1].second > 0) { SNP *s = new SNP; s->col = col; s->lratio = 0; s->rank_1 = nuc[0].first; s->rank_2 = nuc[1].first; snps.push_back(s); } } if (!call_alleles(reads, snps, alleles)) { cerr << "Error calling alleles.\n"; exit(0); } return 0; } int call_alleles(vector &reads, vector &snps, vector &alleles) { int row; int height = reads.size(); string allele; char *base; vector::iterator snp; if (snps.size() == 0) return 1; for (row = 0; row < height; row++) { allele.clear(); for (snp = snps.begin(); snp != snps.end(); snp++) { base = reads[row]; base = base + (*snp)->col; allele += *base; } if (allele.size() == snps.size()) alleles.push_back(allele); else return 0; } return 1; } int write_homologous_loci(map &samples) { map::iterator i; vector::iterator k; set write_map; HLocus *tag_1, *tag_2; cerr << "Writing homologous stacks...\n"; // // Parse the input file name to create the output files // stringstream prefix; string out_file; prefix << out_path << "batch_" << batch_id; // Open the output files for writing. out_file = prefix.str() + ".homologous.nucs.tsv"; ofstream nuc_file(out_file.c_str()); out_file = prefix.str() + ".homologous.snps.tsv"; ofstream snp_file(out_file.c_str()); out_file = prefix.str() + ".homologous.alleles.tsv"; ofstream all_file(out_file.c_str()); out_file = prefix.str() + ".homologous.matches.tsv"; ofstream mat_file(out_file.c_str()); out_file = prefix.str() + ".homologous.tags.tsv"; ofstream tag_file(out_file.c_str()); int id = 1; for (i = samples.begin(); i != samples.end(); i++) { tag_1 = i->second; // // This tag may already have been merged by an earlier operation. // if (write_map.find(tag_1->uniq_id) != write_map.end()) continue; set unique_merge_list; set unique_alleles; set::iterator it; trace_stack_graph(tag_1, samples, unique_merge_list); // // Call the consensus for this locus and identify SNPs and associated alleles. 
// string consensus; vector snps; vector alleles; call_consensus(samples, unique_merge_list, consensus, snps, alleles); // // Output the consensus tag for a locus in this sample. // tag_file << "0" << "\t" << batch_id << "\t" << id << "\t" << tag_1->loc.chr << "\t" << tag_1->loc.bp << "\t" << "consensus" << "\t" << 0 << "\t" << "" << "\t" << consensus << "\t" << 0 << "\t" << // These flags are unused in hstacks, but important in ustacks 0 << "\t" << 0 << "\n"; // // Output the SNPs and alleles // string allele; vector::iterator s; set::iterator u; for (s = snps.begin(); s != snps.end(); s++) snp_file << "0" << "\t" << batch_id << "\t" << id << "\t" << (*s)->col << "\t" << (*s)->lratio << "\t" << (*s)->rank_1 << "\t" << (*s)->rank_2 << "\n"; for (uint a = 0; a < alleles.size(); a++) unique_alleles.insert(alleles[a]); for (u = unique_alleles.begin(); u != unique_alleles.end(); u++) all_file << "0" << "\t" << batch_id << "\t" << id << "\t" << *u << "\t" << 0 << "\t" << 0 << "\n"; unique_alleles.clear(); int sub_id = 0; int a = 0; for (it = unique_merge_list.begin(); it != unique_merge_list.end(); it++) { tag_2 = samples[(*it)]; // Record the nodes that have been merged in this round. write_map.insert(tag_2->uniq_id); // // For each tag we are outputting, output the depth of coverage for each // nucleotide in that stack separately (in order to calculate correlations // between depth of coverage and fixed/non-fixed nucleotides. // if (unique_merge_list.size() > 1) { char *p, *end; end = tag_2->con + strlen(tag_2->con); for (p = tag_2->con; p < end; p++) nuc_file << tag_2->sample_id << "_" << tag_2->id << "\t" << *p << "\t" << tag_2->depth << "\n"; } // // Output the consensus sequenes for all homologous loci. 
// tag_file << "0" << "\t" << batch_id << "\t" << id << "\t" << tag_2->loc.chr << "\t" << tag_2->loc.bp << "\t" << "primary" << "\t" << sub_id << "\t" << tag_2->sample_id << "_" << tag_2->id << "\t" << tag_2->con << "\t" << "" << "\t" << // These flags are unused in hstacks, but important in ustacks "" << "\t" << "" << "\n"; allele = (alleles.size() == 0) ? "consensus" : alleles[a]; mat_file << "0" << "\t" << batch_id << "\t" << id << "\t" << tag_2->sample_id << "\t" << tag_2->uniq_id << "\t" << allele << "\n"; sub_id++; a++; } id++; } tag_file.close(); nuc_file.close(); snp_file.close(); all_file.close(); mat_file.close(); cerr << " Wrote " << id - 1 << " tags to " << out_file << "\n"; return 0; } int trace_stack_graph(HLocus *tag_1, map &loci, set &unique_merge_list) { queue merge_list; pair::iterator,bool> ret; vector::iterator k; HLocus *tag_2; unique_merge_list.insert(tag_1->uniq_id); merge_list.push(tag_1->uniq_id); while (!merge_list.empty()) { tag_2 = loci[merge_list.front()]; merge_list.pop(); for (k = tag_2->matches.begin(); k != tag_2->matches.end(); k++) { ret = unique_merge_list.insert((*k)->cat_id); // // If this Tag has not already been added to the merge list (i.e. we were able // to insert it in to our unique_merge_list, which is a set), add it for consideration // later in the loop. 
// if (ret.second == true) merge_list.push((*k)->cat_id); } } return 0; } int build_file_list(string in_path, vector &sql_files) { struct dirent *dirp; string d; DIR *dp = opendir(in_path.c_str()); if (dp == NULL) { cerr << "Error (" << errno << ") opening " << in_path << "\n"; return errno; } while ((dirp = readdir(dp)) != NULL) { d = string(dirp->d_name); if (d.find("tags.tsv") != string::npos && d.find("batch") == string::npos) { size_t pos = d.find(".tags.tsv"); d = in_path + d.substr(0, pos); sql_files.push_back(d); } } closedir(dp); cerr << "Identified " << sql_files.size() << " samples.\n"; return 0; } HLocus::~HLocus() { vector::iterator it; for (it = this->matches.begin(); it != this->matches.end(); it++) delete *it; } int HLocus::add_match(int id, int distance) { Match *m = new Match; m->cat_id = id; m->dist = distance; this->matches.push_back(m); return 0; } int HLocus::populate_alleles() { this->strings.clear(); string s; int j; vector::iterator s_it; map::iterator a; if (this->strings.size() == 0) this->strings.push_back(make_pair("consensus", this->con)); else for (a = this->alleles.begin(); a != this->alleles.end(); a++) { s = this->con; j = 0; for (s_it = this->snps.begin(); s_it != this->snps.end(); s_it++) { s.replace((*s_it)->col, 1, 1, a->first[j]); j++; } this->strings.push_back(make_pair(a->first, s)); } // // Count the number of N's // vector col; char *p = this->con; int i = 0; while (*p != '\0') { if (*p == 'N') col.push_back(i); i++; p++; } int n_cnt = col.size(); if (n_cnt == 0) return 0; // // If there are too many Ns in this stack, do not include it in the // search. 
// if (n_cnt > n_limit) { this->strings.clear(); return 0; } // // Generate all permutations of strings for n_cnt N's // if (pstrings.count(n_cnt) == 0) generate_permutations(pstrings, n_cnt); vector > new_strings; vector >::iterator k; vector::iterator c; char *q, **r; int n = (int) pow(4, n_cnt); for (k = this->strings.begin(); k != this->strings.end(); k++) { for (i = 0; i < n; i++) { r = pstrings[n_cnt]; q = r[i]; s = k->second; j = 0; for (c = col.begin(); c != col.end(); c++) { //cerr << "Str: " << s << "; rep str: " << q << "; replacing col: " << *c << " with '" << q[j] << "'\n"; s.replace(*c, 1, 1, q[j]); j++; } new_strings.push_back(make_pair(k->first, s)); } } this->strings.clear(); for (k = new_strings.begin(); k != new_strings.end(); k++) this->strings.push_back(*k); return 0; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"stack_dist", required_argument, NULL, 'n'}, {"depth_min", required_argument, NULL, 'm'}, {"inpath", required_argument, NULL, 'p'}, {"outpath", required_argument, NULL, 'o'}, {"n_limit", required_argument, NULL, 'N'}, {"batch_id", required_argument, NULL, 'b'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvi:p:o:b:e:m:n:N:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'v': version(); break; case 'i': in_path = optarg; break; case 'o': out_path = optarg; break; case 'b': batch_id = atoi(optarg); break; case 'N': n_limit = atoi(optarg); break; case 'm': stack_depth_min = atoi(optarg); break; case 'n': stack_dist = atoi(optarg); break; case 'p': num_threads = atoi(optarg); break; case '?': // getopt_long already printed an error message. 
help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_path.length() == 0) { cerr << "You must specify a path to a set of input files.\n"; help(); } if (in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; return 0; } void version() { std::cerr << "hstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "hstacks " << VERSION << "\n" << "hstacks -i path [-o path] [-b batch_id] [-n mismatches] [-m min] [-p min_threads] [-N limit] [-h]" << "\n" << " i: path to the set of SQL files from which to load loci." << "\n" << " o: output path to write results." << "\n" << " b: SQL Batch ID to insert into the output to identify a group of samples." << "\n" << " m: minimum stack depth required for a locus to be included in the search." << "\n" << " n: number of mismatches to allow between stacks." << "\n" << " N: number of 'N' characters to allow in a stack (default: 4)." << "\n" << " p: enable parallel execution with num_threads threads.\n" << " h: display this help messsage." << "\n\n"; exit(0); } stacks-1.35/src/hstacks.h000644 000765 000024 00000005326 12335173442 016114 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __HSTACKS_H__ #define __HSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include #include #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include #include using std::ofstream; using std::stringstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::queue; #include using std::set; #include #include "constants.h" #include "stacks.h" #include "locus.h" #include "kmers.h" #include "models.h" #include "sql_utilities.h" // // A map holding k-mer permutation strings. For use when generating fuzzy k-mers. // map pstrings; // // Homologous Locus Class // class HLocus : public Locus { public: int uniq_id; // An ID that is unique among all samples in this analysis. vector matches; // Matching tags found for the catalog. HLocus(): Locus() {} ~HLocus(); int populate_alleles(); int add_match(int, int); }; void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(string, vector &); int calc_kmer_distance(map &, int); int calc_distance(map &, int); int dist(HLocus *, HLocus *); int write_homologous_loci(map &); int trace_stack_graph(HLocus *, map &, set &); int call_consensus(map &, set &, string &, vector &, vector &); int call_alleles(vector &, vector &, vector &); int populate_kmer_hash(map &, CatKmerHashMap &, int); bool compare_mdist(Match *, Match *); bool compare_pair(pair, pair); #endif // __HSTACKS_H__ stacks-1.35/src/input.cc000644 000765 000024 00000012156 12533677757 015772 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // input.cc -- routines to read various formats of data into the XXX data structure. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id$ // #include "input.h" Seq::Seq() { this->id = NULL; this->seq = NULL; this->qual = NULL; this->loc_str = NULL; } Seq::Seq(const char *id, const char *seq) { this->id = new char[strlen(id) + 1]; this->seq = new char[strlen(seq) + 1]; this->qual = NULL; this->loc_str = NULL; strcpy(this->id, id); strcpy(this->seq, seq); } Seq::Seq(const char *id, const char *seq, const char *qual) { this->id = new char[strlen(id) + 1]; this->seq = new char[strlen(seq) + 1]; this->qual = new char[strlen(qual) + 1]; this->loc_str = NULL; strcpy(this->id, id); strcpy(this->seq, seq); strcpy(this->qual, qual); } Seq::Seq(const char *id, const char *seq, const char *qual, const char *chr, uint bp, strand_type strand) { this->id = new char[strlen(id) + 1]; this->qual = new char[strlen(qual) + 1]; this->loc_str = new char[strlen(chr) + 15]; strcpy(this->id, id); strcpy(this->qual, qual); this->loc.set(chr, bp, strand); sprintf(this->loc_str, "%s|%d|%c", chr, bp, strand == plus ? 
'+' : '-'); // // Reverse complement sequences from the negative strand // if (strand == plus) { this->seq = new char[strlen(seq) + 1]; strcpy(this->seq, seq); } else { this->seq = rev_comp(seq); } } Input::Input() { memset(this->line, '\0', max_len); } Input::Input(const char *path) { memset(this->line, '\0', max_len); this->path = string(path); // // Open the file for reading // this->fh.open(path, ifstream::in); if (this->fh.fail()) cerr << "Error opening input file '" << path << "'\n"; } Input::~Input() { // Close the file this->fh.close(); } int parse_tsv(const char *line, vector &parts) { const char *p, *q; string part; parts.clear(); p = line; do { for (q = p; *q != '\t' && *q != '\0'; q++); if (q - p == 0) part = ""; else part.assign(p, (q - p)); parts.push_back(part); p = q + 1; } while (*q != '\0'); //for (size_t i = 0; i < parts.size(); i++) // cerr << "Parts[" << i << "]: " << parts[i].c_str() << "\n"; //cerr << "\n"; return 0; } int parse_ssv(const char *line, vector &parts) { const char *p, *q; string part; parts.clear(); p = line; do { for (q = p; *q != ' ' && *q != '\0'; q++); if (q - p == 0) part = ""; else part.assign(p, (q - p)); parts.push_back(string(part)); p = q + 1; } while (*q != '\0'); return 0; } int read_line(ifstream &fh, char **line, int *size) { char buf[max_len]; int blen, llen; memset(*line, 0, *size); llen = 0; // // Make sure we read the entire line. // do { fh.clear(); fh.getline(buf, max_len); blen = strlen(buf); if (blen + llen <= (*size) - 1) { strcat(*line, buf); llen += blen; } else { *size *= 2; llen += blen; *line = (char *) realloc(*line, *size); strcat(*line, buf); } } while (fh.fail() && !fh.bad() && !fh.eof()); if (fh.eof() || fh.bad()) return 0; return 1; } int read_gzip_line(gzFile &fh, char **line, int *size) { char buf[max_len]; int blen, llen; bool eol; memset(*line, 0, *size); llen = 0; eol = false; // // Make sure we read the entire line. 
// do { if (gzgets(fh, buf, max_len) == NULL) break; blen = strlen(buf); if (blen > 0 && buf[blen - 1] == '\n') { eol = true; buf[blen - 1] = '\0'; } if (blen + llen <= (*size) - 1) { strcat(*line, buf); llen += blen; } else { *size *= 2; llen += blen; *line = (char *) realloc(*line, *size); strcat(*line, buf); } } while (!gzeof(fh) && !eol); if (gzeof(fh)) return 0; return 1; } bool is_comment(const char *line) { const char *p = line; while (*p != '\0') switch(*p) { case '#': return true; break; case ' ': case '\t': p++; break; default: return false; break; } return false; } stacks-1.35/src/input.h000644 000765 000024 00000004225 12533677757 015632 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __INPUT_H__ #define __INPUT_H__ #include #include #include #include #include #include #include #include using std::ifstream; using std::string; using std::vector; using std::cin; using std::cout; using std::cerr; using std::endl; #include "constants.h" #include "utils.h" #include "stacks.h" typedef unsigned int uint; class Seq { public: char *id; char *seq; char *qual; // Location information for a mapped sequence char *loc_str; PhyLoc loc; Seq( void ); Seq(const char *, const char *); Seq(const char *, const char *, const char *); Seq(const char *, const char *, const char *, const char *, uint, strand_type); ~Seq( void ) { delete[] id; delete[] seq; delete[] qual; delete[] loc_str; } }; // // The base class for all of our Input classes, such as Tsv, Fastq, Fasta, etc. // class Input { public: string path; ifstream fh; char line[max_len]; Input(); Input(const char *path); virtual ~Input(); virtual Seq *next_seq() = 0; virtual int next_seq(Seq &) = 0; }; int parse_tsv(const char *, vector &); int parse_ssv(const char *, vector &); int read_line(ifstream &, char **, int *); int read_gzip_line(gzFile &, char **, int *); bool is_comment(const char *); #endif // __INPUT_H__ stacks-1.35/src/kmer_filter.cc000644 000765 000024 00000145242 12571641525 017123 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // kmer_filter -- // #include "kmer_filter.h" // // Global variables to hold command-line options. // FileT in_file_type = FileT::unknown; FileT out_file_type = FileT::fastq; vector in_files; vector in_pair_files; string in_path; string out_path; string k_freq_path; bool filter_rare_k = false; bool filter_abundant_k = false; bool normalize = false; bool discards = false; bool write_k_freq = false; bool read_k_freq = false; bool kmer_distr = false; bool ill_barcode = false; uint truncate_seq = 0; int transition_lim = 3; int normalize_lim = 0; int kmer_len = 15; int max_k_freq = 20000; double max_k_pct = 0.80; double min_k_pct = 0.15; int min_lim = 0; int max_lim = 0; int num_threads = 1; int barcode_size = 0; int main (int argc, char* argv[]) { parse_command_line(argc, argv); if (min_lim == 0) min_lim = (int) round((double) kmer_len * 0.80); cerr << "Using a kmer size of " << kmer_len << "\n"; if (filter_rare_k) { cerr << "Filtering out reads by identifying rare kmers: On.\n" << " A kmer is considered rare when its coverage is at " << min_k_pct * 100 << "% or below the median kmer coverage for the read.\n" << " A read is dropped when it contains " << min_lim << " or more rare kmers in a row.\n"; } else cerr << "Filtering out reads by identifying rare kmers: Off.\n"; if (filter_abundant_k) { cerr << "Filtering out reads by identifying abundant kmers: On.\n" << " Kmer is considered abundant when it occurs " << max_k_freq << " or more times.\n"; if (max_lim == 0) cerr << " A read is dropped when it contains " << max_k_pct * 100 << "% or more abundant kmers.\n"; else cerr << " A read is dropped when it contains " << max_lim << " or more abundant kmers.\n"; } else cerr << "Filtering out reads by identifying abundant kmers: Off.\n"; if (normalize) cerr << "Normalizing read depth: On.\n" << " Read depth limit: " << normalize_lim << "x\n"; else cerr << 
"Normalizing read depth: Off.\n"; vector > files, pair_files; map > counters; SeqKmerHash kmers; vector kmers_keys; build_file_list(in_files, files); cerr << "Found " << files.size() << " input file(s).\n"; build_file_list(in_pair_files, pair_files); cerr << "Found " << pair_files.size() << " paired input file(s).\n"; if (filter_rare_k || filter_abundant_k || kmer_distr || write_k_freq) { cerr << "Generating kmer distribution...\n"; if (read_k_freq) read_kmer_freq(k_freq_path, kmers, kmers_keys); else populate_kmers(pair_files, files, kmers, kmers_keys); // double kmer_med, kmer_mad; // calc_kmer_median(kmers, kmer_med, kmer_mad); // cerr << "Median kmer frequency: " << kmer_med << "; median absolute deviation: " << kmer_mad << "\n"; if (kmer_distr) { generate_kmer_dist(kmers); if (write_k_freq == false) exit(0); } if (write_k_freq) { write_kmer_freq(k_freq_path, kmers); exit(0); } cerr << "Filtering reads by kmer frequency...\n"; for (uint i = 0; i < pair_files.size(); i += 2) { cerr << "Processing paired file " << i+1 << " of " << (pair_files.size() / 2) << " [" << pair_files[i].second << "]\n"; counters[pair_files[i].second]["total"] = 0; counters[pair_files[i].second]["retained"] = 0; counters[pair_files[i].second]["rare_k"] = 0; counters[pair_files[i].second]["abundant_k"] = 0; process_paired_reads(pair_files[i].first, pair_files[i].second, pair_files[i+1].first, pair_files[i+1].second, kmers, counters[pair_files[i].second]); cerr << " " << counters[pair_files[i].second]["total"] << " total reads; " << "-" << counters[pair_files[i].second]["rare_k"] << " rare k-mer reads; " << "-" << counters[pair_files[i].second]["abundant_k"] << " abundant k-mer reads; " << counters[pair_files[i].second]["retained"] << " retained reads.\n"; } for (uint i = 0; i < files.size(); i++) { cerr << "Processing file " << i+1 << " of " << files.size() << " [" << files[i].second << "]\n"; counters[files[i].second]["total"] = 0; counters[files[i].second]["retained"] = 0; 
counters[files[i].second]["rare_k"] = 0; counters[files[i].second]["abundant_k"] = 0; process_reads(files[i].first, files[i].second, kmers, counters[files[i].second]); cerr << " " << counters[files[i].second]["total"] << " total reads; " << "-" << counters[files[i].second]["rare_k"] << " rare k-mer reads; " << "-" << counters[files[i].second]["abundant_k"] << " abundant k-mer reads; " << counters[files[i].second]["retained"] << " retained reads.\n"; } free_kmer_hash(kmers, kmers_keys); print_results(counters); } if (normalize) { cerr << "Normalizing read depth...\n"; // // Add the remainder files from the previous step to the queue. // if (filter_rare_k || filter_abundant_k) { string file; int pos; for (uint i = 0; i < pair_files.size(); i += 2) { file = pair_files[i].second; pos = file.find_last_of("."); if (file.substr(pos - 2, 2) == ".1") pos -= 2; file = file.substr(0, pos) + ".rem.fil"; file += out_file_type == FileT::fastq ? ".fq" : ".fa"; cerr << "Adding remainder file generated in previous step to queue, '" << file << "\n"; files.push_back(make_pair(pair_files[i].first, file)); } } for (uint i = 0; i < pair_files.size(); i += 2) { cerr << "Processing paired files " << i+1 << " of " << (pair_files.size() / 2) << " [" << pair_files[i].second << " / " << pair_files[i+1].second << "]\n"; counters[pair_files[i].second]["total"] = 0; counters[pair_files[i].second]["retained"] = 0; counters[pair_files[i].second]["overep"] = 0; normalize_paired_reads(pair_files[i].first, pair_files[i].second, pair_files[i+1].first, pair_files[i+1].second, kmers, kmers_keys, counters[pair_files[i].second]); cerr << " " << counters[pair_files[i].second]["total"] << " total reads; " << "-" << counters[pair_files[i].second]["overep"] << " over-represented reads; " << counters[pair_files[i].second]["retained"] << " retained reads.\n"; } for (uint i = 0; i < files.size(); i++) { cerr << "Processing file " << i+1 << " of " << files.size() << " [" << files[i].second << "]\n"; 
counters[files[i].second]["total"] = 0; counters[files[i].second]["retained"] = 0; counters[files[i].second]["overep"] = 0; normalize_reads(files[i].first, files[i].second, kmers, kmers_keys, counters[files[i].second]); cerr << " " << counters[files[i].second]["total"] << " total reads; " << "-" << counters[files[i].second]["overep"] << " over-represented reads; " << counters[files[i].second]["retained"] << " retained reads.\n"; } free_kmer_hash(kmers, kmers_keys); } return 0; } int process_paired_reads(string in_path_1, string in_file_1, string in_path_2, string in_file_2, SeqKmerHash &kmers, map &counter) { Input *fh_1, *fh_2; ofstream *discard_fh_1, *discard_fh_2; int pos; string path; string path_1 = in_path_1 + in_file_1; string path_2 = in_path_2 + in_file_2; if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1); fh_2 = new Fastq(path_2); } else if (in_file_type == FileT::fasta) { fh_1 = new Fasta(path_1); fh_2 = new Fasta(path_2); } else if (in_file_type == FileT::gzfasta) { fh_1 = new GzFasta(path_1 + ".gz"); fh_2 = new GzFasta(path_2 + ".gz"); } else if (in_file_type == FileT::gzfastq) { fh_1 = new GzFastq(path_1 + ".gz"); fh_2 = new GzFastq(path_2 + ".gz"); } else if (in_file_type == FileT::bustard) { fh_1 = new Bustard(path_1); fh_2 = new Bustard(path_2); } // // Open the output files. // pos = in_file_1.find_last_of("."); path = out_path + in_file_1.substr(0, pos) + ".fil" + in_file_1.substr(pos); ofstream *ofh_1 = new ofstream(path.c_str(), ifstream::out); if (ofh_1->fail()) { cerr << "Error opening filtered output file '" << path << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); path = out_path + in_file_2.substr(0, pos) + ".fil" + in_file_2.substr(pos); ofstream *ofh_2 = new ofstream(path.c_str(), ifstream::out); if (ofh_2->fail()) { cerr << "Error opening filtered paired output file '" << path << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); // // Pull the ".2" suffix off the paired file name to make the remainder file name. 
// if (in_file_2.substr(pos - 2, 2) == ".2") pos -= 2; path = out_path + in_file_2.substr(0, pos) + ".rem.fil"; path += out_file_type == FileT::fastq ? ".fq" : ".fa"; ofstream *rem_fh = new ofstream(path.c_str(), ifstream::out); if (rem_fh->fail()) { cerr << "Error opening filtered remainder output file '" << path << "'\n"; exit(1); } // // Open a file for recording discarded reads // if (discards) { pos = in_file_1.find_last_of("."); path = out_path + in_file_1.substr(0, pos) + ".discards" + in_file_1.substr(pos); discard_fh_1 = new ofstream(path.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); path = out_path + in_file_2.substr(0, pos) + ".discards" + in_file_2.substr(pos); discard_fh_2 = new ofstream(path.c_str(), ifstream::out); if (discard_fh_2->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Unable to allocate Seq object.\n"; exit(1); } int rare_k, abundant_k, num_kmers, max_kmer_lim; bool retain_1, retain_2; char *kmer = new char[kmer_len + 1]; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read pair " << i << " \r"; counter["total"] += 2; stringstream msg_1, msg_2; retain_1 = true; retain_2 = true; num_kmers = strlen(s_1->seq) - kmer_len + 1; max_kmer_lim = max_lim == 0 ? (int) round((double) num_kmers * max_k_pct) : max_lim; // // Drop the first sequence if it has too many rare or abundant kmers. 
// kmer_lookup(kmers, s_1->seq, kmer, num_kmers, rare_k, abundant_k); if (filter_rare_k && rare_k > 0) { counter["rare_k"]++; retain_1 = false; msg_1 << "rare_k_" << rare_k; } if (retain_1 && filter_abundant_k && abundant_k > max_kmer_lim) { counter["abundant_k"]++; retain_1 = false; msg_1 << "abundant_k_" << abundant_k; } rare_k = 0; abundant_k = 0; num_kmers = strlen(s_2->seq) - kmer_len + 1; max_kmer_lim = max_lim == 0 ? (int) round((double) num_kmers * max_k_pct) : max_lim; // // Drop the second sequence if it has too many rare or abundant kmers. // kmer_lookup(kmers, s_2->seq, kmer, num_kmers, rare_k, abundant_k); if (filter_rare_k && rare_k > 0) { counter["rare_k"]++; retain_2 = false; msg_2 << "rare_k_" << rare_k; } if (retain_2 && filter_abundant_k && abundant_k > max_kmer_lim) { counter["abundant_k"]++; retain_2 = false; msg_2 << "abundant_k_" << abundant_k; } if (retain_1 && retain_2) { counter["retained"] += 2; out_file_type == FileT::fastq ? write_fastq(ofh_1, s_1) : write_fasta(ofh_1, s_1); out_file_type == FileT::fastq ? write_fastq(ofh_2, s_2) : write_fasta(ofh_2, s_2); } if (retain_1 && !retain_2) { counter["retained"]++; out_file_type == FileT::fastq ? write_fastq(rem_fh, s_1) : write_fasta(rem_fh, s_1); } if (!retain_1 && retain_2) { counter["retained"]++; out_file_type == FileT::fastq ? write_fastq(rem_fh, s_2) : write_fasta(rem_fh, s_2); } if (discards && !retain_1) out_file_type == FileT::fastq ? write_fastq(discard_fh_1, s_1, msg_1.str()) : write_fasta(discard_fh_1, s_1, msg_1.str()); if (discards && !retain_2) out_file_type == FileT::fastq ? write_fastq(discard_fh_2, s_2, msg_2.str()) : write_fasta(discard_fh_2, s_2, msg_2.str()); delete s_1; delete s_2; i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); delete [] kmer; if (discards) { delete discard_fh_1; delete discard_fh_2; } // // Close the file and delete the Input object. 
// delete fh_1; delete fh_2; delete ofh_1; delete ofh_2; delete rem_fh; return 0; } int process_reads(string in_path, string in_file, SeqKmerHash &kmers, map &counter) { Input *fh; ofstream *discard_fh; int pos; string path = in_path + in_file; if (in_file_type == FileT::fastq) fh = new Fastq(path); else if (in_file_type == FileT::fasta) fh = new Fasta(path); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(path + ".gz"); else if (in_file_type == FileT::gzfasta) fh = new GzFasta(path + ".gz"); else if (in_file_type == FileT::bustard) fh = new Bustard(path); // // Open the output file. // pos = in_file.find_last_of("."); path = out_path + in_file.substr(0, pos) + ".fil" + in_file.substr(pos); ofstream *out_fh = new ofstream(path.c_str(), ifstream::out); if (out_fh->fail()) { cerr << "Error opening output file '" << path << "'\n"; exit(1); } // // Open a file for recording discarded reads // if (discards) { pos = in_file.find_last_of("."); path = out_path + in_file.substr(0, pos) + ".discards" + in_file.substr(pos); discard_fh = new ofstream(path.c_str(), ifstream::out); if (discard_fh->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s = fh->next_seq(); if (s == NULL) { cerr << "Unable to allocate Seq object.\n"; exit(1); } int rare_k, abundant_k, num_kmers, max_kmer_lim; bool retain; char *kmer = new char[kmer_len + 1]; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read " << i << " \r"; counter["total"]++; stringstream msg; // // Drop this sequence if it has too many rare or abundant kmers. // retain = true; num_kmers = strlen(s->seq) - kmer_len + 1; max_kmer_lim = max_lim == 0 ? 
(int) round((double) num_kmers * max_k_pct) : max_lim; kmer_lookup(kmers, s->seq, kmer, num_kmers, rare_k, abundant_k); if (filter_rare_k && rare_k > 0) { counter["rare_k"]++; retain = false; msg << "rare_k_" << rare_k; } if (retain && filter_abundant_k && abundant_k > max_kmer_lim) { counter["abundant_k"]++; retain = false; msg << "abundant_k_" << abundant_k; } if (retain) { counter["retained"]++; out_file_type == FileT::fastq ? write_fastq(out_fh, s) : write_fasta(out_fh, s); } if (discards && !retain) out_file_type == FileT::fastq ? write_fastq(discard_fh, s, msg.str()) : write_fasta(discard_fh, s, msg.str()); delete s; i++; } while ((s = fh->next_seq()) != NULL); delete [] kmer; if (discards) delete discard_fh; // // Close the file and delete the Input object. // delete fh; delete out_fh; return 0; } int normalize_paired_reads(string in_path_1, string in_file_1, string in_path_2, string in_file_2, SeqKmerHash &kmers, vector &kmer_keys, map &counter) { Input *fh_1, *fh_2; ofstream *discard_fh_1, *discard_fh_2; ofstream *ofh_1, *ofh_2, *rem_fh; string path_1, path_2; int pos; if (filter_abundant_k || filter_rare_k) { // // If we already filtered the data, open the files we created in the output // directory to normalize. // pos = in_file_1.find_last_of("."); path_1 = out_path + in_file_1.substr(0, pos) + ".fil" + in_file_1.substr(pos); pos = in_file_2.find_last_of("."); path_2 = out_path + in_file_2.substr(0, pos) + ".fil" + in_file_2.substr(pos); if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1); fh_2 = new Fastq(path_2); } else if (in_file_type == FileT::gzfastq) { fh_1 = new Fastq(path_1); fh_2 = new Fastq(path_2); } else if (in_file_type == FileT::fasta) { fh_1 = new Fasta(path_1); fh_2 = new Fasta(path_2); } else if (in_file_type == FileT::gzfasta) { fh_1 = new Fasta(path_1); fh_2 = new Fasta(path_2); } } else { // // Otherwise, open unmodified files. 
// path_1 = in_path_1 + in_file_1; path_2 = in_path_2 + in_file_2; if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1); fh_2 = new Fastq(path_2); } else if (in_file_type == FileT::gzfastq) { fh_1 = new GzFastq(path_1 + ".gz"); fh_2 = new GzFastq(path_2 + ".gz"); } else if (in_file_type == FileT::fasta) { fh_1 = new Fasta(path_1); fh_2 = new Fasta(path_2); } else if (in_file_type == FileT::gzfasta) { fh_1 = new GzFasta(path_1 + ".gz"); fh_2 = new GzFasta(path_2 + ".gz"); } else if (in_file_type == FileT::bustard) { fh_1 = new Bustard(path_1); fh_2 = new Bustard(path_2); } } // // Open the output files. // if (filter_abundant_k || filter_rare_k) { pos = in_file_1.find_last_of("."); path_1 = out_path + in_file_1.substr(0, pos) + ".fil.norm" + in_file_1.substr(pos); ofh_1 = new ofstream(path_1.c_str(), ifstream::out); if (ofh_1->fail()) { cerr << "Error opening normalized output file '" << path_1 << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); path_2 = out_path + in_file_2.substr(0, pos) + ".fil.norm" + in_file_2.substr(pos); ofh_2 = new ofstream(path_2.c_str(), ifstream::out); if (ofh_2->fail()) { cerr << "Error opening normalized paired output file '" << path_2 << "'\n"; exit(1); } if (in_file_2.substr(pos - 2, 2) == ".2") pos -= 2; path_2 = out_path + in_file_2.substr(0, pos) + ".fil.norm.rem"; path_2 += out_file_type == FileT::fastq ? 
".fq" : ".fa"; rem_fh = new ofstream(path_2.c_str(), ifstream::out); if (rem_fh->fail()) { cerr << "Error opening normalized remainder output file '" << path_2 << "'\n"; exit(1); } } else { pos = in_file_1.find_last_of("."); path_1 = out_path + in_file_1.substr(0, pos) + ".norm" + in_file_1.substr(pos); ofh_1 = new ofstream(path_1.c_str(), ifstream::out); if (ofh_1->fail()) { cerr << "Error opening normalized output file '" << path_1 << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); path_2 = out_path + in_file_2.substr(0, pos) + ".norm" + in_file_2.substr(pos); ofh_2 = new ofstream(path_2.c_str(), ifstream::out); if (ofh_2->fail()) { cerr << "Error opening normalized paired output file '" << path_2 << "'\n"; exit(1); } if (in_file_2.substr(pos - 2, 2) == ".2") pos -= 2; path_2 = out_path + in_file_2.substr(0, pos) + ".norm.rem"; path_2 += out_file_type == FileT::fastq ? ".fq" : ".fa"; rem_fh = new ofstream(path_2.c_str(), ifstream::out); if (rem_fh->fail()) { cerr << "Error opening normalized remainder output file '" << path_2 << "'\n"; exit(1); } } // // Open a file for recording discarded reads // if (discards) { pos = in_file_1.find_last_of("."); if (filter_abundant_k || filter_rare_k) path_1 = out_path + in_file_1.substr(0, pos) + ".fil.discards" + in_file_1.substr(pos); else path_1 = out_path + in_file_1.substr(0, pos) + ".discards" + in_file_1.substr(pos); discard_fh_1 = new ofstream(path_1.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path_1 << "'\n"; exit(1); } pos = in_file_2.find_last_of("."); if (filter_abundant_k || filter_rare_k) path_2 = out_path + in_file_2.substr(0, pos) + ".fil.discards" + in_file_2.substr(pos); else path_2 = out_path + in_file_2.substr(0, pos) + ".discards" + in_file_2.substr(pos); discard_fh_2 = new ofstream(path_2.c_str(), ifstream::out); if (discard_fh_2->fail()) { cerr << "Error opening discard output file '" << path_1 << "'\n"; exit(1); } } // // Read in the first 
record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Unable to allocate Seq object.\n"; exit(1); } int num_kmers; bool retain_1, retain_2; char *kmer = new char[kmer_len + 1]; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read pair " << i << " \r"; counter["total"] += 2; retain_1 = true; retain_2 = true; num_kmers = strlen(s_1->seq) - kmer_len + 1; // // Drop the first sequence if it has too many rare or abundant kmers. // retain_1 = normalize_kmer_lookup(kmers, s_1->seq, kmer, num_kmers, kmer_keys); num_kmers = strlen(s_2->seq) - kmer_len + 1; // // Drop the second sequence if it has too many rare or abundant kmers. // retain_2 = normalize_kmer_lookup(kmers, s_2->seq, kmer, num_kmers, kmer_keys); if (retain_1 && retain_2) { counter["retained"] += 2; out_file_type == FileT::fastq ? write_fastq(ofh_1, s_1) : write_fasta(ofh_1, s_1); out_file_type == FileT::fastq ? write_fastq(ofh_2, s_2) : write_fasta(ofh_2, s_2); } else { counter["overep"] +=2; } if (retain_1 && !retain_2) { counter["retained"]++; counter["overep"]++; out_file_type == FileT::fastq ? write_fastq(rem_fh, s_1) : write_fasta(rem_fh, s_1); } if (!retain_1 && retain_2) { counter["retained"]++; counter["overep"]++; out_file_type == FileT::fastq ? write_fastq(rem_fh, s_2) : write_fasta(rem_fh, s_2); } if (discards && !retain_1) out_file_type == FileT::fastq ? write_fastq(discard_fh_1, s_1) : write_fasta(discard_fh_1, s_1); if (discards && !retain_2) out_file_type == FileT::fastq ? write_fastq(discard_fh_2, s_2) : write_fasta(discard_fh_2, s_2); delete s_1; delete s_2; i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); delete [] kmer; if (discards) { delete discard_fh_1; delete discard_fh_2; } // // Close the file and delete the Input object. 
// delete fh_1; delete fh_2; delete ofh_1; delete ofh_2; delete rem_fh; return 0; } int normalize_reads(string in_path, string in_file, SeqKmerHash &kmers, vector &kmer_keys, map &counter) { Input *fh; ofstream *discard_fh; string path; int pos = in_file.find_last_of("."); if (filter_abundant_k || filter_rare_k) { if (in_file.substr(pos - 4, 4) == ".fil") path = out_path + in_file; else path = out_path + in_file.substr(0, pos) + ".fil" + in_file.substr(pos); if (in_file_type == FileT::fastq) fh = new Fastq(path); else if (in_file_type == FileT::gzfastq) fh = new Fastq(path); else if (in_file_type == FileT::fasta) fh = new Fasta(path); else if (in_file_type == FileT::gzfasta) fh = new Fasta(path); else if (in_file_type == FileT::bustard) fh = new Bustard(path); } else { path = in_path + in_file; if (in_file_type == FileT::fastq) fh = new Fastq(path); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(path + ".gz"); else if (in_file_type == FileT::fasta) fh = new Fasta(path); else if (in_file_type == FileT::gzfasta) fh = new GzFasta(path + ".gz"); else if (in_file_type == FileT::bustard) fh = new Bustard(path); } // // Open the output file. 
// // if (filter_abundant_k || filter_rare_k) { // path = out_path + in_file.substr(0, pos) + ".norm" + in_file.substr(pos); // } else { // path = out_path + in_file.substr(0, pos) + ".norm" + in_file.substr(pos); // } path = out_path + in_file.substr(0, pos) + ".norm" + in_file.substr(pos); ofstream *out_fh = new ofstream(path.c_str(), ifstream::out); if (out_fh->fail()) { cerr << "Error opening normalized output file '" << path << "'\n"; exit(1); } // // Open a file for recording discarded reads // if (discards) { if (filter_abundant_k || filter_rare_k) path = out_path + in_file.substr(0, pos) + ".fil.discards" + in_file.substr(pos); else path = out_path + in_file.substr(0, pos) + ".discards" + in_file.substr(pos); discard_fh = new ofstream(path.c_str(), ifstream::out); if (discard_fh->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s = fh->next_seq(); if (s == NULL) { cerr << "Unable to allocate Seq object.\n"; exit(1); } int num_kmers; bool retain; char *kmer = new char[kmer_len + 1]; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read " << i << " \r"; counter["total"]++; // // Drop this sequence if it has too many rare or abundant kmers. // retain = true; num_kmers = strlen(s->seq) - kmer_len + 1; retain = normalize_kmer_lookup(kmers, s->seq, kmer, num_kmers, kmer_keys); if (retain) { counter["retained"]++; out_file_type == FileT::fastq ? write_fastq(out_fh, s) : write_fasta(out_fh, s); } else { counter["overep"]++; } if (discards && !retain) out_file_type == FileT::fastq ? write_fastq(discard_fh, s) : write_fasta(discard_fh, s); delete s; i++; } while ((s = fh->next_seq()) != NULL); delete [] kmer; if (discards) delete discard_fh; // // Close the file and delete the Input object. 
// delete fh; delete out_fh; return 0; } int populate_kmers(vector > &pair_files, vector > &files, SeqKmerHash &kmers, vector &kmers_keys) { // // Break each read down into k-mers and create a hash map of those k-mers // recording in which sequences they occur. // uint j = 1; uint cnt = files.size() + pair_files.size(); for (uint i = 0; i < files.size(); i++) { cerr << "Generating kmers from file " << j << " of " << cnt << " [" << files[i].second << "]\n"; process_file_kmers(files[i].first + files[i].second, kmers, kmers_keys); j++; } for (uint i = 0; i < pair_files.size(); i++) { cerr << "Generating kmers from file " << j << " of " << cnt << " [" << pair_files[i].second << "]\n"; process_file_kmers(pair_files[i].first + pair_files[i].second, kmers, kmers_keys); j++; } cerr << kmers.size() << " unique k-mers recorded.\n"; return 0; } int read_kmer_freq(string in_path, SeqKmerHash &kmer_map, vector &kmer_map_keys) { cerr << "Reading kmer frequencies from '" << in_path.c_str() << "'...\n"; ifstream fh(in_path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening rare kmer frequency input file '" << in_path << "'\n"; exit(1); } char *hash_key; bool exists; int len, cnt; char kmer[id_len]; char line[max_len]; vector parts; long i = 1; while (fh.good()) { if (i % 10000 == 0) cerr << " Processing kmer " << i << " \r"; fh.getline(line, max_len); len = strlen(line); if (len == 0) continue; // // Check that there is no carraige return in the buffer. 
// if (line[len - 1] == '\r') line[len - 1] = '\0'; // // Ignore comments // if (line[0] == '#') continue; // // Parse the kmer and the number of times it occurred // // parse_tsv(line, parts); if (parts.size() != 2) { cerr << "kmer frequencies are not formated correctly: expecting two, tab separated columns, found " << parts.size() << ".\n"; exit(0); } strcpy(kmer, parts[1].c_str()); cnt = is_integer(kmer); if (cnt < 0) { cerr << "Non integer found in second column.\n"; exit(0); } strcpy(kmer, parts[0].c_str()); exists = kmer_map.count(kmer) == 0 ? false : true; if (exists) { cerr << "Warning: kmer '" << kmer << "' already exists in the kmer hash map.\n"; hash_key = kmer; kmer_map[hash_key] += cnt; } else { hash_key = new char [strlen(kmer) + 1]; strcpy(hash_key, kmer); kmer_map_keys.push_back(hash_key); kmer_map[hash_key] = cnt; } i++; } fh.close(); cerr << kmer_map.size() << " unique k-mers read.\n"; kmer_len = strlen(kmer_map.begin()->first); cerr << "Setting kmer length to " << kmer_len << "bp.\n"; return 0; } int write_kmer_freq(string path, SeqKmerHash &kmer_map) { cerr << "Writing kmer frequencies to '" << path.c_str() << "'..."; ofstream out_fh(path.c_str(), ifstream::out); if (out_fh.fail()) { cerr << "Error opening rare kmer output file '" << path << "'\n"; exit(1); } SeqKmerHash::iterator i; out_fh << "# Kmer\tCount\n"; for (i = kmer_map.begin(); i != kmer_map.end(); i++) { out_fh << i->first << "\t" << i->second << "\n"; } out_fh.close(); cerr << "done.\n"; return 0; } int process_file_kmers(string path, SeqKmerHash &kmer_map, vector &kmer_map_keys) { vector kmers; char *hash_key; bool exists; int j; Input *fh = NULL; if (in_file_type == FileT::fastq) fh = new Fastq(path); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(path + ".gz"); else if (in_file_type == FileT::fasta) fh = new Fasta(path); else if (in_file_type == FileT::gzfasta) fh = new GzFasta(path + ".gz"); else if (in_file_type == FileT::bustard) fh = new Bustard(path.c_str()); // 
// Read in the first record, initializing the Seq object s. // Seq *s = fh->next_seq(); if (s == NULL) { cerr << "Unable to allocate Seq object.\n"; exit(1); } int num_kmers; char *kmer = new char [kmer_len + 1]; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read " << i << " \r"; num_kmers = strlen(s->seq) - kmer_len + 1; // // Generate and hash the kmers for this raw read // kmer[kmer_len] = '\0'; for (j = 0; j < num_kmers; j++) { strncpy(kmer, s->seq + j, kmer_len); exists = kmer_map.count(kmer) == 0 ? false : true; if (exists) { hash_key = kmer; } else { hash_key = new char [kmer_len + 1]; strcpy(hash_key, kmer); kmer_map_keys.push_back(hash_key); } kmer_map[hash_key]++; } delete s; i++; } while ((s = fh->next_seq()) != NULL); delete [] kmer; // // Close the file and delete the Input object. // delete fh; return 0; } int generate_kmer_dist(SeqKmerHash &kmer_map) { SeqKmerHash::iterator i; map bins; cerr << "Generating kmer distribution...\n"; for (i = kmer_map.begin(); i != kmer_map.end(); i++) bins[i->second]++; map::iterator j; vector > sorted_kmers; for (j = bins.begin(); j != bins.end(); j++) sorted_kmers.push_back(make_pair(j->first, j->second)); cout << "KmerFrequency\tCount\n"; for (unsigned long k = 0; k < sorted_kmers.size(); k++) cout << sorted_kmers[k].first << "\t" << sorted_kmers[k].second << "\n"; return 0; } int calc_kmer_median(SeqKmerHash &kmers, double &kmer_med, double &kmer_mad) { kmer_med = 0.0; kmer_mad = 0.0; int num_kmers = kmers.size(); vector freqs, residuals; freqs.reserve(num_kmers); SeqKmerHash::iterator i; for (i = kmers.begin(); i != kmers.end(); i++) freqs.push_back(i->second); sort(freqs.begin(), freqs.end()); kmer_med = num_kmers % 2 == 0 ? (double) (freqs[num_kmers / 2 - 1] + freqs[num_kmers / 2]) / 2.0 : (double) freqs[num_kmers / 2 - 1]; // // Calculate the median absolute deviation. 
// residuals.reserve(num_kmers); for (int j = 0; j < num_kmers; j++) residuals.push_back(abs(freqs[j] - (int) kmer_med)); sort(residuals.begin(), residuals.end()); kmer_mad = num_kmers % 2 == 0 ? (double) (residuals[num_kmers / 2 - 1] + residuals[num_kmers / 2]) / 2.0 : (double) residuals[num_kmers / 2 - 1]; return 0; } int kmer_map_cmp(pair a, pair b) { return (a.second < b.second); } inline bool normalize_kmer_lookup(SeqKmerHash &kmer_map, char *read, char *kmer, int num_kmers, vector &kmer_keys) { kmer[kmer_len] = '\0'; int cnt = 0; bool retain = true; // // Generate kmers from this read, increment kmer frequency in dataset. // vector sorted_cnts; sorted_cnts.reserve(num_kmers); // cout << "# " << read << "\n"; for (int j = 0; j < num_kmers; j++) { strncpy(kmer, read + j, kmer_len); cnt = kmer_map.count(kmer) > 0 ? kmer_map[kmer] : 0; sorted_cnts.push_back(cnt); // cout << kmer << "\t" << j << "\t" << cnt << "\n"; } // // Calculate the median kmer frequency along the read. // sort(sorted_cnts.begin(), sorted_cnts.end()); double median = num_kmers % 2 == 0 ? (double) (sorted_cnts[num_kmers / 2 - 1] + sorted_cnts[num_kmers / 2]) / 2.0 : (double) sorted_cnts[num_kmers / 2 - 1]; // cout << "# median: " << median << "\n"; if (median > normalize_lim) retain = false; // // Generate and hash the kmers for this raw read // bool exists; char *hash_key; kmer[kmer_len] = '\0'; for (int j = 0; j < num_kmers; j++) { strncpy(kmer, read + j, kmer_len); exists = kmer_map.count(kmer) == 0 ? false : true; if (exists) { hash_key = kmer; } else { hash_key = new char [kmer_len + 1]; strcpy(hash_key, kmer); kmer_keys.push_back(hash_key); } kmer_map[hash_key]++; } return retain; } inline int kmer_lookup(SeqKmerHash &kmer_map, char *read, char *kmer, int num_kmers, int &rare_k, int &abundant_k) { // // Generate kmers from this read, lookup kmer frequency in dataset. 
// rare_k = 0; abundant_k = 0; kmer[kmer_len] = '\0'; int cnt = 0; vector cnts, sorted_cnts; cnts.reserve(num_kmers); sorted_cnts.reserve(num_kmers); // cout << "# " << read << "\n"; for (int j = 0; j < num_kmers; j++) { strncpy(kmer, read + j, kmer_len); cnt = kmer_map[kmer]; cnts.push_back(cnt); sorted_cnts.push_back(cnt); // cout << kmer << "\t" << j << "\t" << cnt << "\n"; if (cnt >= max_k_freq) abundant_k++; } // // Calculate the median kmer frequency along the read. // sort(sorted_cnts.begin(), sorted_cnts.end()); double median = num_kmers % 2 == 0 ? (double) (sorted_cnts[num_kmers / 2 - 1] + sorted_cnts[num_kmers / 2]) / 2.0 : (double) sorted_cnts[num_kmers / 2 - 1]; // cout << "# median: " << median << "\n"; double bound = round(median * min_k_pct); // cout << "# kmer cov bound: " << bound << "\n"; // // Look for runs of rare kmers. // // We will slide a window across the read, f represents the front of the window, b // represents the back. Each time a kmer is below the bound we will increment run_cnt, // which represents the number of kmers in the window below the bound. If 2/3 of the // kmers in the window go below the bound, assume a sequencing error has occurred. // int run_cnt = 0; int b = 0; for (int f = 0; f < num_kmers; f++) { if (f >= kmer_len) { b++; if (cnts[b] <= bound) run_cnt--; } if (cnts[f] <= bound) { run_cnt++; if (run_cnt >= min_lim) { rare_k++; // cout << "# Rejecting read, position: " << f << "; run_cnt: " << run_cnt << "\n"; return 0; } } // cout << "# b: " << b << "; f: " << f << "; run_cnt: " << run_cnt << "; counts[front]: " << cnts[f] << "; bound: " << bound << "\n"; } // cout << "\n\n"; return 0; } // inline int // kmer_lookup(SeqKmerHash &kmer_map, // char *read, char *kmer, // int num_kmers, // int &rare_k, int &abundant_k, bool &complex) // { // // // // Generate kmers from this read, lookup kmer frequency in dataset. 
// // // rare_k = 0; // abundant_k = 0; // complex = false; // kmer[kmer_len] = '\0'; // int cnt = 0; // int rare_k_lim = (int) round((double) kmer_len * (1.0/2.0)); // vector cnts; // cnts.reserve(num_kmers); // // cout << "# " << read << "\n"; // for (int j = 0; j < num_kmers; j++) { // strncpy(kmer, read + j, kmer_len); // cnt = kmer_map[kmer]; // if (cnt >= 100000) // cnts.push_back(100000); // else if (cnt >= 10000) // cnts.push_back(10000); // else if (cnt >= 1000) // cnts.push_back(1000); // else if (cnt >= 100) // cnts.push_back(100); // else if (cnt >= 10) // cnts.push_back(10); // else // cnts.push_back(1); // // cout << kmer << "\t" << j << "\t" << cnt << "\n"; // if (cnt >= max_k_freq) abundant_k++; // } // // // // // // Detect the number of kmer coverage transitions. // // // // // int t = 0; // // int cov = cnts[0]; // // cout << "\nDetermining transitions:\n" << kmer << "\t" << "0" << "\t" << cnts[0] << "\n"; // // for (int j = 1; j < num_kmers; j++) // // if (cnts[j] != cov) { // // cov = cnts[j]; // // t++; // // cout << kmer << "\t" << j << "\t" << cnts[j] << ": Transition." << "\n"; // // } else { // // cout << kmer << "\t" << j << "\t" << cnts[j] << "\n"; // // } // // cerr << t << " total cnts.\n"; // // // // Look for runs of kmers at various orders of magnitude. // // // // We will slide a window across the read, f represents the front of the window, b // // represents the back. Each time a kmer is below the bound we will increment run_cnt, // // which represents the number of kmers in the window below the bound. If 2/3 of the // // kmers in the window go below the bound, assume a sequencing error has occurred. 
// // Run counters: // // 1 10 100 1k 10k 100k // // runs[0] runs[1] runs[2] runs[3] runs[4] runs[5] // int runs[6] = {0}; // int prev_cnt, run_cnt, tot_trans; // int f = 0; // while (f < num_kmers) { // tot_trans = 0; // run_cnt = 1; // prev_cnt = cnts[f]; // f++; // while (f < num_kmers && cnts[f] == prev_cnt) { // // cout << "# window front: " << f << "; run_cnt: " << run_cnt << "; prev_cnt: " << prev_cnt << "\n"; // f++; // run_cnt++; // } // if (run_cnt >= rare_k_lim) { // // cout << "# found transition run, position: " << f-1 << "; run_cnt: " << run_cnt << "\n"; // switch(prev_cnt) { // case 1: // runs[0]++; // break; // case 10: // runs[1]++; // break; // case 100: // runs[2]++; // break; // case 1000: // runs[3]++; // break; // case 10000: // runs[4]++; // break; // case 100000: // runs[5]++; // break; // } // } // for (int j = 0; j < 6; j++) // if (runs[j] > 0) tot_trans++; // // cout << "# Total transitions: " << tot_trans << "\n"; // if (tot_trans >= transition_lim) { // // cout << "# Rejecting read.\n"; // rare_k++; // return 0; // } // } // // cout << "\n\n"; // return 0; // } int free_kmer_hash(SeqKmerHash &kmer_map, vector &kmer_map_keys) { for (uint i = 0; i < kmer_map_keys.size(); i++) { delete [] kmer_map_keys[i]; } kmer_map_keys.clear(); kmer_map.clear(); return 0; } int print_results(map > &counters) { map >::iterator it; string log_path = out_path + "kmer_filter.log"; ofstream log(log_path.c_str()); if (log.fail()) { cerr << "Unable to open log file '" << log_path << "'\n"; return 0; } cerr << "Outputing details to log: '" << log_path << "'\n\n"; log << "File\t" << "Retained Reads\t" << "Rare K\t" << "Abundant K\t" << "Total\n"; for (it = counters.begin(); it != counters.end(); it++) { log << it->first << "\t" << it->second["retained"] << "\t" << it->second["rare_k"] << "\t" << it->second["abundant_k"] << "\t" << it->second["total"] << "\n"; } map c; c["total"] = 0; // // Total up the individual counters // for (it = counters.begin(); it != 
counters.end(); it++) { c["total"] += it->second["total"]; c["retained"] += it->second["retained"]; c["rare_k"] += it->second["rare_k"]; c["abundant_k"] += it->second["abundant_k"]; } cerr << c["total"] << " total sequences;\n" << " " << c["rare_k"] << " rare k-mer reads;\n" << " " << c["abundant_k"] << " abundant k-mer reads;\n" << c["retained"] << " retained reads.\n"; log << "Total Sequences\t" << c["total"] << "\n" << "Retained Reads\t" << c["retained"] << "\n"; log.close(); return 0; } int build_file_list(vector &in_files, vector > &files) { string file, suffix; int pos; // // Scan a directory for a list of files. // if (in_path.length() > 0) { struct dirent *direntry; DIR *dir = opendir(in_path.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path << "' for reading.\n"; exit(1); } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file.substr(0, 1) == ".") continue; // // If the file is gzip'ed, remove the '.gz' suffix. // pos = file.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file.substr(pos) == ".gz") { file = file.substr(0, pos); pos = file.find_last_of("."); } // // Check that the remaining file name has the right suffix. // suffix = file.substr(pos + 1); if (in_file_type == FileT::fastq && (suffix.substr(0, 2) == "fq" || suffix.substr(0, 5) == "fastq")) files.push_back(make_pair(in_path, file)); else if (in_file_type == FileT::fasta && (suffix.substr(0, 2) == "fa" || suffix.substr(0, 5) == "fasta")) files.push_back(make_pair(in_path, file)); } if (files.size() == 0) cerr << "Unable to locate any input files to process within '" << in_path << "'\n"; } else { string path; for (uint i = 0; i < in_files.size(); i++) { // // Files specified directly: // Break off file path and store path and file name. // Check if this is a gzip'ed file and if so, remove 'gz' suffix. 
// file = in_files[i]; pos = file.find_last_of("."); if ((in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) && file.substr(pos) == ".gz") { file = file.substr(0, pos); pos = file.find_last_of("."); } pos = file.find_last_of("/"); path = file.substr(0, pos + 1); files.push_back(make_pair(path, file.substr(pos+1))); } } return 0; } int parse_command_line(int argc, char* argv[]) { string pair_1, pair_2; int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"discards", no_argument, NULL, 'D'}, {"pair_1", required_argument, NULL, '1'}, {"pair_2", required_argument, NULL, '2'}, {"infile_type", required_argument, NULL, 'i'}, {"outfile_type", required_argument, NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"path", required_argument, NULL, 'p'}, {"outpath", required_argument, NULL, 'o'}, {"k_dist", no_argument, NULL, 'I'}, {"rare", no_argument, NULL, 'R'}, {"abundant", no_argument, NULL, 'A'}, {"normalize", required_argument, NULL, 'N'}, {"k_len", required_argument, NULL, 'K'}, {"max_k_freq", required_argument, NULL, 'M'}, {"min_lim", required_argument, NULL, 'F'}, {"max_lim", required_argument, NULL, 'G'}, {"min_k_pct", required_argument, NULL, 'P'}, {"read_k_freq", required_argument, NULL, 'r'}, {"write_k_freq", required_argument, NULL, 'w'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvRADkP:N:I:w:r:K:F:G:M:m:i:y:f:o:t:p:1:2:", long_options, &option_index); // Detect the end of the options. 
if (c == -1) break; switch (c) { case 'h': help(); break; case 'i': if (strcasecmp(optarg, "fasta") == 0) in_file_type = FileT::fasta; else if (strcasecmp(optarg, "gzfasta") == 0) in_file_type = FileT::gzfasta; else if (strcasecmp(optarg, "gzfastq") == 0) in_file_type = FileT::gzfastq; else in_file_type = FileT::fastq; break; case 'y': if (strcasecmp(optarg, "fasta") == 0) out_file_type = FileT::fasta; else out_file_type = FileT::fastq; break; case 'f': in_files.push_back(optarg); break; case '1': pair_1 = optarg; break; case '2': pair_2 = optarg; if (pair_1.length() == 0) help(); in_pair_files.push_back(pair_1); in_pair_files.push_back(pair_2); pair_1 = ""; pair_2 = ""; break; case 'p': in_path = optarg; break; case 'o': out_path = optarg; break; case 'D': discards = true; break; case 'I': kmer_distr = true; break; case 'R': filter_rare_k = true; break; case 'A': filter_abundant_k = true; break; case 'N': normalize = true; normalize_lim = is_integer(optarg); break; case 'K': kmer_len = is_integer(optarg); break; case 'M': max_k_freq = is_integer(optarg); break; case 'F': min_lim = is_integer(optarg); break; case 'G': max_lim = is_integer(optarg); break; case 'P': min_k_pct = is_double(optarg); break; case 'r': read_k_freq = true; k_freq_path = optarg; break; case 'w': write_k_freq = true; k_freq_path = optarg; break; case 'v': version(); break; case '?': // getopt_long already printed an error message. 
help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_files.size() == 0 && in_pair_files.size() == 0 && in_path.length() == 0) { cerr << "You must specify an input file of a directory path to a set of input files.\n"; help(); } if (in_files.size() > 0 && in_path.length() > 0) { cerr << "You must specify either a single input file (-f) or a directory path (-p), not both.\n"; help(); } if (in_path.length() > 0 && in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (in_file_type == FileT::unknown) in_file_type = FileT::fastq; if (read_k_freq && write_k_freq) { cerr << "You may either read a set of kmer frequencies, or write kmer frequencies, not both.\n"; help(); } if (min_k_pct < 0.0 || min_k_pct > 1.0) { cerr << "Percentage to consider a kmer rare must be between 0 and 1.0.\n"; help(); } // // Check that the output path exists. // struct stat info; if (stat(out_path.c_str(), &info) != 0) { cerr << "Unable to locate the specified output path, '" << out_path << "'\n"; exit(1); } return 0; } void version() { std::cerr << "kmer_filter " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "kmer_filter " << VERSION << "\n" << "kmer_filter [-f in_file_1 [-f in_file_2...] 
| -p in_dir] [-1 pair_1 -2 pair_2 [-1 pair_1...]] -o out_dir [-i type] [-y type] [-D] [-h]\n" << " f: path to the input file if processing single-end seqeunces.\n" << " i: input file type, either 'bustard' for the Illumina BUSTARD output files, 'fasta', 'fastq', 'gzfasta', or 'gzfastq' (default 'fastq').\n" << " p: path to a directory of files (for single-end files only).\n" << " 1: specify the first in a pair of files to be processed together.\n" << " 2: specify the second in a pair of files to be processed together.\n" << " o: path to output the processed files.\n" << " y: output type, either 'fastq' or 'fasta' (default fastq).\n" << " D: capture discarded reads to a file.\n" << " h: display this help messsage.\n\n" << " Filtering options:\n" << " --rare: turn on filtering based on rare k-mers.\n" << " --abundant: turn on filtering based on abundant k-mers.\n" << " --k_len : specify k-mer size (default 15).\n\n" << " Advanced filtering options:\n" << " --max_k_freq : specify the number of times a kmer must occur to be considered abundant (default 20,000).\n" << " --min_lim : specify number of rare kmers occuring in a row required to discard a read (default 80% of the k-mer length).\n" << " --max_lim : specify number of abundant kmers required to discard a read (default 80% of the k-mers in a read).\n\n" << " Normalize data:\n" << " --normalize : normalize read depth according to k-mer coverage.\n\n" << " Characterizing K-mers:\n" << " --write_k_freq: write kmers along with their frequency of occurrence and exit.\n" << " --k_dist: print k-mer frequency distribution and exit.\n\n" << " Advanced input options:\n" << " --read_k_freq : read a set of kmers along with their frequencies of occurrence instead of reading raw input files.\n" << "\n"; exit(0); } stacks-1.35/src/kmer_filter.h000644 000765 000024 00000006742 12441417455 016765 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2014, Julian Catchen // // 
This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __KMER_FILTER_H__ #define __KMER_FILTER_H__ #include "constants.h" #include #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include #include #include #include using std::stringstream; using std::istream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include using std::unordered_map; #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "clean.h" #include "utils.h" #include "kmers.h" #include "write.h" #include "BustardI.h" // Reading input files in Tab-separated Bustard format #include "FastaI.h" // Reading input files in FASTA format #include "FastqI.h" // Reading input files in FASTQ format #include "gzFasta.h" // Reading gzipped input files in FASTA format #include "gzFastq.h" // Reading gzipped input files in FASTQ format #ifdef HAVE_SPARSEHASH typedef sparse_hash_map SeqKmerHash; #else typedef unordered_map SeqKmerHash; #endif void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(vector &, vector > &); int process_reads(string, string, SeqKmerHash &, map &); int process_paired_reads(string, string, string, string, SeqKmerHash &, map &); int 
print_results(map > &); // // Functions to normalize read depth // int normalize_reads(string, string, SeqKmerHash &, vector &, map &); int normalize_paired_reads(string, string, string, string, SeqKmerHash &, vector &, map &); bool normalize_kmer_lookup(SeqKmerHash &, char *, char *, int, vector &); // // Functions for finding and removing reads with rare kmers // int populate_kmers(vector > &, vector > &, SeqKmerHash &, vector &); int process_file_kmers(string, SeqKmerHash &, vector &); int generate_kmer_dist(SeqKmerHash &); int calc_kmer_median(SeqKmerHash &, double &, double &); int kmer_map_cmp(pair, pair); int kmer_lookup(SeqKmerHash &, char *, char *, int, int &, int &); int free_kmer_hash(SeqKmerHash &, vector &); int read_kmer_freq(string, SeqKmerHash &, vector &); int write_kmer_freq(string, SeqKmerHash &); #endif // __KMER_FILTER_H__ stacks-1.35/src/kmers.cc000644 000765 000024 00000024706 12533677757 015760 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // kmers.cc -- routines to generate and hash K-mers // #include "kmers.h" int determine_kmer_length(int read_len, int dist) { int kmer_len, span, min_matches; // // If distance allowed between sequences is 0, then k-mer length equals read length. 
// if (dist == 0) return read_len; // // Longer k-mer lengths will provide a smaller hash, with better key placement. // Increase the kmer_len until we start to miss hits at the given distance. Then // back the kmer_len off one unit to get the final value. // for (kmer_len = 5; kmer_len < read_len; kmer_len += 2) { span = (kmer_len * (dist + 1)) - 1; min_matches = read_len - span; if (min_matches <= 0) break; } if (kmer_len >= read_len) { cerr << "Unable to find a suitable k-mer length for matching.\n"; exit(1); } kmer_len -= 2; return kmer_len; } int calc_min_kmer_matches(int kmer_len, int dist, int read_len, bool exit_err) { int span, min_matches; span = (kmer_len * (dist + 1)) - 1; min_matches = read_len - span; if (min_matches <= 0) { cerr << "Warning: combination of k-mer length (" << kmer_len << ") and edit distance (" << dist << ") allows for " << "sequences to be missed by the matching algorithm.\n"; } if (min_matches <= 0 && exit_err) exit(1); else if (min_matches <= 0) min_matches = 1; cerr << " Minimum number of k-mers to define a match: " << min_matches << "\n"; return min_matches; } int generate_kmers(const char *seq, int kmer_len, int num_kmers, vector &kmers) { char *kmer; const char *k = seq; for (int i = 0; i < num_kmers; i++) { kmer = new char[kmer_len + 1]; strncpy(kmer, k, kmer_len); kmer[kmer_len] = '\0'; kmers.push_back(kmer); k++; } return 0; } int generate_permutations(map &pstrings, int width) { int i, j, rem, div, num; char *p; // // Given a k-mer that allows wildcards -- 'N' characters, we need to generate all // possible k-mers. To do so, we will generate a range of numbers that we convert to // base 4, assuming that 0 = 'A', 1 = 'C', 2 = 'G', 3 = 'T'. // const int base = 4; int range = (int) pow(4, width); // // Create an array of strings to hold the permuted nucleotides. 
// char **strings = new char * [range]; for (i = 0; i < range; i++) strings[i] = new char[width + 1]; for (i = 0; i < range; i++) { for (j = 0; j < width; j++) strings[i][j] = 'A'; strings[i][width] = '\0'; } for (i = 0; i < range; i++) { // // Convert this number to base 4 // p = strings[i]; p += width - 1; num = i; do { div = (int) floor(num / base); rem = num % base; switch(rem) { case 0: *p = 'A'; break; case 1: *p = 'C'; break; case 2: *p = 'G'; break; case 3: *p = 'T'; break; } num = div; p--; } while (div > 0); } pstrings[width] = strings; return 0; } int populate_kmer_hash(map &merged, KmerHashMap &kmer_map, vector &kmer_map_keys, int kmer_len) { map::iterator it; MergedStack *tag; vector kmers; bool exists; // // Break each stack down into k-mers and create a hash map of those k-mers // recording in which sequences they occur. // int num_kmers = strlen(merged.begin()->second->con) - kmer_len + 1; for (it = merged.begin(); it != merged.end(); it++) { tag = it->second; // Don't compute distances for masked tags if (tag->masked) continue; generate_kmers(tag->con, kmer_len, num_kmers, kmers); // Hash the kmers for (int j = 0; j < num_kmers; j++) { exists = kmer_map.count(kmers[j]) == 0 ? false : true; kmer_map[kmers[j]].push_back(tag->id); if (exists) delete [] kmers[j]; else kmer_map_keys.push_back(kmers[j]); } kmers.clear(); } //dump_kmer_map(kmer_map); return 0; } int populate_kmer_hash(map &catalog, CatKmerHashMap &kmer_map, vector &kmer_map_keys, int kmer_len) { map::iterator it; vector >::iterator allele; vector kmers; Locus *tag; char *hash_key; bool exists; // // Break each stack down into k-mers and create a hash map of those k-mers // recording in which sequences they occur. 
// int num_kmers = strlen(catalog.begin()->second->con) - kmer_len + 1; for (it = catalog.begin(); it != catalog.end(); it++) { tag = it->second; // // Iterate through the possible Catalog alleles // for (allele = tag->strings.begin(); allele != tag->strings.end(); allele++) { // // Generate and hash the kmers for this allele string // generate_kmers(allele->second.c_str(), kmer_len, num_kmers, kmers); for (int j = 0; j < num_kmers; j++) { hash_key = kmers[j]; exists = kmer_map.count(hash_key) == 0 ? false : true; kmer_map[hash_key].push_back(make_pair(allele->first, tag->id)); if (exists) delete [] kmers[j]; else kmer_map_keys.push_back(hash_key); } kmers.clear(); } } //dump_kmer_map(kmer_map); return 0; } int free_kmer_hash(CatKmerHashMap &kmer_map, vector &kmer_map_keys) { for (uint i = 0; i < kmer_map_keys.size(); i++) { kmer_map[kmer_map_keys[i]].clear(); } kmer_map.clear(); for (uint i = 0; i < kmer_map_keys.size(); i++) { delete [] kmer_map_keys[i]; } kmer_map_keys.clear(); return 0; } int free_kmer_hash(KmerHashMap &kmer_map, vector &kmer_map_keys) { for (uint i = 0; i < kmer_map_keys.size(); i++) { kmer_map[kmer_map_keys[i]].clear(); } kmer_map.clear(); for (uint i = 0; i < kmer_map_keys.size(); i++) { delete [] kmer_map_keys[i]; } kmer_map_keys.clear(); return 0; } int dist(const char *tag_1, Locus *tag_2, allele_type allele) { int dist = 0; const char *p = tag_1; const char *p_end = p + strlen(p); const char *q = NULL; // // Identify which matching string has the proper allele // vector >::iterator it; for (it = tag_2->strings.begin(); it != tag_2->strings.end(); it++) if (it->first == allele) q = it->second.c_str(); if (q == NULL) return -1; const char *q_end = q + strlen(q); // Count the number of characters that are different // between the two sequences. while (p < p_end && q < q_end) { dist += (*p == *q) ? 
0 : 1; p++; q++; } return dist; } int dist(Locus *tag_1, Locus *tag_2) { int dist = 0; char *p = tag_1->con; char *q = tag_2->con; char *p_end = p + tag_1->len; char *q_end = q + tag_2->len; if (tag_1->len != tag_2->len) { if (tag_1->len < tag_2->len) dist += tag_2->len - tag_1->len; else if (tag_1->len > tag_2->len) dist += tag_1->len - tag_2->len; } // // Count the number of characters that are different // between the two sequences. // while (p < p_end && q < q_end) { dist += (*p == *q) ? 0 : 1; p++; q++; } return dist; } int dist(MergedStack *tag_1, MergedStack *tag_2) { int dist = 0; char *p = tag_1->con; char *q = tag_2->con; char *p_end = p + tag_1->len; char *q_end = q + tag_2->len; // // If the sequences are of different lengths, count the missing // nucleotides as mismatches. // if (tag_1->len != tag_2->len) { if (tag_1->len < tag_2->len) dist += tag_2->len - tag_1->len; else if (tag_1->len > tag_2->len) dist += tag_1->len - tag_2->len; } // // Count the number of characters that are different // between the two sequences. // while (p < p_end && q < q_end) { dist += (*p == *q) ? 0 : 1; p++; q++; } return dist; } int dist(MergedStack *tag_1, char *seq) { int dist = 0; char *p = tag_1->con; char *q = seq; uint q_len = strlen(q); char *p_end = p + tag_1->len; char *q_end = q + q_len; // // If the sequences are of different lengths, count the missing // nucleotides as mismatches. // if (tag_1->len != q_len) { if (tag_1->len < q_len) dist += q_len - tag_1->len; else if (tag_1->len > q_len) dist += tag_1->len - q_len; } // // Count the number of characters that are different // between the two sequences. // while (p < p_end && q < q_end) { dist += (*p == *q) ? 
0 : 1; p++; q++; } return dist; } bool compare_dist(pair a, pair b) { return (a.second < b.second); } int dump_kmer_map(KmerHashMap &kmer_map) { KmerHashMap::iterator kit; vector::iterator vit; cerr << kmer_map.size() << " keys in the map.\n"; int i = 1; for (kit = kmer_map.begin(); kit != kmer_map.end(); kit++) { cerr << "Key #" << i << " " << kit->first << ": "; for (vit = (kit->second).begin(); vit != (kit->second).end(); vit++) cerr << " " << *vit; cerr << "\n"; i++; if (i > 1000) break; } return 0; } stacks-1.35/src/kmers.h000644 000765 000024 00000006010 12335173442 015564 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __KMERS_H__ #define __KMERS_H__ #include "constants.h" #include #include #include using std::string; #include using std::vector; #include using std::map; #include using std::pair; using std::make_pair; #include using std::ifstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::unordered_map; #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "stacks.h" #include "locus.h" #include "mstack.h" #include "input.h" struct hash_charptr { size_t operator()(const char *__s) const { size_t __result = static_cast(14695981039346656037ULL); unsigned int __len = strlen(__s); for (unsigned int i = 0; i < __len; i++) { __result ^= static_cast(__s[i]); __result *= static_cast(1099511628211ULL); } return __result; } }; struct eqstr { bool operator()(const char* s1, const char* s2) const { return strcmp(s1, s2) == 0; } }; #ifdef HAVE_SPARSEHASH typedef sparse_hash_map, hash_charptr, eqstr> KmerHashMap; typedef sparse_hash_map >, hash_charptr, eqstr> CatKmerHashMap; #else typedef unordered_map, hash_charptr, eqstr> KmerHashMap; typedef unordered_map >, hash_charptr, eqstr> CatKmerHashMap; #endif int determine_kmer_length(int, int); int calc_min_kmer_matches(int, int, int, bool); int generate_kmers(const char *, int, int, vector &); int populate_kmer_hash(map &, KmerHashMap &, vector &, int); int populate_kmer_hash(map &, CatKmerHashMap &, vector &, int); int free_kmer_hash(KmerHashMap &, vector &); int free_kmer_hash(CatKmerHashMap &, vector &); int generate_permutations(map &, int); // // Utilities // int dist(const char *, Locus *, allele_type); int dist(Locus *, Locus *); int dist(MergedStack *, MergedStack *); int dist(MergedStack *, char *); // // For sorting functions. 
// bool compare_dist(pair, pair); // // Debugging // int dump_kmer_map(KmerHashMap &); #endif // __KMERS_H__ stacks-1.35/src/locus.cc000644 000765 000024 00000005610 12533677757 015755 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // locus.cc -- routines for the Locus class and its derivatives. // #include "locus.h" uint Locus::sort_bp(uint k) { if (this->loc.strand == plus) return this->loc.bp + k; else return (k == 0 ? this->loc.bp - this->len + 1 : this->loc.bp - k); } int Locus::snp_index(uint col) { for (uint i = 0; i < this->snps.size(); i++) if (this->snps[i]->col == col) return i; return -1; } int Locus::add_consensus(const char *seq) { if (this->con != NULL) delete [] this->con; this->len = strlen(seq); this->con = new char[this->len + 1]; strcpy(this->con, seq); return 0; } int Locus::populate_alleles() { vector::iterator i; map::iterator j; string s; int k; // // Is this effective? 
// for (uint n = 0; n < this->strings.size(); n++) { this->strings[n].first.clear(); this->strings[n].second.clear(); } this->strings.clear(); if (this->snps.size() == 0) { this->strings.push_back(make_pair("consensus", this->con)); return 0; } for (j = this->alleles.begin(); j != this->alleles.end(); j++) { s = this->con; k = 0; for (i = this->snps.begin(); i != this->snps.end(); i++) { if ((*i)->col < this->len) s.replace((*i)->col, 1, 1, j->first[k]); k++; } this->strings.push_back(make_pair(j->first, s)); } return 0; } bool bp_compare(Locus *a, Locus *b) { return (a->sort_bp() < b->sort_bp()); } QLocus::~QLocus() { vector::iterator it; for (it = this->matches.begin(); it != this->matches.end(); it++) delete *it; } int QLocus::add_match(int catalog_id, allele_type cat_type, allele_type query_type, int distance) { Match *m = new Match; m->cat_id = catalog_id; m->cat_type = cat_type; m->query_type = query_type; m->dist = distance; this->matches.push_back(m); return 0; } int QLocus::add_match(int catalog_id, allele_type cat_type) { Match *m = new Match; m->cat_id = catalog_id; m->cat_type = cat_type; m->query_type = ""; m->dist = 0; this->matches.push_back(m); return 0; } stacks-1.35/src/locus.h000644 000765 000024 00000012206 12533677757 015616 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __LOCUS_H__ #define __LOCUS_H__ #include #include using std::string; #include using std::vector; #include using std::map; #include #include using std::pair; using std::make_pair; #include "constants.h" #include "stacks.h" typedef struct match { uint cat_id; allele_type cat_type; allele_type query_type; uint dist; } Match; class Locus { public: int id; // Locus ID int sample_id; // Sample ID int depth; // Stack depth char *con; // Consensus sequence char *model; // Model calls for each nucleotide uint len; // Sequence length double lnl; // Log likelihood of this locus // // Flags // bool blacklisted; bool deleveraged; bool lumberjackstack; vector comp; // Raw components in this stack. vector reads; // Sequence reads contributing to this locus. vector comp_cnt; // Counter for internal stacks merged into this locus. vector comp_type; // Read types for reads contributing to this locus. PhyLoc loc; // Physical genome location of this stack. vector snps; // Single Nucleotide Polymorphisms in this stack. map alleles; // Map of the allelic configuration of SNPs in this stack along with the count of each vector > strings; // Strings for matching (representing the various allele combinations) Locus() { id = 0; sample_id = 0; depth = 0; model = NULL; con = NULL; len = 0; lnl = 0.0; blacklisted = false; deleveraged = false; lumberjackstack = false; } virtual ~Locus() { delete [] con; delete [] model; for (uint i = 0; i < snps.size(); i++) delete snps[i]; for (uint i = 0; i < comp.size(); i++) delete [] comp[i]; for (uint i = 0; i < reads.size(); i++) delete [] reads[i]; } uint sort_bp(uint k = 0); int snp_index(uint); int add_consensus(const char *); virtual int populate_alleles(); }; // // Query Locus Class // class QLocus : public Locus { public: vector matches; // Matching tags found for the catalog. 
QLocus(): Locus() {} ~QLocus(); int add_match(int, allele_type, allele_type, int); int add_match(int, allele_type); }; // // Catalog Locus Class, for use in cstacks, contains catalog loci and records the // constiuent tags this locus was built from. // class CLocus : public Locus { public: vector > sources; // Sample/ID pairs for the sources contributing to this catalog entry int merge_snps(QLocus *); int reduce_alleles(set &); }; // // Catalog Summary Locus Class; used in genotypes and populations, records a catalog // locus with summary information derived from individuals in the population. // class CSLocus : public Locus { public: CSLocus() : Locus() { this->f = 0.0; this->cnt = 0; this->hcnt = 0; this->gcnt = 0; this->trans_gcnt = 0; this->chisq = 1.0; this->confounded_cnt = 0; }; string annotation; string marker; string uncor_marker; map hap_cnts; // Counts of each observed haplotype for this locus in the population. double f; // Inbreeder's coefficient map gmap; // Observed haplotype to genotype map for this locus. int confounded_cnt; // Number of samples/progeny containing confounded loci (more than one // locus from an individual sample matches this catalog locus). int hcnt; // Number of samples/progeny containing a haplotype for this locus. int cnt; // Number of samples/progeny containing data for this locus. int gcnt; // Number of progeny containing a valid genotype. int trans_gcnt; // Number of progeny containing a valid // genotype, translated for a particular map type. double chisq; // Chi squared p-value testing the null hypothesis of no segregation distortion. }; bool bp_compare(Locus *, Locus *); #endif // __LOCUS_H__ stacks-1.35/src/log_utils.cc000644 000765 000024 00000002473 12533677757 016635 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #include "log_utils.h" int init_log(ofstream &fh, int argc, char **argv) { // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); // // Write the command line that was executed. // for (int i = 0; i < argc; i++) { fh << argv[i]; if (i < argc - 1) fh << " "; } fh << "\n" << argv[0] << " version " << VERSION << " executed " << date << "\n\n"; return 0; } stacks-1.35/src/log_utils.h000644 000765 000024 00000002161 12533677757 016471 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __LOG_UTILS_H__ #define __LOG_UTILS_H__ #include #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::stringstream; #include "constants.h" int init_log(ofstream &, int, char **); #endif // __LOG_UTILS_H__ stacks-1.35/src/models.cc000644 000765 000024 00000042666 12441417455 016110 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010 - 2012, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // models.cc -- routines to detect polymorphism (snp) and detect a lack of polymorphism (fixed). // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id$ // #include "models.h" snp_type call_multinomial_snp(MergedStack *tag, int col, map &n, bool record_snps) { vector > nuc; map::iterator i; int total = 0; for (i = n.begin(); i != n.end(); i++) { if (i->first != 'N') { total += i->second; nuc.push_back(make_pair(i->first, i->second)); } } sort(nuc.begin(), nuc.end(), compare_pair); // // If this column was simply uncalled Ns, return. // if (nuc[0].second == 0) { if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = 0; snp->rank_1 = 'N'; snp->rank_2 = '-'; tag->snps.push_back(snp); } return snp_type_unk; } // // Method of Paul Hohenlohe , personal communication. 
// // For a diploid individual, there are ten possible genotypes // (four homozygous and six heterozygous genotypes). We calculate // the likelihood of each possible genotype by using a multinomial // sampling distribution, which gives the probability of observing // a set of read counts (n1,n2,n3,n4) given a particular genotype. // double nuc_1 = nuc[0].second; double nuc_2 = nuc[1].second; double nuc_3 = nuc[2].second; double nuc_4 = nuc[3].second; double l_ratio = 0; l_ratio = (nuc_1 * log(nuc_1 / total)); if (total - nuc_1 > 0) l_ratio += ((total - nuc_1) * log((total - nuc_1) / (3 * total))); if (nuc_1 + nuc_2 > 0) l_ratio -= ((nuc_1 + nuc_2) * log((nuc_1 + nuc_2) / (2 * total))); if (nuc_3 + nuc_4 > 0) l_ratio -= ((nuc_3 + nuc_4) * log((nuc_3 + nuc_4) / (2 * total))); l_ratio *= 2; snp_type res; if (l_ratio <= heterozygote_limit) { // // This locus is a heterozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_het; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].first; tag->snps.push_back(snp); } res = snp_type_het; } else if (l_ratio >= homozygote_limit) { // // This locus is a homozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_hom; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = '-'; tag->snps.push_back(snp); } res = snp_type_hom; } else { // // Unknown whether this is a heterozygote or homozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].second > 0 ? 
nuc[1].first : '-'; tag->snps.push_back(snp); } res = snp_type_unk; } return res; } snp_type call_multinomial_snp(Locus *tag, int col, map &n) { vector > nuc; map::iterator i; int total = 0; for (i = n.begin(); i != n.end(); i++) { if (i->first != 'N') { total += i->second; nuc.push_back(make_pair(i->first, i->second)); } } sort(nuc.begin(), nuc.end(), compare_pair); // // If this column was simply uncalled Ns, return. // if (nuc[0].second == 0) { tag->snps[col]->type = snp_type_unk; tag->snps[col]->col = col; tag->snps[col]->lratio = 0; tag->snps[col]->rank_1 = 'N'; tag->snps[col]->rank_2 = '-'; return snp_type_unk; } // // Method of Paul Hohenlohe , personal communication. // // For a diploid individual, there are ten possible genotypes // (four homozygous and six heterozygous genotypes). We calculate // the likelihood of each possible genotype by using a multinomial // sampling distribution, which gives the probability of observing // a set of read counts (n1,n2,n3,n4) given a particular genotype. // double nuc_1 = nuc[0].second; double nuc_2 = nuc[1].second; double nuc_3 = nuc[2].second; double nuc_4 = nuc[3].second; double l_ratio = 0; l_ratio = (nuc_1 * log(nuc_1 / total)); if (total - nuc_1 > 0) l_ratio += ((total - nuc_1) * log((total - nuc_1) / (3 * total))); if (nuc_1 + nuc_2 > 0) l_ratio -= ((nuc_1 + nuc_2) * log((nuc_1 + nuc_2) / (2 * total))); if (nuc_3 + nuc_4 > 0) l_ratio -= ((nuc_3 + nuc_4) * log((nuc_3 + nuc_4) / (2 * total))); l_ratio *= 2; snp_type res; if (l_ratio <= heterozygote_limit) { // // This locus is a heterozygote. // tag->snps[col]->type = snp_type_het; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = nuc[1].first; res = snp_type_het; } else if (l_ratio >= homozygote_limit) { // // This locus is a homozygote. 
// tag->snps[col]->type = snp_type_hom; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = '-'; res = snp_type_hom; } else { // // Unknown whether this is a heterozygote or homozygote. // tag->snps[col]->type = snp_type_unk; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = nuc[1].second > 0 ? nuc[1].first : '-'; res = snp_type_unk; } return res; } snp_type call_bounded_multinomial_snp(MergedStack *tag, int col, map &n, bool record_snps) { vector > nuc; map::iterator i; double total = 0.0; for (i = n.begin(); i != n.end(); i++) { if (i->first != 'N') { total += i->second; nuc.push_back(make_pair(i->first, i->second)); } } sort(nuc.begin(), nuc.end(), compare_pair); // // If this column was simply uncalled Ns, return. // if (nuc[0].second == 0) { if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = 0; snp->rank_1 = 'N'; snp->rank_2 = '-'; tag->snps.push_back(snp); } return snp_type_unk; } double nuc_1 = nuc[0].second; double nuc_2 = nuc[1].second; double nuc_3 = nuc[2].second; double nuc_4 = nuc[3].second; // // Method of Paul Hohenlohe , personal communication. // // // Calculate the site specific error rate for homozygous and heterozygous genotypes. // double epsilon_hom = (4.0 / 3.0) * ((total - nuc_1) / total); double epsilon_het = 2.0 * ((nuc_3 + nuc_4) / total); // cerr << "Epsilon_hom: " << epsilon_hom << "; epsilon_het: " << epsilon_het << "\n"; // // Check if the error rate is above or below the specified bound. // if (epsilon_hom < bound_low) epsilon_hom = bound_low; else if (epsilon_hom > bound_high) epsilon_hom = bound_high; if (epsilon_het < bound_low) epsilon_het = bound_low; else if (epsilon_het > bound_high) epsilon_het = bound_high; // // Calculate the log likelihood for the homozygous and heterozygous genotypes. 
// double ln_L_hom = nuc_1 * log(1 - ((3.0/4.0) * epsilon_hom)); ln_L_hom += epsilon_hom > 0 ? ((nuc_2 + nuc_3 + nuc_4) * log(epsilon_hom / 4.0)) : 0; double ln_L_het = (nuc_1 + nuc_2) * log(0.5 - (epsilon_het / 4.0)); ln_L_het += epsilon_het > 0 ? ((nuc_3 + nuc_4) * log(epsilon_het / 4.0)) : 0; // // Calculate the likelihood ratio. // double l_ratio = 2 * (ln_L_hom - ln_L_het); // cerr << " Nuc_1: " << nuc_1 << " Nuc_2: " << nuc_2 << " Nuc_3: " << nuc_3 << " Nuc_4: " << nuc_4 // << " epsilon homozygote: " << epsilon_hom // << " epsilon heterozygote: " << epsilon_het // << " Log likelihood hom: " << ln_L_hom // << " Log likelihood het: " << ln_L_het // << " Likelihood ratio: " << l_ratio << "\n"; snp_type res; if (l_ratio <= heterozygote_limit) { // // This locus is a heterozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_het; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].first; tag->snps.push_back(snp); } res = snp_type_het; } else if (l_ratio >= homozygote_limit) { // // This locus is a homozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_hom; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = '-'; tag->snps.push_back(snp); } res = snp_type_hom; } else { // // Unknown whether this is a heterozygote or homozygote. // if (record_snps) { SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].first; tag->snps.push_back(snp); } res = snp_type_unk; } return res; } snp_type call_bounded_multinomial_snp(Locus *tag, int col, map &n) { vector > nuc; map::iterator i; double total = 0.0; for (i = n.begin(); i != n.end(); i++) { if (i->first != 'N') { total += i->second; nuc.push_back(make_pair(i->first, i->second)); } } sort(nuc.begin(), nuc.end(), compare_pair); // // If this column was simply uncalled Ns, return. 
// if (nuc[0].second == 0) { tag->snps[col]->type = snp_type_unk; tag->snps[col]->col = col; tag->snps[col]->lratio = 0; tag->snps[col]->rank_1 = 'N'; tag->snps[col]->rank_2 = '-'; return snp_type_unk; } double nuc_1 = nuc[0].second; double nuc_2 = nuc[1].second; double nuc_3 = nuc[2].second; double nuc_4 = nuc[3].second; // // Method of Paul Hohenlohe , personal communication. // // // Calculate the site specific error rate for homozygous and heterozygous genotypes. // double epsilon_hom = (4.0 / 3.0) * ((total - nuc_1) / total); double epsilon_het = 2.0 * ((nuc_3 + nuc_4) / total); // cerr << "Epsilon_hom: " << epsilon_hom << "; epsilon_het: " << epsilon_het << "\n"; // // Check if the error rate is above or below the specified bound. // if (epsilon_hom < bound_low) epsilon_hom = bound_low; else if (epsilon_hom > bound_high) epsilon_hom = bound_high; if (epsilon_het < bound_low) epsilon_het = bound_low; else if (epsilon_het > bound_high) epsilon_het = bound_high; // // Calculate the log likelihood for the homozygous and heterozygous genotypes. // double ln_L_hom = nuc_1 * log(1 - ((3.0/4.0) * epsilon_hom)); ln_L_hom += epsilon_hom > 0 ? ((nuc_2 + nuc_3 + nuc_4) * log(epsilon_hom / 4.0)) : 0; double ln_L_het = (nuc_1 + nuc_2) * log(0.5 - (epsilon_het / 4.0)); ln_L_het += epsilon_het > 0 ? ((nuc_3 + nuc_4) * log(epsilon_het / 4.0)) : 0; // // Calculate the likelihood ratio. // double l_ratio = 2 * (ln_L_hom - ln_L_het); // cerr << " Nuc_1: " << nuc_1 << " Nuc_2: " << nuc_2 << " Nuc_3: " << nuc_3 << " Nuc_4: " << nuc_4 // << " epsilon homozygote: " << epsilon_hom // << " epsilon heterozygote: " << epsilon_het // << " Log likelihood hom: " << ln_L_hom // << " Log likelihood het: " << ln_L_het // << " Likelihood ratio: " << l_ratio << "\n"; snp_type res; if (l_ratio <= heterozygote_limit) { // // This locus is a heterozygote. 
// tag->snps[col]->type = snp_type_het; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = nuc[1].first; res = snp_type_het; } else if (l_ratio >= homozygote_limit) { // // This locus is a homozygote. // tag->snps[col]->type = snp_type_hom; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = '-'; res = snp_type_hom; } else { // // Unknown whether this is a heterozygote or homozygote. // tag->snps[col]->type = snp_type_unk; tag->snps[col]->col = col; tag->snps[col]->lratio = l_ratio; tag->snps[col]->rank_1 = nuc[0].first; tag->snps[col]->rank_2 = nuc[1].first; res = snp_type_unk; } return res; } int call_multinomial_fixed (MergedStack *tag, int col, map &n) { const double nucleotide_fixed_limit = 1.92; vector > nuc; map::iterator i; int total = 0; for (i = n.begin(); i != n.end(); i++) { if (i->first != 'N') { total += i->second; nuc.push_back(make_pair(i->first, i->second)); } } sort(nuc.begin(), nuc.end(), compare_pair); if (nuc[0].second == 0) { SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = 0; snp->rank_1 = 'N'; snp->rank_2 = '-'; tag->snps.push_back(snp); return snp_type_unk; } // // Method of Paul Hohenlohe , personal communication. // // Each population sample contains DNA from 6 individuals, so a // sample of 12 alleles from the population. We want to assign a // nucleotide (A,C,G,T) to each position where the population is // fixed or nearly so, and N to each position that is either // polymorphic within the population or has insufficient coverage // depth to make a call. We can do this with a likelihood ratio // test of the read counts, testing whether the allele frequency // of the dominant allele is significantly larger than some // threshold p) , stepping through each nucleotide position across // RAD tags. 
// double nuc_1 = nuc[0].second; double nuc_2 = nuc[1].second; double n_ratio = 0.0; double l_ratio = 0.0; double epsilon = -1 * (log(1 - barcode_err_freq) / barcode_size); n_ratio = nuc_1 / (nuc_1 + nuc_2); l_ratio = nuc_1 * log( ((4 * nuc_1 * (1 - epsilon)) + ((nuc_1 + nuc_2) * epsilon)) / ((4 * p_freq * (nuc_1 + nuc_2) * (1 - epsilon)) + ((nuc_1 + nuc_2) * epsilon)) ); l_ratio += nuc_2 * log( ((4 * nuc_2 * (1 - epsilon)) + ((nuc_1 + nuc_2) * epsilon)) / ((4 * (1 - p_freq) * (nuc_1 + nuc_2) * (1 - epsilon)) + ((nuc_1 + nuc_2) * epsilon)) ); //cerr << "Nuc_1: " << nuc_1 << " Nuc_2: " << nuc_2 << " Likelihood ratio: " << l_ratio << "\n"; if (n_ratio < p_freq || l_ratio < nucleotide_fixed_limit) { // // This position is likely a SNP, record it's homozygosity as 'unknown'. // SNP *snp = new SNP; snp->type = snp_type_unk; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].first; tag->snps.push_back(snp); } else { // // Otherwise, this position is homozygous. // SNP *snp = new SNP; snp->type = snp_type_hom; snp->col = col; snp->lratio = l_ratio; snp->rank_1 = nuc[0].first; snp->rank_2 = nuc[1].first; tag->snps.push_back(snp); } return 0; } // // ln L(1/2) = ln(n! / n_1!n_2!n_3!n_4!) + // (n_1 + n_2) * ln(n_1 + n_2 / 2n) + // (n_3 + n_4) * ln(n_3 + n_4 / 2n) // double heterozygous_likelihood(int col, map &nuc) { vector > cnts; map::iterator i; double n = 0; for (i = nuc.begin(); i != nuc.end(); i++) { n += i->second; cnts.push_back(make_pair(i->first, i->second)); } sort(cnts.begin(), cnts.end(), compare_pair); double n_1 = cnts[0].second; double n_2 = cnts[1].second; double n_3 = cnts[2].second; double n_4 = cnts[3].second; double term_1 = reduced_log_factorial(n, n_1) - (log_factorial(n_2) + log_factorial(n_3) + log_factorial(n_4)); double term_3 = (n_3 + n_4 > 0) ? log((n_3 + n_4) / (2 * n)) : 0; double lnl = term_1 + ((n_1 + n_2) * log((n_1 + n_2) / (2 * n))) + ((n_3 + n_4) * term_3); return lnl; } // // ln L(1/1) = ln(n! 
/ n_1!n_2!n_3!n_4!) + // n_1 * ln(n_1 / n) + // (n - n_1) * ln(n - n_1 / 3n) // double homozygous_likelihood(int col, map &nuc) { vector > cnts; map::iterator i; double n = 0; for (i = nuc.begin(); i != nuc.end(); i++) { n += i->second; cnts.push_back(make_pair(i->first, i->second)); } sort(cnts.begin(), cnts.end(), compare_pair); double n_1 = cnts[0].second; double n_2 = cnts[1].second; double n_3 = cnts[2].second; double n_4 = cnts[3].second; double term_1 = reduced_log_factorial(n, n_1) - (log_factorial(n_2) + log_factorial(n_3) + log_factorial(n_4)); double term_3 = n - n_1 > 0 ? log((n - n_1) / (3 * n)) : 0; double lnl = term_1 + (n_1 * log(n_1 / n)) + ((n - n_1) * term_3); return lnl; } stacks-1.35/src/models.h000644 000765 000024 00000004153 12441417455 015737 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2012, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __MODELS_H__ #define __MODELS_H__ #include #include #include using std::string; #include using std::vector; #include using std::map; #include using std::pair; using std::make_pair; #include #include using std::ifstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include "constants.h" #include "utils.h" #include "mstack.h" #include "locus.h" // // Possible models for calling nucleotide positions as fixed or variable // enum modelt {fixed, snp, bounded}; // // For use with the multinomial model to call fixed nucleotides. // extern const int barcode_size; extern modelt model_type; extern double p_freq; extern double barcode_err_freq; extern double bound_low; extern double bound_high; extern double heterozygote_limit; extern double homozygote_limit; snp_type call_bounded_multinomial_snp(MergedStack *, int, map &, bool); snp_type call_bounded_multinomial_snp(Locus *, int, map &); snp_type call_multinomial_snp(MergedStack *, int, map &, bool); snp_type call_multinomial_snp(Locus *, int, map &); int call_multinomial_fixed(MergedStack *, int, map &); double heterozygous_likelihood(int, map &); double homozygous_likelihood(int, map &); #endif // __MODELS_H__ stacks-1.35/src/mst.cc000644 000765 000024 00000016406 12441417455 015421 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // mst.cc -- routines to implement the Minimum Spanning Tree Class:. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id$ // #include "mst.h" Edge *Node::add_edge(Node *n, int dist) { Edge *e = new Edge; e->child = n; e->dist = dist; this->edges.push_back(e); return e; } bool min_span_tree_cmp(const Node *lhs, const Node *rhs) { return (lhs->min_dist > rhs->min_dist); } Node *MinSpanTree::add_node(int id) { Node *n = new Node(id); this->nodes[id] = n; return n; } Node *MinSpanTree::add_node(string label) { // // Obtain an ID for this node. // uint id = this->id_cnt; Node *n = new Node(id); n->label = label; this->nodes[id] = n; this->node_key[label] = id; this->id_cnt++; return n; } Node *MinSpanTree::node(int id) { return this->nodes[id]; } Node *MinSpanTree::node(string label) { uint id = this->node_key[label]; return this->nodes[id]; } Node *MinSpanTree::head() { return this->nodes.begin()->second; } int MinSpanTree::node_count() { return this->nodes.size(); } bool MinSpanTree::connected(int *ids, int size) { set valid, visited; queue q; if (size == 1) return true; for (int i = 0; i < size; i++) valid.insert(ids[i]); // // Take the first ID and begin traversing the tree. If we hit // a node not in the ids set, stop traversing this branch. Check that // all nodes are directly connected. // int valid_cnt = 0; Node *n = this->node(ids[0]); q.push(n); while (!q.empty() && valid_cnt < size) { n = q.front(); q.pop(); visited.insert(n->id); if (valid.count(n->id)) { valid_cnt++; for (uint i = 0; i < n->min_adj_list.size(); i++) if (visited.count(n->min_adj_list[i]->id) == false) q.push(n->min_adj_list[i]); } } if (valid_cnt == size) return true; else return false; } // // Build a minimum spanning tree using Prim's alogorithm. Assume all necessary // nodes have been added using the add_node function. 
// int MinSpanTree::build_tree() { // // Vector, which we treat as a binary heap to access nodes that are of minimal distance // vector q; // // Select an initial node to process and initialize its minimum distance. // Node *n = this->nodes.begin()->second; n->min_dist = 0; // // Add all of the nodes to the binary heap; process them in order of min_dist // map::iterator it; for (it = this->nodes.begin(); it != this->nodes.end(); it++) q.push_back((*it).second); make_heap(q.begin(), q.end(), min_span_tree_cmp); while (q.size() > 0) { n = q.front(); pop_heap(q.begin(), q.end()); q.pop_back(); n->update = false; //cerr << "Examining node: " << n->id << " (" << n->min_dist << ")\n"; // // Record the minimum connection between parent and n. // if (n->parent != NULL) { n->parent->min_adj_list.push_back(n); n->min_adj_list.push_back(n->parent); } // // Iterate through all of the edges of n and update the // minimum distance to the proper nodes. // Edge *e; for (uint i = 0; i < n->edges.size(); i++) { e = n->edges[i]; if (e->child->update == true && e->dist < e->child->min_dist) { e->child->parent = n; e->child->min_dist = e->dist; //cerr << " Updating node: " << e->child->id << " to have distance: " << e->child->min_dist << "\n"; } } // // Resort the heap after possibly changing many min_dist values // make_heap(q.begin(), q.end(), min_span_tree_cmp); } return 0; } string MinSpanTree::vis(bool overlay) { uint j; double d, scale, scaled_d; char label[32]; int scale_factor = 20; // // Output a specification to visualize the minimum spanning tree using graphviz: // http://www.graphviz.org/ // stringstream data; data << "graph stacks_" << this->nodes.size() << " {\n" << "rankdir=LR\n" << "size=\"" << scale_factor << "!\"\n" << "overlap=false\n" << "node [shape=circle style=filled fillcolor=\"#3875d7\" fontname=\"Arial\"];\n" << "edge [fontsize=8.0 fontname=\"Arial\" color=\"#aaaaaa\"];\n"; map::iterator i; set visited; queue q; // // If overlay==true, write the minimum 
spanning tree on top of the full tree as a subgraph. // data << "subgraph mst {\n" << " edge [penwidth=5 fontsize=12.0 fontcolor=\"black\" color=\"black\"]\n" << " node [fillcolor=\"red\" fontcolor=\"white\"]\n"; Node *n = this->head(); q.push(n); while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); for (uint i = 0; i < n->min_adj_list.size(); i++) { data << " "; n->label.length() > 0 ? data << n->label : data << n->id; data << "--"; n->min_adj_list[i]->label.length() > 0 ? (data << n->min_adj_list[i]->label) : (data << n->min_adj_list[i]->id); data << "\n"; if (visited.count(n->min_adj_list[i]->id) == 0) q.push(n->min_adj_list[i]); } } data << "}\n"; // // Scale the graph to display on a scale_factor inch canvas. Find the largest edge weight // and scale the edge lengths to fit the canvas. // for (i = this->nodes.begin(); i != this->nodes.end(); i++) { n = i->second; for (j = 0; j < n->edges.size(); j++) scale = n->edges[j]->dist > scale ? n->edges[j]->dist : scale; } scale = scale / scale_factor; // // Write out edges. // for (i = this->nodes.begin(); i != this->nodes.end(); i++) { n = i->second; for (j = 0; j < n->edges.size(); j++) { d = n->edges[j]->dist; scaled_d = d / scale; scaled_d = scaled_d < 0.75 ? 0.75 : scaled_d; sprintf(label, "%.1f", d); n->label.length() > 0 ? (data << n->label) : (data << n->id); data << " -- "; n->edges[j]->child->label.length() > 0 ? (data << n->edges[j]->child->label) : (data << n->edges[j]->child->id); data << " [len=" << scaled_d << ", label=" << label << "];\n"; } } data << "}\n"; return data.str(); } stacks-1.35/src/mst.h000644 000765 000024 00000004541 12441417455 015260 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __MST_H__ #define __MST_H__ #include #include using std::string; #include using std::map; #include using std::vector; #include using std::set; #include using std::queue; #include using std::stringstream; #include using std::cerr; #include "constants.h" typedef unsigned int uint; class Node; class Edge { public: uint dist; // Distance or weight Node *child; }; class Node { public: uint id; string label; vector edges; Node *parent; bool update; uint min_dist; // // List of adjacent nodes that are connected by minimal distance // vector min_adj_list; Node(uint id) { this->id = id; this->parent = NULL; this->update = true; this->min_dist = 1000000; } ~Node() { for (uint i = 0; i < this->edges.size(); i++) delete this->edges[i]; } Edge *add_edge(Node *, int); }; class MinSpanTree { map nodes; map node_key; uint id_cnt; public: MinSpanTree() { id_cnt = 0; } ~MinSpanTree() { for (uint i = 0; i < this->nodes.size(); i++) delete this->nodes[i]; } Node *add_node(int id); Node *add_node(string label); int build_tree(); int node_count(); Node *node(int id); Node *node(string label); Node *head(); bool connected(int *, int); string vis(bool); }; bool min_span_tree_cmp(const Node *, const Node *); #endif // __MST_H__ stacks-1.35/src/mstack.cc000644 000765 000024 00000020164 12441417455 016074 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // 
Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // mstack.cc -- implementation of the MergedStack Class // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id: mstack.cc 1987 2010-11-01 05:43:50Z catchen $ // #include "mstack.h" #include "models.h" MergedStack::MergedStack() { this->id = 0; this->count = 0; this->len = 0; this->con = NULL; this->matrix = NULL; this->lnl = 0.0; this->cohort_id = -1; this->deleveraged = false; this->masked = false; this->blacklisted = false; this->lumberjackstack = false; } MergedStack::~MergedStack() { delete [] this->con; for (uint i = 0; i < snps.size(); i++) delete this->snps[i]; delete [] this->matrix; } int MergedStack::add_consensus(const char *seq) { if (this->con != NULL) delete [] this->con; this->len = strlen(seq); this->con = new char[len + 1]; strncpy(this->con, seq, len); this->con[len] = '\0'; return 0; } int MergedStack::add_consensus(DNASeq *seq) { if (this->con != NULL) delete [] this->con; this->len = seq->size; this->con = new char[this->len + 1]; this->con = seq->seq(this->con); return 0; } int MergedStack::add_consensus(DNANSeq *seq) { if (this->con != NULL) delete [] this->con; this->len = seq->size(); this->con = new char[this->len + 1]; this->con = seq->seq(this->con); return 0; } int MergedStack::add_dist(const int id, const int dist) { // // Store the ID and distance as a 
pair, ID in the first position, // dist in the second. // pair p(id, dist); this->dist.push_back(p); return 0; } DNASeq **MergedStack::gen_matrix(map &unique, map &rem) { Stack *tag; // // Create a two-dimensional array, each row containing one read. For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. // // We do not allocate memory for the second dimension of the array, we simply // reuse the existing char arrays in the unique and rem maps // uint cnt = this->count + this->remtags.size(); if (this->matrix != NULL) delete [] this->matrix; this->matrix = new DNASeq * [cnt]; vector::iterator j; int i = 0; for (j = this->utags.begin(); j != this->utags.end(); j++) { tag = unique[*j]; for (uint k = 0; k < tag->count(); k++) { this->matrix[i] = tag->seq; i++; } } // For each remainder tag that has been merged into this Stack, add the sequence. for (j = this->remtags.begin(); j != this->remtags.end(); j++) { this->matrix[i] = rem[*j]->seq; i++; } return this->matrix; } DNANSeq ** MergedStack::gen_matrix(map &unique) { PStack *tag; // // Create a two-dimensional array, each row containing one read. For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. // // We do not allocate memory for the second dimension of the array, we simply // reuse the existing char arrays in the unique and rem maps // uint cnt = this->count; if (this->pmatrix != NULL) delete [] this->matrix; this->pmatrix = new DNANSeq * [cnt]; vector::iterator j; int i = 0; for (j = this->utags.begin(); j != this->utags.end(); j++) { tag = unique[*j]; for (uint k = 0; k < tag->count; k++) { this->pmatrix[i] = tag->seq; i++; } } return this->pmatrix; } double MergedStack::calc_likelihood() { if (this->matrix == NULL || this->snps.size() == 0) return 0; // // Iterate over each column of the array and call the consensus base. 
// int row, col, tot; int length = this->matrix[0]->size; int height = this->count + this->remtags.size(); map nuc; map::iterator max, n; DNASeq *d; this->lnl = 0; for (col = 0; col < length; col++) { nuc['A'] = 0; nuc['G'] = 0; nuc['C'] = 0; nuc['T'] = 0; // // Count the nucleotide type at each position in the column. // for (row = 0; row < height; row++) { d = this->matrix[row]; nuc[(*d)[col]]++; } // // Find the base with a plurality of occurances and call it. // max = nuc.end(); tot = 0; for (n = nuc.begin(); n != nuc.end(); n++) { tot += n->second; if (max == nuc.end() || n->second > max->second) max = n; } // // For nucleotide positions with potential polymorphism (i.e. two or more alleles at // the locus that differ at that position), first find the ML genotype (call_multinomial_snp). // If it returns 'het' calculate the heterozygous_likelihood(), otherwise calculate homozygous // likelihood. // snp_type res = this->snps[col]->type; if (res == snp_type_het) this->lnl += heterozygous_likelihood(col, nuc); else if (res == snp_type_hom) this->lnl += homozygous_likelihood(col, nuc); else { double homlnl = homozygous_likelihood(col, nuc); double hetlnl = heterozygous_likelihood(col, nuc); this->lnl += hetlnl > homlnl ? hetlnl : homlnl; } } return this->lnl; } double MergedStack::calc_likelihood_pstacks() { if (this->pmatrix == NULL || this->snps.size() == 0) return 0; // // Iterate over each column of the array and call the consensus base. // int row, col, tot; int length = this->pmatrix[0]->size(); int height = this->count; map nuc; map::iterator max, n; DNANSeq *d; this->lnl = 0; for (col = 0; col < length; col++) { nuc['A'] = 0; nuc['G'] = 0; nuc['C'] = 0; nuc['T'] = 0; // // Count the nucleotide type at each position in the column. // for (row = 0; row < height; row++) { d = this->pmatrix[row]; nuc[(*d)[col]]++; } // // Find the base with a plurality of occurances and call it. 
// max = nuc.end(); tot = 0; for (n = nuc.begin(); n != nuc.end(); n++) { tot += n->second; if (max == nuc.end() || n->second > max->second) max = n; } // // For nucleotide positions with potential polymorphism (i.e. two or more alleles at // the locus that differ at that position), first find the ML genotype (call_multinomial_snp). // If it returns 'het' calculate the heterozygous_likelihood(), otherwise calculate homozygous // likelihood. // snp_type res = this->snps[col]->type; if (res == snp_type_het) this->lnl += heterozygous_likelihood(col, nuc); else if (res == snp_type_hom) this->lnl += homozygous_likelihood(col, nuc); else { double homlnl = homozygous_likelihood(col, nuc); double hetlnl = heterozygous_likelihood(col, nuc); this->lnl += hetlnl > homlnl ? hetlnl : homlnl; } } return this->lnl; } string MergedStack::write_cmb() { stringstream s; uint size = this->utags.size(); s << "{"; for (uint i = 0; i < size; i++) { s << this->utags[i]; if (i < size - 1) s << ", "; } s << "}"; return s.str(); } stacks-1.35/src/mstack.h000644 000765 000024 00000005435 12441417455 015742 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2012, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __MSTACK_H__ #define __MSTACK_H__ #include using std::string; #include using std::vector; #include using std::map; #include using std::pair; using std::make_pair; #include using std::cerr; #include "stacks.h" class MergedStack { public: int id; // Identifier for the merged stack. char *con; // Consensus sequence uint len; // Sequence length // // Stack component parts // int count; // Number of merged stacks vector utags; // Stack IDs that have been merged into this MergedStack vector > dist; // Vector describing the distance between this stack and other stacks. vector remtags; // Remainder tag IDs that have been merged into this Stack DNASeq **matrix; // Two-dimensional array for iterating over the combined stack (stacks and remainders). DNANSeq **pmatrix; // Two-dimensional array for iterating over the combined stack aligned to a reference.. int cohort_id; // Group ID of all stacks that were originally part of the same subgraph double lnl; // Log likelihood of this stack // // Mapping components // PhyLoc loc; // Physical genome location of this Stack. vector snps; // Single Nucleotide Polymorphisms found in this Stack map alleles; // Set of alleles defined by the SNPs found in this Stack // // Flags // bool deleveraged; bool masked; bool blacklisted; bool lumberjackstack; MergedStack(); ~MergedStack(); int add_consensus(const char *); int add_consensus(DNASeq *); int add_consensus(DNANSeq *); int add_dist(const int id, const int dist); DNASeq **gen_matrix(map &, map &); DNANSeq **gen_matrix(map &); double calc_likelihood(); double calc_likelihood_pstacks(); string write_cmb(); }; #endif // __MSTACK_H__ stacks-1.35/src/ordered.h000644 000765 000024 00000025332 12533677757 016121 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2014, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __ORDERED_H__ #define __ORDERED_H__ #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; #include using std::vector; #include using std::map; #include using std::set; #include "PopSum.h" enum loc_type {haplotype, snp}; template class Ordered { public: ofstream *log_fh; PopSum *psum; int incompatible_loci; int multiple_loci; Ordered() { } virtual ~Ordered() { } int init_sites(vector &, map &, vector &); int init_sites(vector &, map &, vector &, int); int init_sites(vector &, map &, vector &, int, int); int init_haplotypes(vector &, map &, vector &); }; template int Ordered::init_sites(vector &sites, map &sites_key, vector &sorted_loci) { CSLocus *loc; LocTally *ltally; int len; set bps; // // We need to create an array to store all the SNPs for exporting. We must // account for positions in the genome that are covered by more than one RAD tag. // for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); ltally = this->psum->locus_tally(loc->id); for (int k = 0; k < len; k++) { if (ltally->nucs[k].allele_cnt == 2) bps.insert(ltally->nucs[k].bp); } } sites.resize(bps.size(), NULL); // // Create a key describing where in the sites array to find each basepair coordinate. 
// set::iterator it; int i = 0; for (it = bps.begin(); it != bps.end(); it++) { sites_key[*it] = i; i++; } return 0; } template int Ordered::init_sites(vector &sites, map &sites_key, vector &sorted_loci, int pop_id) { CSLocus *loc; LocSum *lsum; int len; set bps; // // We need to create an array to store all the summary statistics for smoothing. We must // account for positions in the genome that are covered by more than one RAD tag. // for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); lsum = this->psum->pop(loc->id, pop_id); for (int k = 0; k < len; k++) { if (lsum->nucs[k].num_indv > 0) bps.insert(lsum->nucs[k].bp); } } sites.resize(bps.size(), NULL); // // Create a key describing where in the sites array to find each basepair coordinate. // set::iterator it; int i = 0; for (it = bps.begin(); it != bps.end(); it++) { sites_key[*it] = i; i++; } return 0; } template int Ordered::init_sites(vector &sites, map &sites_key, vector &sorted_loci, int pop_id_1, int pop_id_2) { CSLocus *loc; LocSum *lsum_1, *lsum_2; int len; set bps; // // We need to create an array to store all the pair values for computing smoothed Fst. We must // account for positions in the genome that are covered by more than one RAD tag. // for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); lsum_1 = this->psum->pop(loc->id, pop_id_1); lsum_2 = this->psum->pop(loc->id, pop_id_2); for (int k = 0; k < len; k++) { if (lsum_1->nucs[k].num_indv > 0 && lsum_2->nucs[k].num_indv > 0) bps.insert(lsum_1->nucs[k].bp); // slow } } sites.resize(bps.size(), NULL); // // Create a key describing where in the sites array to find each basepair coordinate. 
// set::iterator it; int i = 0; for (it = bps.begin(); it != bps.end(); it++) { sites_key[*it] = i; // slow i++; } return 0; } template int Ordered::init_haplotypes(vector &sites, map &sites_key, vector &sorted_loci) { CSLocus *loc; int bp; set bps; for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; bp = loc->sort_bp(); bps.insert(bp); } sites.resize(bps.size(), NULL); // // Create a key describing where in the sites array to find each basepair coordinate. // set::iterator it; int i = 0; for (it = bps.begin(); it != bps.end(); it++) { sites_key[*it] = i; i++; } return 0; } template class OHaplotypes: public Ordered { public: OHaplotypes(): Ordered() { } int order(vector &, map &, vector &); }; template int OHaplotypes::order(vector &sites, map &sites_key, vector &sorted_loci) { this->init_haplotypes(sites, sites_key, sorted_loci); return 0; }; template class OPopPair: public Ordered { public: OPopPair(PopSum *psum, ofstream &log_fh): Ordered() { this->log_fh = &log_fh; this->psum = psum; } int order(vector &, map &, vector &, int, int); }; template int OPopPair::order(vector &sites, map &sites_key, vector &sorted_loci, int pop_1, int pop_2) { CSLocus *loc; StatT *pair; int len; this->incompatible_loci = 0; this->multiple_loci = 0; this->init_sites(sites, sites_key, sorted_loci, pop_1, pop_2); for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); for (int k = 0; k < len; k++) { pair = this->psum->Fst(loc->id, pop_1, pop_2, k); // // Locus is incompatible, log this position. // if (pair == NULL) { this->incompatible_loci++; *(this->log_fh) << "between_population\t" << "incompatible_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(k) << "\t" << k << "\t" << pop_1 << "\t" << pop_2 << "\n"; delete pair; continue; } pair->loc_id = loc->id; pair->bp = loc->sort_bp(k); pair->col = k; // // Locus is fixed in both populations, or was only found in one population. 
// if (pair->pi == 0) { delete pair; continue; } // // Check if this basepair position is already covered by a RAD site. // if (sites[sites_key[pair->bp]] != NULL) { this->multiple_loci++; *(this->log_fh) << "between_population\t" << "multiple_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << pair->bp << "\t" << k << "\t" << pop_key[pop_1] << "\t" << pop_key[pop_2] << "\n"; delete pair; continue; } sites[sites_key[pair->bp]] = pair; } } return 0; }; template class OSumStat: public Ordered { public: OSumStat(PopSum *psum, ofstream &log_fh): Ordered() { this->log_fh = &log_fh; this->psum = psum; } int order(vector &, vector &, int); }; template int OSumStat::order(vector &sites, vector &sorted_loci, int pop_id) { this->incompatible_loci = 0; this->multiple_loci = 0; map sites_key; this->init_sites(sites, sites_key, sorted_loci, pop_id); CSLocus *loc; LocSum *lsum; int len; // // Assign nucleotides to their proper, ordered location in the genome, // checking that a site hasn't already been covered by another RAD locus. 
// for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); lsum = this->psum->pop(loc->id, pop_id); for (int k = 0; k < len; k++) { if (lsum->nucs[k].num_indv == 0) continue; if (sites_key.count(lsum->nucs[k].bp) == 0) { cerr << "Error: locus " << lsum->nucs[k].loc_id << " at " << lsum->nucs[k].bp << "bp is not defined in the sites map.\n"; } else if (sites[sites_key[lsum->nucs[k].bp]] == NULL) { sites[sites_key[lsum->nucs[k].bp]] = &(lsum->nucs[k]); } else { this->multiple_loci++; *(this->log_fh) << "within_population\t" << "multiple_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << lsum->nucs[k].bp << "\t" << k << "\t" << pop_id << "\t" << "conflicts with locus " << sites[sites_key[lsum->nucs[k].bp]]->loc_id << "\n"; } } } return 0; }; template class OLocTally: public Ordered { public: OLocTally(PopSum *psum, ofstream &log_fh): Ordered() { this->log_fh = &log_fh; this->psum = psum; } int order(vector &, vector &); }; template int OLocTally::order(vector &sites, vector &sorted_loci) { this->incompatible_loci = 0; this->multiple_loci = 0; map sites_key; this->init_sites(sites, sites_key, sorted_loci); CSLocus *loc; LocTally *ltally; int len; // // Assign nucleotides to their proper, ordered location in the genome, // checking that a site hasn't already been covered by another RAD locus. 
// for (uint pos = 0; pos < sorted_loci.size(); pos++) { loc = sorted_loci[pos]; len = strlen(loc->con); ltally = this->psum->locus_tally(loc->id); for (int k = 0; k < len; k++) { if (ltally->nucs[k].allele_cnt != 2) continue; if (sites_key.count(ltally->nucs[k].bp) == 0) { cerr << "Error: locus " << ltally->nucs[k].loc_id << " at " << ltally->nucs[k].bp << "bp is not defined in the sites map.\n"; } else if (sites[sites_key[ltally->nucs[k].bp]] == NULL) { sites[sites_key[ltally->nucs[k].bp]] = &(ltally->nucs[k]); } else { this->multiple_loci++; *(this->log_fh) << "within_population\t" << "multiple_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << ltally->nucs[k].bp << "\t" << k << "\t" << "conflicts with locus " << sites[sites_key[ltally->nucs[k].bp]]->loc_id << "\n"; } } } return 0; }; #endif // __ORDERED_H__ stacks-1.35/src/phasedstacks.cc000644 000765 000024 00000140303 12533677757 017304 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // phasedstacks -- analyse phased data, descended from a Stacks analysis. // #include "phasedstacks.h" // Global variables to hold command-line options. 
FileT in_file_type = FileT::unknown; int num_threads = 1; int batch_id = 0; string cat_path; string in_path; string out_path; string out_file; string pmap_path; bool haplotypes = false; bool write_zeros = true; double p_value_cutoff = 0.05; double chi_sq_limit = 3.84; double minor_freq_lim = 0.1; double min_inform_pairs = 0.90; uint max_pair_dist = 1000000; uint bucket_dist = 5000; double dprime_threshold = false; double dprime_threshold_level = 0.0; set whitelist, blacklist; map pop_map; map pop_cnts; int main (int argc, char* argv[]) { parse_command_line(argc, argv); if (p_value_cutoff == 0.1) { chi_sq_limit = 2.71; } else if (p_value_cutoff == 0.05) { chi_sq_limit = 3.84; } else if (p_value_cutoff == 0.01) { chi_sq_limit = 6.64; } else if (p_value_cutoff == 0.001) { chi_sq_limit = 10.83; } cerr << "Minor allele frequency cutoff: " << minor_freq_lim << "\n" << "Looking for "; switch(in_file_type) { case FileT::beagle: cerr << "Beagle"; break; case FileT::phase: cerr << "PHASE"; break; case FileT::fastphase: default: cerr << "fastPhase"; break; } cerr << " input files.\n" << "Size of buckets for binning D' values at a particular distance: " << bucket_dist / 1000 << "kb.\n"; if (dprime_threshold) cerr << "D' Threshold set at " << dprime_threshold_level << ". D' values above this limit will be set to 1.0, values below will be set to 0.0.\n"; // // Parse the population map. // parse_population_map(pmap_path, pop_map, pop_cnts); // // Set the number of OpenMP parallel threads to execute. // #ifdef _OPENMP omp_set_num_threads(num_threads); #endif vector > files; if (!build_file_list(files)) exit(1); cerr << "Identified " << files.size() << " files.\n"; // // Open the log file. 
// stringstream log; log << "phasedstacks.log"; string log_path = in_path + log.str(); ofstream log_fh(log_path.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening log file '" << log_path << "'\n"; exit(1); } init_log(log_fh, argc, argv); // // Load the catalog // cerr << "Parsing the catalog...\n"; stringstream catalog_file; map catalog; bool compressed = false; int res; catalog_file << cat_path << "batch_" << batch_id << ".catalog"; if ((res = load_loci(catalog_file.str(), catalog, false, false, compressed)) == 0) { cerr << "Unable to load the catalog '" << catalog_file.str() << "'\n"; return 0; } cerr << "done.\n"; // // Implement the black/white list // reduce_catalog(catalog, whitelist, blacklist); map fgt_block_lens, fgt_snp_cnts; map dp_block_lens, dp_snp_cnts; // // Vectors to store D' measures of SNPs at bucketed distances. // vector dprime_buckets, dprime_bucket_cnts; for (uint i = 0; i < files.size(); i++) { // if (files[i].second != "batch_1.groupV.phase") continue; PhasedSummary *psum = NULL; if (in_file_type == FileT::fastphase) { if ((psum = parse_fastphase(in_path + files[i].second)) == NULL) { cerr << "Unable to parse fastPhase input files.\n"; exit(1); } } else if (in_file_type == FileT::beagle && haplotypes) { if ((psum = parse_beagle_haplotypes(catalog, in_path + files[i].second)) == NULL) { cerr << "Unable to parse Beagle input files.\n"; exit(1); } } else if (in_file_type == FileT::beagle) { if ((psum = parse_beagle(catalog, in_path + files[i].second)) == NULL) { cerr << "Unable to parse Beagle input files.\n"; exit(1); } } // // Summarize the genotypes in the populations. 
// summarize_phased_genotypes(psum); // for (uint j = 0; j < psum->size; j++) { // cerr << "BP: " << psum->nucs[j].bp << "\t" // << "A: " << std::setw(3) << psum->nucs[j].nuc[0] << " " // << "C: " << std::setw(3) << psum->nucs[j].nuc[1] << " " // << "G: " << std::setw(3) << psum->nucs[j].nuc[2] << " " // << "T: " << std::setw(3) << psum->nucs[j].nuc[3] << "\n"; // } // // Calculate D' // cerr << "Calculating D'..."; calc_dprime(psum); cerr << "done.\n"; write_dprime(in_path + files[i].second, psum); // // Generate haplotype blocks based on D'. // dprime_blocks(in_path + files[i].second, pop_map, psum, dp_block_lens, dp_snp_cnts); // // Generate haplotype blocks using the four gamete test. // four_gamete_test(in_path + files[i].second, pop_map, psum, fgt_block_lens, fgt_snp_cnts); // // Bucket the D' measures by distance between SNPs. // bucket_dprime(dprime_buckets, dprime_bucket_cnts, psum); // // Free the Samples objects // delete psum; } // // Write average D' values bucketed according to their distance in the genome. // write_buckets(in_path, dprime_buckets, dprime_bucket_cnts); // // Write the FGT bucketed distances. // log_fh << "# Distribution of FGT haplotype block lengths.\n"; map::iterator buck_it; for (buck_it = fgt_block_lens.begin(); buck_it != fgt_block_lens.end(); buck_it++) log_fh << buck_it->first << "\t" << buck_it->second << "\n"; // // Write the FGT bucketed SNP counts. // log_fh << "\n\n" << "# Distribution of FGT SNP counts per haplotype block.\n"; for (buck_it = fgt_snp_cnts.begin(); buck_it != fgt_snp_cnts.end(); buck_it++) log_fh << buck_it->first << "\t" << buck_it->second << "\n"; // // Write the D' haplotype block bucketed distances. // log_fh << "\n\n" << "# Distribution of D' haplotype block lengths.\n"; for (buck_it = dp_block_lens.begin(); buck_it != dp_block_lens.end(); buck_it++) log_fh << buck_it->first << "\t" << buck_it->second << "\n"; // // Write the D' bucketed SNP counts. 
// log_fh << "\n\n" << "# Distribution of D' SNP counts per haplotype block.\n"; for (buck_it = dp_snp_cnts.begin(); buck_it != dp_snp_cnts.end(); buck_it++) log_fh << buck_it->first << "\t" << buck_it->second << "\n"; log_fh.close(); return 0; } int bucket_dprime(vector &dprime_buckets, vector &dprime_bucket_cnts, PhasedSummary *psum) { uint bucket, dist, max_bucket; uint max_dist = 0; // // Check that we have enough buckets in our vectors. Find the maximum distance between // SNPs on this chromosome and add buckets as necessary. // for (uint i = 0; i < psum->size; i++) { for (uint j = i+1; j < psum->size; j++) { if (psum->nucs[i].freq < minor_freq_lim || psum->nucs[j].freq < minor_freq_lim) continue; if (write_zeros == false && psum->dprime[i][j].chisq_p == false) continue; dist = psum->nucs[j].bp - psum->nucs[i].bp; max_dist = dist > max_dist ? dist : max_dist; } } max_bucket = max_dist / bucket_dist; if (dprime_buckets.size() < max_bucket) { uint cnt = max_bucket + 1 - dprime_buckets.size(); for (uint i = 0; i < cnt; i++) { dprime_buckets.push_back(0.0); dprime_bucket_cnts.push_back(0.0); } } // // Populate buckets // for (uint i = 0; i < psum->size; i++) { for (uint j = i+1; j < psum->size; j++) { if (psum->nucs[i].freq < minor_freq_lim || psum->nucs[j].freq < minor_freq_lim) continue; if (write_zeros == false && psum->dprime[i][j].chisq_p == false) continue; bucket = ((psum->nucs[j].bp - psum->nucs[i].bp) / bucket_dist); dprime_buckets[bucket] += (psum->dprime[i][j].chisq_p ? psum->dprime[i][j].dprime : 0.0); dprime_bucket_cnts[bucket]++; } } return 0; } int write_buckets(string path, vector &dprime_buckets, vector &dprime_bucket_cnts) { // // Write the bucketed D' data for plotting. 
// stringstream file; file << path << "Dprime_dist_buckets" << bucket_dist/1000 << "kb.tsv"; cerr << "Writing bucketed D' data to '" << file.str() << "'..."; ofstream fh(file.str().c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening D' file '" << file.str() << "'\n"; exit(1); } fh << "# Distance (Kb)\tD' Average\n"; for (uint i = 0; i < dprime_buckets.size(); i++) fh << (i * bucket_dist) << "\t" << std::setprecision(3) << (dprime_buckets[i] / dprime_bucket_cnts[i]) << "\n"; fh.close(); cerr << "done\n"; return 0; } int four_gamete_test(string path, map &pop_map, PhasedSummary *psum, map &len_buckets, map &snp_buckets) { // // Write haplotypes as found by the four gamete test: // Wang, et al., Am. J. Hum. Genet. 71:1227–1234, 2002 // string file = path + ".fgt.tsv"; cerr << "Determining four gamete test haplotypes blocks, writing to:\n '" << file << "'...\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening FGT file '" << file << "'\n"; exit(1); } fh << "# ID\tStart\tEnd\tLen\tSNP Count\tHaplotype Count\tHaplotype\tPopulations\tHapPopCnt\n"; uint id = 1; uint start, end, cnt, dist; bool bound; map buckets, snps; for (uint i = 0; i < psum->size; i++) { if (psum->nucs[i].freq < minor_freq_lim) continue; // // Start a new block. // start = i; bound = false; cnt = 0; uint j = i; do { if (psum->nucs[j].freq < minor_freq_lim) { j++; continue; } for (int k = j; k >= (int) start; k--) { if (psum->nucs[k].freq < minor_freq_lim) continue; if (psum->recomb[k][j] == true) { bound = true; end = j; } } j++; cnt++; } while (bound == false && j < psum->size); if (j == psum->size) end = j - 1; fh << id << "\t" << psum->nucs[start].bp << "\t" << psum->nucs[end].bp << "\t" << psum->nucs[end].bp - psum->nucs[start].bp + 1 << "\t" << cnt << "\t"; // // Bucket the SNP counts for plotting. // snps[cnt]++; // // Bucket the haplotype block lengths for plotting. 
// dist = (psum->nucs[end].bp - psum->nucs[start].bp + 1) / 10000 * 10000; buckets[dist]++; enumerate_haplotypes(fh, pop_map, psum, start, end); i = end; id++; } // // Write the bucketed distances. // fh << "\n\n" << "# Distribution of FGT haplotype block lengths.\n"; map::iterator it; for (it = buckets.begin(); it != buckets.end(); it++) { fh << it->first << "\t" << it->second << "\n"; len_buckets[it->first] += it->second; } // // Write the bucketed SNP counts. // fh << "\n\n" << "# Distribution of SNP counts per FGT haplotype block.\n"; for (it = snps.begin(); it != snps.end(); it++) { fh << it->first << "\t" << it->second << "\n"; snp_buckets[it->first] += it->second; } fh.close(); cerr << "done.\n"; return 0; } int dprime_blocks(string path, map &pop_map, PhasedSummary *psum, map &len_buckets, map &snp_buckets) { // // Generate haplotype blocks according to strength of linkage disequilibrium measured using D'. // Stacey B. Gabriel et al. (2002). The Structure of Haplotype Blocks in the Human Genome. Science 296:2225-2229 // string file = path + ".dpblocks.tsv"; cerr << "Determining D' haplotypes blocks, writing to:\n '" << file << "'...\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening D' blocks file '" << file << "'\n"; exit(1); } fh << "# ID\tStart\tEnd\tLen\tSNP Count\tHaplotype Count\tHaplotype\tPopulations\tHapPopCnt\n"; uint dist; set loci; vector > ld_pairs; map > ld_map; map buckets, snps; uint tot_pairs = 0; uint recomb_pairs = 0; for (uint i = 0; i < psum->size; i++) { if (psum->nucs[i].freq < minor_freq_lim) continue; for (uint j = i+1; j < psum->size; j++) { if (psum->nucs[j].freq < minor_freq_lim) continue; tot_pairs++; dist = psum->nucs[j].bp - psum->nucs[i].bp + 1; // // Does this pair of markers show a strong measure of LD? 
// if (psum->dprime[i][j].ci_high > 0.98 && psum->dprime[i][j].ci_low > 0.7 && dist <= max_pair_dist) { psum->dprime[i][j].type = strong_ld; ld_pairs.push_back(make_pair(i, j)); ld_map[i].push_back(j); loci.insert(i); loci.insert(j); } // // Does this pair of markers show a strong measure of historical recombination? // if (psum->dprime[i][j].ci_high < 0.9) { psum->dprime[i][j].type = recomb; recomb_pairs++; } } } // map >::iterator it; // for (it = ld_map.begin(); it != ld_map.end(); it++) { // cerr << " " << it->first << " ->\n"; // for (uint i = 0; i < it->second.size(); i++) // cerr << " " << it->second[i] << " dist: " << (psum->nucs[it->second[i]].bp - psum->nucs[it->first].bp + 1) << "bp\n"; // } cerr << " Total pairs examined: " << tot_pairs << "; Strong LD pairs: " << ld_pairs.size() << "; Recombination pairs: " << recomb_pairs << "; Informative markers: " << std::setprecision(3) << ((double) (ld_pairs.size() + recomb_pairs) / (double) tot_pairs) * 100 << "%\n"; // // Convert our list of loci into an ordered, linked list, where each node // represents a haplotype block. // dPrimeBlocks blocks; blocks.initialize(loci); // // Merge nodes together where D' is strong enough to maintain the block. // HBlock *cur; cur = blocks.head(); do { // // Can we merge these two nodes together? // if (check_adjacent_blocks(psum, cur)) { // cerr << " Merging blocks: "; // for (uint i = 0; i < cur->loci.size(); i++) // cerr << cur->loci[i] << ", "; // cerr << " and "; // for (uint i = 0; i < cur->next->loci.size(); i++) // cerr << cur->next->loci[i] << ", "; // cerr << "\n"; blocks.merge_adjacent(cur); } else { cur = cur->next; } } while (cur->next != NULL); // blocks.print(); // // Write the blocks. 
// uint id = 1; uint start, end; cur = blocks.head(); do { start = *(cur->loci.begin()); end = *(cur->loci.rbegin()); fh << id << "\t" << psum->nucs[start].bp << "\t" << psum->nucs[end].bp << "\t" << psum->nucs[end].bp - psum->nucs[start].bp + 1 << "\t" << cur->loci.size() << "\t"; // // Bucket the SNP counts for plotting. // snps[cur->loci.size()]++; // // Bucket the haplotype block lengths for plotting. // dist = (psum->nucs[end].bp - psum->nucs[start].bp + 1) / 10000 * 10000; buckets[dist]++; enumerate_haplotypes(fh, pop_map, psum, start, end); id++; cur = cur->next; } while (cur != NULL); // // Write the bucketed distances. // fh << "\n\n" << "# Distribution of D' haplotype block lengths.\n"; map::iterator it; for (it = buckets.begin(); it != buckets.end(); it++) { fh << it->first << "\t" << it->second << "\n"; len_buckets[it->first] += it->second; } // // Write the bucketed SNP counts. // fh << "\n\n" << "# Distribution of SNP counts per D' haplotype block.\n"; for (it = snps.begin(); it != snps.end(); it++) { fh << it->first << "\t" << it->second << "\n"; snp_buckets[it->first] += it->second; } fh.close(); cerr << "done.\n"; return 0; } bool check_adjacent_blocks(PhasedSummary *psum, HBlock *block) { // // Create a list of all loci contained in the two blocks. // uint start = *(block->loci.begin()); uint end = *(block->next->loci.rbegin()); // // Check the D' measure between each pair in the proposed combined block. 
// double tot = 0.0; double strong_ld = 0.0; for (uint i = start; i <= end; i++) { if (psum->nucs[i].freq < minor_freq_lim) continue; for (uint j = i + 1; j <= end; j++) { if (psum->dprime[i][j].type == uninformative || psum->nucs[j].freq < minor_freq_lim) continue; tot++; if (psum->dprime[i][j].type == strong_ld) strong_ld++; } } // cerr << "Comparing range " << start << " to " << end // << "; total pairs: " << tot << "; strong LD: " << strong_ld // << "; proportion: " << std::setprecision(3) << strong_ld / tot << "\n"; if (strong_ld / tot >= min_inform_pairs) return true; return false; } HBlock * dPrimeBlocks::merge_adjacent(HBlock *a) { // // Merge two adjacent nodes. // HBlock *b = a->next; for (uint i = 0; i < b->loci.size(); i++) a->loci.push_back(b->loci[i]); a->next = b->next; delete b; return a; } HBlock * dPrimeBlocks::initialize(set &loci) { set::iterator it, prev_it; HBlock *cur, *next; this->_head = new HBlock; it = loci.begin(); this->_head->loci.push_back(*it); it++; // // // // Create a node from each locus and add to it all immediately adjacent loci. 
// // // do { // this->_head->loci.push_back(*it); // prev_it = it; // it++; // } while (it != loci.end() && (*prev_it) + 1 == *it); next = this->_head; // if (it == loci.end()) return this->_head; do { cur = new HBlock; cur->loci.push_back(*it); it++; // do { // cur->loci.push_back(*it); // prev_it = it; // it++; // } while (it != loci.end() && // (*prev_it) + 1 == *it); next->next = cur; next = next->next; } while (it != loci.end()); return this->_head; } int dPrimeBlocks::print() { HBlock *cur = this->_head; while (cur != NULL) { for (uint i = 0; i < cur->loci.size(); i++) { if (i > 0) cerr << ", "; cerr << cur->loci[i]; } cerr << "\n"; cur = cur->next; } return 0; } int enumerate_haplotypes(ofstream &fh, map &pop_map, PhasedSummary *psum, uint start, uint end) { map >::iterator it; map > haplotypes; map::iterator sit; string haplotype; set pops; // // Enumerate all haplotypes occurring in this block. // for (uint k = 0; k < psum->sample_cnt; k++) { for (uint n = start; n <= end; n++) if (psum->nucs[n].freq >= minor_freq_lim) haplotype += psum->samples[k].nucs_1[n]; pops.insert(pop_map[psum->samples[k].name]); if (haplotypes.count(haplotype) == 0) haplotypes[haplotype][pop_map[psum->samples[k].name]] = 1; else haplotypes[haplotype][pop_map[psum->samples[k].name]]++; haplotype.clear(); } for (uint k = 0; k < psum->sample_cnt; k++) { for (uint n = start; n <= end; n++) if (psum->nucs[n].freq >= minor_freq_lim) haplotype += psum->samples[k].nucs_2[n]; pops.insert(pop_map[psum->samples[k].name]); if (haplotypes.count(haplotype) == 0) haplotypes[haplotype][pop_map[psum->samples[k].name]] = 1; else haplotypes[haplotype][pop_map[psum->samples[k].name]]++; haplotype.clear(); } // // Write the haplotypes. // float tot = 0.0; fh << haplotypes.size() << "\t"; for (it = haplotypes.begin(); it != haplotypes.end(); it++) { // // Haplotypes are stored per population; sum them up here. 
// for (sit = it->second.begin(); sit != it->second.end(); sit++) tot += sit->second; if (it != haplotypes.begin()) fh << ","; fh << it->first << "|" << std::setprecision(3) << tot / ((float) psum->sample_cnt * 2.0); } fh << "\t"; set::iterator pit; stringstream pops_str; // // Write which populations this haplotype block occurs in. // if (pops.size() == 0) fh << "-1\t"; else for (pit = pops.begin(); pit != pops.end(); pit++) pops_str << *pit << ","; fh << pops_str.str().substr(0, pops_str.str().length()-1); pops_str.str(""); // // Write the frequency of occurence of each haplotype in each population. // for (it = haplotypes.begin(); it != haplotypes.end(); it++) { pops_str << "\t"; for (pit = pops.begin(); pit != pops.end(); pit++) pops_str << (it->second)[*pit] << "|" << std::setprecision(3) << (float) (it->second)[*pit] / (float) (pop_cnts[*pit] * 2.0) << ","; fh << pops_str.str().substr(0, pops_str.str().length()-1); pops_str.str(""); } fh << "\n"; return 0; } int calc_dprime(PhasedSummary *psum) { #pragma omp parallel { char allele_A, allele_a, allele_B, allele_b; double freq_A, freq_a, freq_B, freq_b; double freq_AB, freq_Ab, freq_aB, freq_ab; double D, min, var, chisq; double tot = psum->sample_cnt * 2.0; uint hap_cnt; #pragma omp for schedule(dynamic, 1) for (uint i = 0; i < psum->size; i++) { // // Assign nucleotides to allele A, and a. // assign_alleles(psum->nucs[i], allele_A, allele_a, freq_A, freq_a); for (uint j = i+1; j < psum->size; j++) { // // Assign nucleotides to allele B, and b. // assign_alleles(psum->nucs[j], allele_B, allele_b, freq_B, freq_b); freq_AB = 0.0; freq_Ab = 0.0; freq_aB = 0.0; freq_ab = 0.0; hap_cnt = 0; D = 0.0; // // Tally up haplotype frequencies. 
// for (uint k = 0; k < psum->sample_cnt; k++) { if (psum->samples[k].nucs_1[i] == allele_A && psum->samples[k].nucs_1[j] == allele_B) freq_AB++; else if (psum->samples[k].nucs_1[i] == allele_A && psum->samples[k].nucs_1[j] == allele_b) freq_Ab++; else if (psum->samples[k].nucs_1[i] == allele_a && psum->samples[k].nucs_1[j] == allele_B) freq_aB++; else if (psum->samples[k].nucs_1[i] == allele_a && psum->samples[k].nucs_1[j] == allele_b) freq_ab++; if (psum->samples[k].nucs_2[i] == allele_A && psum->samples[k].nucs_2[j] == allele_B) freq_AB++; else if (psum->samples[k].nucs_2[i] == allele_A && psum->samples[k].nucs_2[j] == allele_b) freq_Ab++; else if (psum->samples[k].nucs_2[i] == allele_a && psum->samples[k].nucs_2[j] == allele_B) freq_aB++; else if (psum->samples[k].nucs_2[i] == allele_a && psum->samples[k].nucs_2[j] == allele_b) freq_ab++; } freq_AB = freq_AB / tot; freq_Ab = freq_Ab / tot; freq_aB = freq_aB / tot; freq_ab = freq_ab / tot; // // Using the four-gamete test, check whether recombination has occurred // between these two loci. // Four-gamete test: if no recombination has occurred between any two loci (SNPs) there will // be three haplotypes present, if recombination has occurred there will be four haplotypes. // hap_cnt += freq_AB > 0 ? 1 : 0; hap_cnt += freq_Ab > 0 ? 1 : 0; hap_cnt += freq_aB > 0 ? 1 : 0; hap_cnt += freq_ab > 0 ? 1 : 0; if (hap_cnt == 3) psum->recomb[i][j] = false; else psum->recomb[i][j] = true; D = freq_AB - (freq_A * freq_B); // cerr << "D_AB: " << D << "; "; // D = freq_Ab - (freq_A * freq_b); // cerr << "D_Ab: " << D << "; "; // D = freq_aB - (freq_a * freq_B); // cerr << "D_aB: " << D << "; "; // D = freq_ab - (freq_a * freq_b); // cerr << "D_ab: " << D << "\n"; // cerr << " freq_AB: " << freq_AB << "; freq_Ab: " << freq_Ab << "; freq_aB: " << freq_aB << "; freq_ab: " << freq_ab << "\n"; if (D > 0) { min = (freq_A * freq_b) < (freq_a * freq_B) ? (freq_A * freq_b) : (freq_a * freq_B); psum->dprime[i][j].dprime = min == 0 ? 
0.0 : D / min; } else { min = (freq_A * freq_B) < (freq_a * freq_b) ? (freq_A * freq_B) : (freq_a * freq_b); psum->dprime[i][j].dprime = min == 0 ? 0.0 :(-1 * D) / min; } // // Test D against a chi square distribution with 1 degree of freedom to show // whether these two loci have a D that is statistically significantly different from 0. // chisq = (tot * (D * D)) / (freq_A * freq_a * freq_B * freq_b); if (chisq >= chi_sq_limit) psum->dprime[i][j].chisq_p = true; // // Calculate variance and confidence limits. // if (psum->dprime[i][j].chisq_p) { var = (1.0 / tot) * ((freq_A * freq_a * freq_B * freq_b) + ((1 - (2 * freq_A)) * (1 - (2 * freq_B)) * D) - (D * D)); psum->dprime[i][j].var = var; psum->dprime[i][j].ci_high = psum->dprime[i][j].dprime + (1.96 * sqrt(var)); psum->dprime[i][j].ci_low = psum->dprime[i][j].dprime - (1.96 * sqrt(var)); } else { psum->dprime[i][j].ci_high = 0.0; psum->dprime[i][j].ci_low = 0.0; } } } } return 0; } int assign_alleles(NucSum nsum, char &p_allele, char &q_allele, double &p_freq, double &q_freq) { p_allele = 0; q_allele = 0; uint i = 0; float tot = 0; while (p_allele == 0 && i < 4) { if (nsum.nuc[i] > 0) { tot += nsum.nuc[i]; switch(i) { case 0: p_allele = 'A'; p_freq = nsum.nuc[0]; break; case 1: p_allele = 'C'; p_freq = nsum.nuc[1]; break; case 2: p_allele = 'G'; p_freq = nsum.nuc[2]; break; case 3: p_allele = 'T'; p_freq = nsum.nuc[3]; break; } } i++; } while (q_allele == 0 && i < 4) { if (nsum.nuc[i] > 0) { tot += nsum.nuc[i]; switch(i) { case 1: q_allele = 'C'; q_freq = nsum.nuc[1]; break; case 2: q_allele = 'G'; q_freq = nsum.nuc[2]; break; case 3: q_allele = 'T'; q_freq = nsum.nuc[3]; break; } } i++; } p_freq = p_freq / tot; q_freq = 1 - p_freq; return 0; } int write_dprime(string path, PhasedSummary *psum) { // // Write the D' data for plotting as a heatmap. 
// string file = path + ".dprime.tsv"; cerr << "Writing D' data to '" << file << "'..."; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening D' file '" << file << "'\n"; exit(1); } fh << "# Basepair 1\tBasepair 2\tD'\tCorrected D'\tVariance\tCI Low\tCI High\n"; double dprime = 0.0; for (uint i = 0; i < psum->size; i++) { for (uint j = i+1; j < psum->size; j++) { if (psum->nucs[i].freq < minor_freq_lim || psum->nucs[j].freq < minor_freq_lim) continue; dprime = psum->dprime[i][j].dprime; if (dprime_threshold) dprime = dprime >= dprime_threshold_level ? 1.0 : 0.0; if (write_zeros == false && (dprime == 0.0 || psum->dprime[i][j].chisq_p == false)) continue; fh << psum->nucs[i].bp << "\t" << psum->nucs[j].bp << "\t" << std::setprecision(3) << dprime << "\t" << std::setprecision(3) << (psum->dprime[i][j].chisq_p ? dprime : 0.0) << "\t" << psum->dprime[i][j].var << "\t" << psum->dprime[i][j].ci_low << "\t" << psum->dprime[i][j].ci_high << "\n"; } } fh.close(); cerr << "done.\n"; return 0; } int summarize_phased_genotypes(PhasedSummary *psum) { // // Construct a two dimensional array out of all the nucleotide arrays in the samples. // char **gtypes = new char *[psum->sample_cnt]; for (uint i = 0; i < psum->sample_cnt; i++) { gtypes[i] = psum->samples[i].nucs_1; } // // Sum up the occurences of each nucleotide. // for (uint i = 0; i < psum->size; i++) { for (uint j = 0; j < psum->sample_cnt; j++) { switch(gtypes[j][i]) { case 'A': psum->nucs[i].nuc[0]++; break; case 'C': psum->nucs[i].nuc[1]++; break; case 'G': psum->nucs[i].nuc[2]++; break; case 'T': psum->nucs[i].nuc[3]++; break; case 'N': default: break; } } } // // Repeat for the second set of phased genotypes. // for (uint i = 0; i < psum->sample_cnt; i++) { gtypes[i] = psum->samples[i].nucs_2; } // // Sum up the occurences of each nucleotide. 
// for (uint i = 0; i < psum->size; i++) { for (uint j = 0; j < psum->sample_cnt; j++) { switch(gtypes[j][i]) { case 'A': psum->nucs[i].nuc[0]++; break; case 'C': psum->nucs[i].nuc[1]++; break; case 'G': psum->nucs[i].nuc[2]++; break; case 'T': psum->nucs[i].nuc[3]++; break; case 'N': default: break; } } // // Calculate minor allele frequency. // float tot = (float) psum->sample_cnt * 2.0; float freq = 0.0; for (uint j = 0; j < 4; j++) { if (psum->nucs[i].nuc[j] > 0) { freq = (float) psum->nucs[i].nuc[j] / tot; psum->nucs[i].freq = freq < psum->nucs[i].freq ? freq : psum->nucs[i].freq; } } } delete [] gtypes; return 0; } // // Code to parse fastPhase format. // PhasedSummary * parse_fastphase(string path) { ifstream fh; char line[max_len]; string buf, filepath; const char *p, *q, *end; int i, sindex, pos; memset(line, '\0', max_len); // // Read in the original fastPhase export from Stacks to obtain the original base pair positions. // // // Open the file for reading // filepath = path + ".inp"; fh.open(filepath.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening input file '" << path << "'\n"; return NULL; } cerr << "Parsing " << filepath << "...\n"; int num_samples, num_genotypes; char bp[id_len]; // // Get the number of samples in the dataset. // fh.getline(line, max_len); num_samples = is_integer(line); if (num_samples < 0) { cerr << "Unable to find the number of samples, should be the first line.\n"; return NULL; } // // Get the number of genotypes in the dataset. // fh.getline(line, max_len); num_genotypes = is_integer(line); if (num_genotypes < 0) { cerr << "Unable to find the number of genotypes, should be the second line.\n"; return NULL; } PhasedSummary *psum = new PhasedSummary(num_samples, num_genotypes); // // Get the set of base pair positions. 
// buf.clear(); do { fh.clear(); fh.getline(line, max_len); buf += line; } while (fh.fail() && !fh.bad() && !fh.eof()); i = 0; p = buf.c_str(); end = p + buf.length(); if (*p != 'P') { cerr << "Unable to locate line of basepair positions, should be the third line.\n"; delete psum; return NULL; } for (p += 2, q = p; p < end; p++, q++) { while (*q != ' ' && q < end) { q++; } strncpy(bp, p, q - p); bp[q - p] = '\0'; pos = is_integer(bp); if (pos < 0) { cerr << "Unable to parse base pair positions.\n"; delete psum; return NULL; } else { psum->nucs[i].bp = (uint) pos; } i++; p = q; } fh.close(); // // Open the file for reading // filepath = path + "_hapguess_switch.out"; fh.open(filepath.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening input file '" << path << "'\n"; return NULL; } cerr << "Parsing " << filepath << "...\n"; // // Read from the "*_hapguess_switch.out" file until we hit the genotypes section // marked by the string "BEGIN GENOTYPES". // do { fh.getline(line, max_len); if (!fh.good()) { cerr << "Unable to find file section entitled 'BEGIN GENOTYPES'\n"; delete psum; return NULL; } } while (strcmp(line, "BEGIN GENOTYPES") != 0); // // Now read lines from the file in groups of three: // 1. Sample label // 2. Phased genotypes from chromosome 1 // 3. Phased genotypes from chromosome 2 // Stop reading individuals when we encounter the string, "END GENOTYPES". // fh.getline(line, max_len); do { // // Create a new Sample object and store the sample label. // sindex = psum->add_sample(line); // // Get the first set of phased genotypes. // buf.clear(); do { fh.clear(); fh.getline(line, max_len); buf += line; } while (fh.fail() && !fh.bad() && !fh.eof()); // // Count the number of genotypes on this line (they should be space deliniated). // i = 0; for (p = buf.c_str(); *p != '\0'; p++) if (*p != ' ') psum->samples[sindex].size++; // // Store the genotypes into our internal buffer. 
// psum->samples[sindex].nucs_1 = new char[psum->samples[sindex].size]; for (p = buf.c_str(); *p != '\0'; p++) { if (*p == ' ') continue; psum->samples[sindex].nucs_1[i] = *p; i++; } // len = strlen(line); // if (line[len - 1] == '\r') line[len - 1] = '\0'; // // Get the second set of phased genotypes. // buf.clear(); do { fh.clear(); fh.getline(line, max_len); buf += line; } while (fh.fail() && !fh.bad() && !fh.eof()); i = 0; psum->samples[sindex].nucs_2 = new char[psum->samples[sindex].size]; for (p = buf.c_str(); *p != '\0'; p++) { if (*p == ' ') continue; psum->samples[sindex].nucs_2[i] = *p; i++; } // // Get the sample label of the next record. // fh.getline(line, max_len); } while (strcmp(line, "END GENOTYPES") != 0 && fh.good()); fh.close(); return psum; } // // Code to parse Beagle format. // PhasedSummary * parse_beagle(map &catalog, string path) { gzFile gz_fh; char *line; string buf, filepath; const char *p, *q; uint len, line_len, i, sindex; bool eol; line_len = max_len; line = new char[line_len]; memset(line, '\0', line_len); // // Open the Beagle file for reading // filepath = path + ".phased.gz"; gz_fh = gzopen(filepath.c_str(), "rb"); if (!gz_fh) { cerr << "Failed to open gzipped file '" << filepath << "': " << strerror(errno) << ".\n"; return NULL; } cerr << "Parsing " << filepath << "...\n"; vector parts; uint num_samples = 0; uint num_genotypes = 0; char cat_loc_str[id_len], col_str[id_len]; // // Parse the file twice. On the first round: // 1. Determine the number of samples in the dataset (column count) // 2. Determine the number of markers (row count). // On the second round, parse the SNP genotypes. // // // Read each line in the file. If it starts with: // '#' it is a comment, skip the line. // 'I' it is the list of samples, parse them. // 'S' is the population ID for each SNP, skip this line. // 'M' is a marker, count the number of markers. 
// do { eol = false; buf.clear(); do { gzgets(gz_fh, line, line_len); buf += line; len = strlen(line); if (len > 0 && line[len - 1] == '\n') { eol = true; line[len - 1] = '\0'; } } while (!gzeof(gz_fh) && !eol); if (line_len < buf.length()) { // cerr << "Resizing line buffer from " << line_len << " to " << buf.length() << "\n"; delete [] line; line = new char[buf.length() + 1]; line_len = buf.length() + 1; memset(line, '\0', line_len); } if (buf[0] == 'M') { num_genotypes++; } else if (buf[0] == 'I') { // // Count the number of samples. // parse_ssv(buf.c_str(), parts); num_samples = (parts.size() - 2) / 2; } } while (!gzeof(gz_fh)); PhasedSummary *psum = new PhasedSummary(num_samples, num_genotypes); for (uint j = 2; j < parts.size(); j++) { if (j % 2 == 0) { sindex = psum->add_sample(parts[j]); psum->samples[sindex].size = num_genotypes; psum->samples[sindex].nucs_1 = new char[psum->samples[sindex].size]; psum->samples[sindex].nucs_2 = new char[psum->samples[sindex].size]; } } cerr << " Found " << num_samples << " samples; " << num_genotypes << " genotypes.\n"; gzrewind(gz_fh); uint marker_num = 0; memset(line, '\0', line_len); do { do { gzgets(gz_fh, line, line_len); } while (!gzeof(gz_fh) && line[0] != 'M'); len = strlen(line); if (len == 0) break; if (len > 0 && line[len - 1] == '\n') line[len - 1] = '\0'; parse_ssv(line, parts); // // Parse the catalog locus ID and the column number of the SNP: // e.g. LocId_column or 10329_37 // p = parts[1].c_str(); for (q = p + 1; *q != '_' && *q != '\0'; q++); strncpy(cat_loc_str, p, q - p); cat_loc_str[q-p] = '\0'; q++; strcpy(col_str, q); psum->nucs[marker_num].clocus = is_integer(cat_loc_str); psum->nucs[marker_num].col = is_integer(col_str); // // Store the genotypes into our internal buffer. 
// sindex = 0; i = 2; while (i < parts.size()) { p = parts[i].c_str(); psum->samples[sindex].nucs_1[marker_num] = *p; i++; p = parts[i].c_str(); psum->samples[sindex].nucs_2[marker_num] = *p; i++; sindex++; } marker_num++; } while (!gzeof(gz_fh)); gzclose(gz_fh); // // Use the catalog to look up the basepair positions for each catalog locus. // CSLocus *loc; for (i = 0; i < psum->size; i++) { loc = catalog[psum->nucs[i].clocus]; psum->nucs[i].bp = loc->sort_bp(psum->nucs[i].col); } return psum; } // // Code to parse Beagle format. // PhasedSummary * parse_beagle_haplotypes(map &catalog, string path) { gzFile gz_fh; char *line; string buf, filepath; const char *p; uint len, line_len, i, j, sindex; bool eol; line_len = max_len; line = new char[line_len]; memset(line, '\0', line_len); // // Open the Beagle file for reading // filepath = path + ".phased.gz"; gz_fh = gzopen(filepath.c_str(), "rb"); if (!gz_fh) { cerr << "Failed to open gzipped file '" << filepath << "': " << strerror(errno) << ".\n"; return NULL; } cerr << "Parsing " << filepath << "...\n"; vector parts, samples; uint num_samples = 0; uint num_genotypes = 0; uint cat_loc; // // Parse the file twice. On the first round: // 1. Determine the number of samples in the dataset (column count) // 2. Determine the number of markers (row count). // On the second round, parse the SNP genotypes. // // // Read each line in the file. If it starts with: // '#' it is a comment, skip the line. // 'I' it is the list of samples, parse them. // 'S' is the population ID for each SNP, skip this line. // 'M' is a marker, count the number of markers. 
// do { eol = false; buf.clear(); do { gzgets(gz_fh, line, line_len); buf += line; len = strlen(line); if (len > 0 && line[len - 1] == '\n') { eol = true; line[len - 1] = '\0'; } } while (!gzeof(gz_fh) && !eol); if (line_len < buf.length()) { // cerr << "Resizing line buffer from " << line_len << " to " << buf.length() << "\n"; delete [] line; line = new char[buf.length() + 1]; line_len = buf.length() + 1; memset(line, '\0', line_len); } if (buf[0] == 'M') { // // Count the number of genotypes by counting the number or nucleotides in each // haplotype for each marker. // parse_ssv(buf.c_str(), parts); num_genotypes += parts[2].length(); } else if (buf[0] == 'I') { // // Count the number of samples. // parse_ssv(buf.c_str(), samples); num_samples = (samples.size() - 2) / 2; } } while (!gzeof(gz_fh)); PhasedSummary *psum = new PhasedSummary(num_samples, num_genotypes); for (uint j = 2; j < samples.size(); j++) { if (j % 2 == 0) { sindex = psum->add_sample(samples[j]); psum->samples[sindex].size = num_genotypes; psum->samples[sindex].nucs_1 = new char[psum->samples[sindex].size]; psum->samples[sindex].nucs_2 = new char[psum->samples[sindex].size]; } } cerr << " Found " << num_samples << " samples; " << num_genotypes << " genotypes.\n"; gzrewind(gz_fh); CSLocus *loc; uint hap_len = 0; uint marker_num = 0; memset(line, '\0', line_len); do { do { gzgets(gz_fh, line, line_len); } while (!gzeof(gz_fh) && line[0] != 'M'); len = strlen(line); if (len == 0) break; if (len > 0 && line[len - 1] == '\n') line[len - 1] = '\0'; parse_ssv(line, parts); // // Use the catalog to look up the basepair positions for each catalog locus. 
// cat_loc = is_integer(parts[1].c_str()); loc = catalog[cat_loc]; hap_len = parts[2].length(); if (hap_len != loc->snps.size()) cerr << "Haplotypes don't match between catalog and beagle; Locus ID: " << loc->id << "; beagle hap len: " << hap_len << "; catalog hap len: " << loc->snps.size() << "\n"; for (j = 0, i = marker_num; i < marker_num + hap_len; i++, j++) { psum->nucs[i].clocus = cat_loc; psum->nucs[i].col = loc->snps[j]->col; psum->nucs[i].bp = loc->sort_bp(psum->nucs[i].col); } // // Store the genotypes into our internal buffer. // sindex = 0; i = 2; while (i < parts.size()) { p = parts[i].c_str(); for (j = marker_num; j < marker_num + hap_len; j++) { psum->samples[sindex].nucs_1[j] = *p; p++; } i++; p = parts[i].c_str(); for (j = marker_num; j < marker_num + hap_len; j++) { psum->samples[sindex].nucs_2[j] = *p; p++; } i++; sindex++; } marker_num += hap_len; } while (!gzeof(gz_fh)); gzclose(gz_fh); return psum; } int parse_population_map(string popmap_path, map &pop_map, map &pop_cnts) { char line[max_len]; char pop_id_str[id_len]; vector parts; uint len; if (pmap_path.length() == 0) return 0; cerr << "Parsing population map.\n"; ifstream fh(popmap_path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening population map '" << popmap_path << "'\n"; return 0; } while (fh.good()) { fh.getline(line, max_len); len = strlen(line); if (len == 0) continue; // // Check that there is no carraige return in the buffer. 
// if (line[len - 1] == '\r') line[len - 1] = '\0'; // // Ignore comments // if (line[0] == '#') continue; // // Parse the population map, we expect: // // parse_tsv(line, parts); if (parts.size() != 2) { cerr << "Population map is not formated correctly: expecting two, tab separated columns, found " << parts.size() << ".\n"; return 0; } strncpy(pop_id_str, parts[1].c_str(), id_len); for (int i = 0; i < id_len && pop_id_str[i] != '\0'; i++) if (!isdigit(pop_id_str[i])) { cerr << "Population map is not formated correctly: expecting numerical ID in second column, found '" << parts[1] << "'.\n"; return 0; } // // Add the sample name to population number mapping. // pop_map[parts[0]] = atoi(parts[1].c_str()); if (pop_cnts.count(atoi(parts[1].c_str())) == 0) pop_cnts[atoi(parts[1].c_str())] = 1; else pop_cnts[atoi(parts[1].c_str())]++; } fh.close(); return 0; } int build_file_list(vector > &files) { vector parts; string pattern; // // Read all the files from the Stacks directory. // uint pos; string file; struct dirent *direntry; DIR *dir = opendir(in_path.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path << "' for reading.\n"; exit(1); } switch(in_file_type) { case FileT::beagle: pattern = ".phased.gz"; break; case FileT::fastphase: default: pattern = "_hapguess_switch.out"; break; } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file == "." 
|| file == "..") continue; pos = file.rfind(pattern); if (pos < file.length()) files.push_back(make_pair(1, file.substr(0, pos))); } closedir(dir); if (files.size() == 0) { cerr << "Unable to locate any input files to process within '" << in_path << "'\n"; return 0; } return 1; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"haplotypes", no_argument, NULL, 'H'}, {"skip_zeros", no_argument, NULL, 'Z'}, {"infile_type", required_argument, NULL, 't'}, {"num_threads", required_argument, NULL, 'p'}, {"in_path", required_argument, NULL, 'P'}, {"cat_path", required_argument, NULL, 'S'}, {"pop_map", required_argument, NULL, 'M'}, {"batch_id", required_argument, NULL, 'b'}, {"dprime_bin_size", required_argument, NULL, 'B'}, {"minor_allele_freq", required_argument, NULL, 'a'}, {"min_inform_pairs", required_argument, NULL, 'm'}, {"dprime_threshold", required_argument, NULL, 'T'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvZHAb:M:t:P:S:p:a:B:T:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 
1, 2, 3\n"; help(); } break; case 'p': num_threads = atoi(optarg); break; case 'a': minor_freq_lim = atof(optarg); break; case 'm': min_inform_pairs = atof(optarg); break; case 'P': in_path = optarg; break; case 'S': cat_path = optarg; break; case 't': if (strcasecmp(optarg, "phase") == 0) in_file_type = FileT::phase; else if (strcasecmp(optarg, "fastphase") == 0) in_file_type = FileT::fastphase; else if (strcasecmp(optarg, "beagle") == 0) in_file_type = FileT::beagle; else in_file_type = FileT::unknown; break; case 'M': pmap_path = optarg; break; case 'H': haplotypes = true; break; case 'Z': write_zeros = false; break; case 'B': bucket_dist = atoi(optarg); break; case 'T': dprime_threshold = true; dprime_threshold_level = atof(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: help(); abort(); } } if (in_path.length() == 0) { cerr << "You must specify a path to the directory containing Stacks output files.\n"; help(); } if (in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (minor_freq_lim > 0) { if (minor_freq_lim > 1) minor_freq_lim = minor_freq_lim / 100; if (minor_freq_lim > 0.5) { cerr << "Unable to parse the minor allele frequency\n"; help(); } } if (min_inform_pairs > 0) { if (min_inform_pairs > 1) min_inform_pairs = min_inform_pairs / 100; } return 0; } void version() { std::cerr << "phasedstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "phasedstacks " << VERSION << "\n" << "phasedstacks -b id -S path -P path -t file_type [-p threads] [-M popmap] [-v] [-h]" << "\n" << " b: Stacks batch ID.\n" << " P: path to the phased output files.\n" << " S: path to the Stacks output files.\n" << " t: input file type. 
Supported types: fastphase, and beagle.\n" << " p: number of processes to run in parallel sections of code.\n" << " M: path to the population map, a tab separated file describing which individuals belong in which population.\n" << " v: print program version." << "\n" << " h: display this help messsage." << "\n" << " --haplotypes: data were phased as RAD locus haplotypes.\n" << " --dprime_bin_size: size of buckets for binning SNPs at a particular distance to calculate the mean D' value.\n" << " --dprime_threshold : if D' values fall above , set the D' to 1, otherwise set D' to 0.\n\n" << " Filtering options:\n" << " --skip_zeros: do not include D' values of zero in the D' output.\n" << " --minor_allele_freq: specify a minimum minor allele frequency required to process a nucleotide site (0 < a < 0.5).\n" << " --min_inform_pairs: when building D' haplotype blocks, the minimum number of informative D' measures to combine two blocks (default 0.9).\n\n"; exit(0); } stacks-1.35/src/phasedstacks.h000644 000765 000024 00000013033 12441417455 017126 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __PHASEDSTACKS_H__ #define __PHASEDSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::stringstream; #include #include using std::vector; #include using std::map; #include using std::set; #include "constants.h" #include "utils.h" #include "log_utils.h" #include "catalog_utils.h" #include "input.h" #include "sql_utilities.h" #include "locus.h" #ifdef HAVE_LIBZ #include #include #endif enum loc_t {strong_ld, recomb, uninformative}; class Sample { public: string name; int id; int size; char *nucs_1; char *nucs_2; Sample() { this->id = 0; this->size = 0; this->nucs_1 = NULL; this->nucs_2 = NULL; } ~Sample() { if (this->nucs_1 != NULL) delete [] this->nucs_1; if (this->nucs_2 != NULL) delete [] this->nucs_2; } }; class NucSum { public: uint bp; uint col; uint clocus; float freq; uint nuc[4]; // nuc[0] == A // nuc[1] == C // nuc[2] == G // nuc[3] == T NucSum() { this->freq = 1.0; this->bp = 0; this->clocus = 0; for (uint i = 0; i < 4; i++) this->nuc[i] = 0; } }; class dPrime { public: double dprime; bool chisq_p; double var; double ci_high; double ci_low; bool informative; loc_t type; dPrime() { this->dprime = 0.0; this->chisq_p = false; this->var = 0.0; this->ci_high = 0.0; this->ci_low = 0.0; this->type = uninformative; } }; class PhasedSummary { map sample_map; public: uint size; uint sample_cnt; NucSum *nucs; Sample *samples; dPrime **dprime; bool **recomb; PhasedSummary(uint num_samples, uint num_genotypes) { this->sample_cnt = num_samples; this->samples = new Sample[this->sample_cnt]; this->size = num_genotypes; this->nucs = new NucSum[this->size]; this->dprime = new dPrime *[this->size]; for (uint i = 0; i 
< this->size; i++) this->dprime[i] = new dPrime[this->size]; this->recomb = new bool *[this->size]; for (uint i = 0; i < this->size; i++) { this->recomb[i] = new bool[this->size]; memset(this->recomb[i], 0, this->size); } } ~PhasedSummary() { if (this->nucs != NULL) delete [] this->nucs; if (this->dprime != NULL) { for (uint i = 0; i < this->size; i++) delete [] this->dprime[i]; delete [] this->dprime; } if (this->recomb != NULL) { for (uint i = 0; i < this->size; i++) delete [] this->recomb[i]; delete [] this->recomb; } if (this->samples != NULL) delete [] this->samples; } int add_sample(string name) { uint i = this->sample_map.size(); this->sample_map[name] = i; this->samples[i].name = name; return i; } }; class HBlock { public: vector loci; HBlock *next; HBlock() { this->next = NULL; } }; class dPrimeBlocks { HBlock *_head; public: dPrimeBlocks() { this->_head = NULL; } ~dPrimeBlocks() { HBlock *cur, *next; cur = this->_head; next = cur->next; while (next != NULL) { delete cur; cur = next; next = cur->next; } } HBlock *head() { return this->_head; } HBlock *initialize(set &); HBlock *merge_adjacent(HBlock *); int print(); }; void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(vector > &); int parse_population_map(string, map &, map &); PhasedSummary *parse_fastphase(string); PhasedSummary *parse_beagle(map &, string); PhasedSummary *parse_beagle_haplotypes(map &, string); int summarize_phased_genotypes(PhasedSummary *); int calc_dprime(PhasedSummary *); int assign_alleles(NucSum, char &, char &, double &, double &); int write_dprime(string, PhasedSummary *); int four_gamete_test(string, map &, PhasedSummary *, map &, map &); int dprime_blocks(string, map &, PhasedSummary *, map &, map &); bool check_adjacent_blocks(PhasedSummary *, HBlock *); int enumerate_haplotypes(ofstream &, map &, PhasedSummary *, uint, uint); int bucket_dprime(vector &, vector &, PhasedSummary *); int write_buckets(string, vector &, vector &); 
#endif // __PHASEDSTACKS_H__ stacks-1.35/src/PopMap.h000644 000765 000024 00000024232 12533677757 015667 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __POPMAP_H__ #define __POPMAP_H__ #include "stacks.h" #include "locus.h" #include #include using std::string; #include using std::vector; #include using std::map; #include using std::set; #include #include using std::pair; using std::make_pair; class Datum { public: int id; // Stack ID int merge_partner; // Stack ID of merged datum, if this datum was merged/phased from two, overlapping datums. int len; // Length of locus int tot_depth; // Stack depth vector depth; // Stack depth of each matching allele bool corrected; // Has this genotype call been corrected char *model; // String representing SNP model output for each nucleotide at this locus. char *gtype; // Genotype char *trans_gtype; // Translated Genotype double lnl; // Log likelihood of this locus. 
vector obshap; // Observed Haplotypes vector snps; Datum() { corrected = false; gtype = NULL; trans_gtype = NULL; model = NULL; tot_depth = 0; len = 0; lnl = 0.0; merge_partner = 0; } ~Datum() { for (uint i = 0; i < this->obshap.size(); i++) delete [] this->obshap[i]; for (uint i = 0; i < this->snps.size(); i++) delete this->snps[i]; delete [] this->gtype; delete [] this->trans_gtype; delete [] this->model; } }; template class PopMap { set > blacklist; int num_loci; int num_samples; Datum ***data; map locus_order; // LocusID => ArrayIndex; map catalog IDs to their first dimension // position in the Datum array. map rev_locus_order; map sample_order; // SampleID => ArrayIndex; map defining at what position in // the second dimension of the datum array each sample is stored. map rev_sample_order; public: map > ordered_loci; // Loci ordered by genomic position PopMap(int, int); ~PopMap(); int populate(vector &, map &, vector > &); int order_loci(map &); int prune(set &); int loci_cnt() { return this->num_loci; } int rev_locus_index(int index) { if (this->rev_locus_order.count(index) == 0) return -1; return this->rev_locus_order[index]; } int sample_cnt() { return this->num_samples; } int sample_index(int index) { if (this->sample_order.count(index) == 0) return -1; return this->sample_order[index]; } int rev_sample_index(int index) { if (this->rev_sample_order.count(index) == 0) return -1; return this->rev_sample_order[index]; } Datum **locus(int); Datum *datum(int, int); bool blacklisted(int, int); }; template PopMap::PopMap(int num_samples, int num_loci) { this->data = new Datum **[num_loci]; for (int i = 0; i < num_loci; i++) { this->data[i] = new Datum *[num_samples]; for (int j = 0; j < num_samples; j++) this->data[i][j] = NULL; } this->num_samples = num_samples; this->num_loci = num_loci; } template PopMap::~PopMap() { for (int i = 0; i < this->num_loci; i++) { for (int j = 0; j < this->num_samples; j++) delete this->data[i][j]; delete [] this->data[i]; } delete 
[] this->data; } template int PopMap::populate(vector &sample_ids, map &catalog, vector > &matches) { // // Record the array position of each sample that we will load. // for (uint i = 0; i < sample_ids.size(); i++) { this->sample_order[sample_ids[i]] = i; this->rev_sample_order[i] = sample_ids[i]; } // // Create an index showing what position each catalog locus is stored at in the datum // array. Create a second index allowing ordering of Loci by genomic position. // typename std::map::iterator it; uint i = 0; for (it = catalog.begin(); it != catalog.end(); it++) { this->locus_order[it->first] = i; this->rev_locus_order[i] = it->first; i++; } // // Sort the catalog loci on each chromosome according to base pair. // this->order_loci(catalog); // // Populate the datum array // Datum *d; int locus, sample; for (i = 0; i < matches.size(); i++) { for (uint j = 0; j < matches[i].size(); j++) { sample = this->sample_order[matches[i][j]->sample_id]; if (this->locus_order.count(matches[i][j]->cat_id) == 0) continue; locus = this->locus_order[matches[i][j]->cat_id]; // cerr << "Translating sample id: " << matches[i][j]->sample_id << " to index " << sample << "\n"; // cerr << "Translating locus id: " << matches[i][j]->cat_id << " to index " << locus << "\n"; if (this->data[locus][sample] == NULL) { if (this->blacklist.count(make_pair(matches[i][j]->sample_id, matches[i][j]->cat_id)) == 0) { // cerr << "Creating new datum for tag ID: " << matches[i][j]->tag_id << "\n"; d = new Datum; d->id = matches[i][j]->tag_id; char *h = new char[strlen(matches[i][j]->haplotype) + 1]; strcpy(h, matches[i][j]->haplotype); d->obshap.push_back(h); d->depth.push_back(matches[i][j]->depth); d->tot_depth += matches[i][j]->depth; d->lnl = matches[i][j]->lnl; this->data[locus][sample] = d; catalog[matches[i][j]->cat_id]->hcnt++; catalog[matches[i][j]->cat_id]->cnt++; } } else { // cerr << " Adding haplotype to existing datum: sample: " << matches[i][j]->sample_id << ". 
tag: " << matches[i][j]->tag_id << "\n"; // // Check that the IDs of the two matches are the same. If not, then two tags // match this locus and the locus is invalid, set back to NULL. // if (matches[i][j]->tag_id == this->data[locus][sample]->id) { char *h = new char[strlen(matches[i][j]->haplotype) + 1]; strcpy(h, matches[i][j]->haplotype); this->data[locus][sample]->obshap.push_back(h); this->data[locus][sample]->depth.push_back(matches[i][j]->depth); this->data[locus][sample]->tot_depth += matches[i][j]->depth; this->data[locus][sample]->lnl = matches[i][j]->lnl; } else { //cerr << " Deleting sample, multiple tag matches\n"; delete this->data[locus][sample]; this->data[locus][sample] = NULL; this->blacklist.insert(make_pair(matches[i][j]->sample_id, matches[i][j]->cat_id)); catalog[matches[i][j]->cat_id]->hcnt--; catalog[matches[i][j]->cat_id]->confounded_cnt++; } } } } return 0; } template int PopMap::order_loci(map &catalog) { this->ordered_loci.clear(); typename std::map::iterator it; for (it = catalog.begin(); it != catalog.end(); it++) { if (strlen(it->second->loc.chr) > 0) this->ordered_loci[it->second->loc.chr].push_back(it->second); } // // Sort the catalog loci on each chromosome according to base pair. // typename map >::iterator cit; for (cit = this->ordered_loci.begin(); cit != this->ordered_loci.end(); cit++) sort(cit->second.begin(), cit->second.end(), bp_compare); return 0; } template int PopMap::prune(set &remove_ids) { uint new_size = this->num_loci - remove_ids.size(); uint loc_id; map new_loc_order, new_rev_loc_order; Datum ***d = new Datum **[new_size]; int j = 0; for (int i = 0; i < this->num_loci; i++) { loc_id = this->rev_locus_order[i]; // // Keep this locus. // if (remove_ids.count(loc_id) == 0) { d[j] = this->data[i]; new_loc_order[loc_id] = j; new_rev_loc_order[j] = loc_id; j++; } else { // // Remove this locus. 
// for (int k = 0; k < this->num_samples; k++) delete this->data[i][k]; delete [] this->data[i]; } } delete [] this->data; this->data = d; this->locus_order.clear(); this->locus_order = new_loc_order; this->rev_locus_order.clear(); this->rev_locus_order = new_rev_loc_order; this->num_loci = new_size; // // Re-sort the catalog loci on each chromosome according to base pair. // map > new_ordered_loci; typename map >::iterator cit; for (cit = this->ordered_loci.begin(); cit != this->ordered_loci.end(); cit++) { for (uint k = 0; k < cit->second.size(); k++) { if (remove_ids.count(cit->second[k]->id) == 0) new_ordered_loci[cit->first].push_back(cit->second[k]); } } this->ordered_loci.clear(); this->ordered_loci = new_ordered_loci; for (cit = this->ordered_loci.begin(); cit != this->ordered_loci.end(); cit++) sort(cit->second.begin(), cit->second.end(), bp_compare); return new_size; } template Datum **PopMap::locus(int locus) { return this->data[this->locus_order[locus]]; } template Datum *PopMap::datum(int locus, int sample) { return this->data[this->locus_order[locus]][this->sample_order[sample]]; } template bool PopMap::blacklisted(int locus, int sample) { if (this->blacklist.count(make_pair(sample, locus)) > 0) return true; else return false; } #endif // __POPMAP_H__ stacks-1.35/src/PopSum.h000644 000765 000024 00000107233 12571641525 015703 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2012, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __POPSUM_H__ #define __POPSUM_H__ #include #include using std::string; #include using std::vector; #include using std::map; #include using std::set; #include #include using std::pair; using std::make_pair; #include #include #include "stacks.h" extern bool log_fst_comp; extern double minor_allele_freq; extern map pop_key; const uint PopStatSize = 5; class PopStat { public: int loc_id; int bp; bool fixed; double alleles; // Number of alleles sampled at this location. uint snp_cnt; // Number of SNPs in kernel-smoothed window centered on this SNP. double stat[PopStatSize]; double smoothed[PopStatSize]; double bs[PopStatSize]; PopStat() { this->loc_id = 0; this->bp = 0; this->fixed = false; this->alleles = 0.0; this->snp_cnt = 0; for (uint i = 0; i < PopStatSize; i++) { this->stat[i] = 0.0; this->smoothed[i] = 0.0; this->bs[i] = 0.0; } } virtual ~PopStat() { } }; class HapStat: public PopStat { // PopStat[0]: Phi_st // PopStat[1]: Phi_ct // PopStat[2]: Phi_sc // PopStat[3]: Fst' // PopStat[4]: D_est public: double *comp; uint popcnt; HapStat(): PopStat() { comp = NULL; } ~HapStat() { if (this->comp != NULL) delete [] comp; } }; class LocStat: public PopStat { // PopStat[0]: gene diversity // PopStat[1]: haplotype diversity (Pi) public: uint hap_cnt; // Number of unique haplotypes at this locus. string hap_str; // Human-readable string of haplotype counts. LocStat(): PopStat() { this->hap_cnt = 0; } ~LocStat() {}; }; class PopPair: public PopStat { // PopStat[0]: corrected Fst, (by p-value or Bonferroni p-value). // PopStat[1]: corrected AMOVA Fst public: int col; double pi; double fst; double fet_p; // Fisher's Exact Test p-value. double fet_or; // Fisher's exact test odds ratio. double or_se; // Fisher's exact test odds ratio standard error. double lod; // base 10 logarithm of odds score. 
double ci_low; // Fisher's exact test lower confidence interval. double ci_high; // Fisher's exact test higher confidence interval. double amova_fst; // AMOVA Fst method, from Weir, Genetic Data Analysis II . double *comp; PopPair() { col = 0; pi = 0.0; fst = 0.0; fet_p = 0.0; fet_or = 0.0; or_se = 0.0; lod = 0.0; ci_low = 0.0; ci_high = 0.0; amova_fst = 0.0; comp = NULL; } ~PopPair() { if (this->comp != NULL) delete [] comp; } }; class SumStat: public PopStat { // PopStat[0]: pi // PopStat[1]: fis public: bool incompatible_site; bool filtered_site; double num_indv; char p_nuc; char q_nuc; double p; double obs_het; double obs_hom; double exp_het; double exp_hom; double π SumStat(): PopStat(), pi(this->stat[0]) { num_indv = 0.0; p = 0.0; p_nuc = 0; q_nuc = 0; obs_het = 0.0; obs_hom = 0.0; exp_het = 0.0; exp_hom = 0.0; snp_cnt = 0; incompatible_site = false; filtered_site = false; } }; class LocSum { public: SumStat *nucs; // Array containing summary statistics for // each nucleotide position at this locus. LocSum(int len) { this->nucs = new SumStat[len]; } ~LocSum() { delete [] this->nucs; } }; class NucTally { public: int loc_id; int bp; uint16_t col; uint16_t num_indv; uint16_t pop_cnt; uint16_t allele_cnt; char p_allele; char q_allele; double p_freq; double obs_het; bool fixed; int priv_allele; NucTally() { loc_id = 0; bp = 0; col = 0; num_indv = 0; pop_cnt = 0; allele_cnt = 0; p_allele = 0; q_allele = 0; p_freq = 0.0; obs_het = 0.0; priv_allele = -1; fixed = true; } }; class LocTally { public: NucTally *nucs; LocTally(int len) { this->nucs = new NucTally[len]; } ~LocTally() { delete [] this->nucs; } }; // // Population Summary class contains a two dimensional array storing the // summary statistics for each locus in each of a set of populations: // // Pop1 Pop2 Pop3 // Locus1 +-LocSum----+-LocSum----+-LocSum // | | | | // | +-SumStat +-SumStat +-SumStat (Nuc0) // | +-SumStat +-SumStat +-SumStat (Nuc1) // | ... 
// Locus2 +-LocSum----+-LocSum----+-LocSum // | | | | // | +-SumStat +-SumStat +-SumStat (Nuc0) // | +-SumStat +-SumStat +-SumStat (Nuc1) // | ... // Locus3 +-LocSum----+-LocSum----+-LocSum // | | | | // | +-SumStat +-SumStat +-SumStat (Nuc0) // | +-SumStat +-SumStat +-SumStat (Nuc1) // | ... // ... // template class PopSum { int num_loci; int num_pops; LocSum ***data; LocTally **loc_tally; map locus_order; // LocusID => ArrayIndex; map catalog IDs to their first dimension // position in the LocSum array. map rev_locus_order; map pop_order; // PopulationID => ArrayIndex; map defining at what position in // the second dimension of the LocSum array each population is stored. map rev_pop_order; map pop_sizes; // The maximum size of each separate population. public: PopSum(int, int); ~PopSum(); int initialize(PopMap *); int add_population(map &, PopMap *, uint, uint, uint, bool, ofstream &); int tally(map &); int loci_cnt() { return this->num_loci; } int rev_locus_index(int index) { return this->rev_locus_order[index]; } int pop_cnt() { return this->num_pops; } int pop_index(int index) { return this->pop_order[index]; } int rev_pop_index(int index) { return this->rev_pop_order[index]; } int pop_size(int pop_id) { return this->pop_sizes[pop_id]; } LocSum **locus(int); LocSum *pop(int, int); LocTally *locus_tally(int); PopPair *Fst(int, int, int, int); int fishers_exact_test(PopPair *, double, double, double, double); private: int tally_heterozygous_pos(LocusT *, Datum **, LocSum *, int, int, uint, uint); int tally_fixed_pos(LocusT *, Datum **, LocSum *, int, uint, uint); int tally_ref_alleles(LocSum **, int, short unsigned int &, char &, char &, short unsigned int &, short unsigned int &); int tally_observed_haplotypes(vector &, int); double pi(double, double, double); double binomial_coeff(double, double); }; template PopSum::PopSum(int num_loci, int num_populations) { this->loc_tally = new LocTally *[num_loci]; this->data = new LocSum **[num_loci]; for (int i = 0; i < 
num_loci; i++) { this->data[i] = new LocSum *[num_populations]; for (int j = 0; j < num_populations; j++) this->data[i][j] = NULL; } this->num_pops = num_populations; this->num_loci = num_loci; } template PopSum::~PopSum() { for (int i = 0; i < this->num_loci; i++) { for (int j = 0; j < this->num_pops; j++) delete this->data[i][j]; delete [] this->data[i]; delete this->loc_tally[i]; } delete [] this->data; delete [] this->loc_tally; } template int PopSum::initialize(PopMap *pmap) { int locus_id; for (int i = 0; i < this->num_loci; i++) { locus_id = pmap->rev_locus_index(i); this->locus_order[locus_id] = i; this->rev_locus_order[i] = locus_id; } return 0; } template int PopSum::add_population(map &catalog, PopMap *pmap, uint population_id, uint start_index, uint end_index, bool verbose, ofstream &log_fh) { LocusT *loc; Datum **d; LocSum **s; uint locus_id, len; int res; set snp_cols; int incompatible_loci = 0; if (verbose) log_fh << "\n#\n# Recording sites that have incompatible loci -- loci with too many alleles present.\n" << "#\n" << "# Level\tAction\tLocus ID\tChr\tBP\tColumn\tPopID\n#\n"; // // Determine the index for this population // uint pop_index = this->pop_order.size() == 0 ? 0 : this->pop_order.size(); this->pop_order[population_id] = pop_index; this->rev_pop_order[pop_index] = population_id; // // Record the maximal size of this population. // this->pop_sizes[population_id] = end_index - start_index + 1; for (int i = 0; i < this->num_loci; i++) { locus_id = pmap->rev_locus_index(i); d = pmap->locus(locus_id); s = this->locus(locus_id); loc = catalog[locus_id]; // // Create an array of SumStat objects // len = strlen(loc->con); s[pop_index] = new LocSum(len); // // Check if this locus has already been filtered and is NULL in all individuals. 
// bool filtered = true; for (uint k = start_index; k <= end_index; k++) { if (d[k] != NULL) filtered = false; } if (filtered == true) { for (uint k = 0; k < len; k++) { s[pop_index]->nucs[k].filtered_site = true; } continue; } // // The catalog records which nucleotides are heterozygous. For these nucleotides we will // calculate observed genotype frequencies, allele frequencies, and expected genotype frequencies. // for (uint k = 0; k < loc->snps.size(); k++) { res = this->tally_heterozygous_pos(loc, d, s[pop_index], loc->snps[k]->col, k, start_index, end_index); // // If site is incompatible (too many alleles present), log it. // if (res < 0) { s[pop_index]->nucs[loc->snps[k]->col].incompatible_site = true; incompatible_loci++; if (verbose) log_fh << "within_population\t" << "incompatible_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(loc->snps[k]->col) << "\t" << loc->snps[k]->col << "\t" << pop_key[population_id] << "\n"; } snp_cols.insert(loc->snps[k]->col); } // // For all other fixed sites, we just need to record them. 
// for (uint k = 0; k < len; k++) { if (snp_cols.count(k)) continue; this->tally_fixed_pos(loc, d, s[pop_index], k, start_index, end_index); } snp_cols.clear(); } cerr << "Population '" << pop_key[population_id] << "' contained " << incompatible_loci << " incompatible loci -- more than two alleles present.\n"; log_fh << "Population " << population_id << " contained " << incompatible_loci << " incompatible loci -- more than two alleles present.\n"; return 0; } template int PopSum::tally(map &catalog) { LocusT *loc; LocSum **s; LocTally *ltally; int locus_id, variable_pop; uint16_t p_cnt, q_cnt, len, col; for (int n = 0; n < this->num_loci; n++) { locus_id = this->rev_locus_index(n); loc = catalog[locus_id]; s = this->locus(locus_id); len = strlen(loc->con); ltally = new LocTally(len); this->loc_tally[n] = ltally; // for (uint i = 0; i < loc->snps.size(); i++) { // uint col = loc->snps[i]->col; for (col = 0; col < len; col++) { ltally->nucs[col].col = col; ltally->nucs[col].bp = loc->sort_bp(col); ltally->nucs[col].loc_id = locus_id; this->tally_ref_alleles(s, col, ltally->nucs[col].allele_cnt, ltally->nucs[col].p_allele, ltally->nucs[col].q_allele, p_cnt, q_cnt); // // Is this site variable? // if (ltally->nucs[col].allele_cnt > 1) ltally->nucs[col].fixed = false; for (int j = 0; j < this->num_pops; j++) { // // Sum the number of individuals examined at this locus across populations. // ltally->nucs[col].num_indv += s[j]->nucs[col].num_indv; ltally->nucs[col].pop_cnt += s[j]->nucs[col].num_indv > 0 ? 1 : 0; } for (int j = 0; j < this->num_pops; j++) { // // Sum the most frequent allele across populations. // if (s[j]->nucs[col].p_nuc == ltally->nucs[col].p_allele) ltally->nucs[col].p_freq += s[j]->nucs[col].p * (s[j]->nucs[col].num_indv / (double) ltally->nucs[col].num_indv); else ltally->nucs[col].p_freq += (1 - s[j]->nucs[col].p) * (s[j]->nucs[col].num_indv / (double) ltally->nucs[col].num_indv); // // Sum observed heterozygosity across populations. 
// ltally->nucs[col].obs_het += s[j]->nucs[col].obs_het * (s[j]->nucs[col].num_indv / (double) ltally->nucs[col].num_indv); } // // We want to report the most frequent allele as the P allele. Reorder the alleles // if necessary. // if (ltally->nucs[col].p_freq < 0.5) { char a = ltally->nucs[col].p_allele; ltally->nucs[col].p_allele = ltally->nucs[col].q_allele; ltally->nucs[col].q_allele = a; ltally->nucs[col].p_freq = 1 - ltally->nucs[col].p_freq; uint b = p_cnt; p_cnt = q_cnt; q_cnt = b; } // // Check if this is a private allele. Either the site is variable and // the allele exists in one population, or the site is fixed and one // population is homozygous for the private allele. // variable_pop = -1; if (p_cnt == 1 && q_cnt > 1) { for (int j = 0; j < this->num_pops; j++) if (s[j]->nucs[col].p_nuc == ltally->nucs[col].p_allele || s[j]->nucs[col].q_nuc == ltally->nucs[col].p_allele) variable_pop = j; } else if (p_cnt > 1 && q_cnt == 1) { for (int j = 0; j < this->num_pops; j++) if (s[j]->nucs[col].p_nuc == ltally->nucs[col].q_allele || s[j]->nucs[col].q_nuc == ltally->nucs[col].q_allele) variable_pop = j; } ltally->nucs[col].priv_allele = variable_pop; } } return 0; } template int PopSum::tally_ref_alleles(LocSum **s, int snp_index, short unsigned int &allele_cnt, char &p_allele, char &q_allele, short unsigned int &p_cnt, short unsigned int &q_cnt) { int nucs[4] = {0}; char nuc[2]; p_allele = 0; q_allele = 0; allele_cnt = 0; for (int j = 0; j < this->num_pops; j++) { nuc[0] = 0; nuc[1] = 0; nuc[0] = s[j]->nucs[snp_index].p_nuc; nuc[1] = s[j]->nucs[snp_index].q_nuc; for (uint k = 0; k < 2; k++) switch(nuc[k]) { case 'A': case 'a': nucs[0]++; break; case 'C': case 'c': nucs[1]++; break; case 'G': case 'g': nucs[2]++; break; case 'T': case 't': nucs[3]++; break; } } // // Determine how many alleles are present at this position in this population. 
// We cannot deal with more than two alternative alleles, if there are more than two // in a single population, print a warning and exclude this nucleotide position. // int i; for (i = 0; i < 4; i++) if (nucs[i] > 0) allele_cnt++; if (allele_cnt > 2) { p_allele = 0; q_allele = 0; return 0; } // // Record which nucleotide is the P allele and which is the Q allele. // i = 0; while (p_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 0: p_allele = 'A'; break; case 1: p_allele = 'C'; break; case 2: p_allele = 'G'; break; case 3: p_allele = 'T'; break; } } i++; } while (q_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 1: q_allele = 'C'; break; case 2: q_allele = 'G'; break; case 3: q_allele = 'T'; break; } } i++; } // // Tabulate the number of populations the p_allele and the q_allele occur in. // p_cnt = 0; q_cnt = 0; for (int j = 0; j < this->num_pops; j++) { nuc[0] = 0; nuc[1] = 0; nuc[0] = s[j]->nucs[snp_index].p_nuc; nuc[1] = s[j]->nucs[snp_index].q_nuc; for (uint k = 0; k < 2; k++) if (nuc[k] != 0 && nuc[k] == p_allele) p_cnt++; else if (nuc[k] != 0 && nuc[k] == q_allele) q_cnt++; } return 1; } template PopPair *PopSum::Fst(int locus, int pop_1, int pop_2, int pos) { LocSum *s_1 = this->pop(locus, pop_1); /////// SLOW! LocSum *s_2 = this->pop(locus, pop_2); PopPair *pair = new PopPair(); // // If this locus only appears in one population do not calculate Fst. // if (s_1->nucs[pos].num_indv == 0 || s_2->nucs[pos].num_indv == 0) return pair; // // Calculate Fst at a locus, sub-population relative to that found in the entire population // Fst = 1 - (Sum_j( (n_j choose 2) * pi_j)) / (pi_all * Sum_j( (n_j choose 2) )) // double n_1, n_2, pi_1, pi_2; n_1 = s_1->nucs[pos].num_indv * 2; n_2 = s_2->nucs[pos].num_indv * 2; pi_1 = s_1->nucs[pos].pi; pi_2 = s_2->nucs[pos].pi; if (pi_1 == 0 && pi_2 == 0 && s_1->nucs[pos].p_nuc == s_2->nucs[pos].p_nuc) return pair; // // Calculate Pi over the entire pooled population. 
// // First, make sure this site is compatible between the two populations (no more than two alleles). // char nucs[4]; int ncnt[4] = {0}; nucs[0] = s_1->nucs[pos].p_nuc; nucs[1] = s_1->nucs[pos].q_nuc; nucs[2] = s_2->nucs[pos].p_nuc; nucs[3] = s_2->nucs[pos].q_nuc; for (int i = 0; i < 4; i++) switch(nucs[i]) { case 'A': ncnt[0]++; break; case 'C': ncnt[1]++; break; case 'G': ncnt[2]++; break; case 'T': ncnt[3]++; break; } int allele_cnt = 0; for (int i = 0; i < 4; i++) if (ncnt[i] > 0) allele_cnt++; if (allele_cnt > 2) return NULL; double tot_alleles = n_1 + n_2; double p_1 = round(n_1 * s_1->nucs[pos].p); double q_1 = n_1 - p_1; double p_2 = s_1->nucs[pos].p_nuc == s_2->nucs[pos].p_nuc ? s_2->nucs[pos].p : (1 - s_2->nucs[pos].p); p_2 = round(n_2 * p_2); double q_2 = n_2 - p_2; double pi_all = this->pi(tot_alleles, p_1 + p_2, q_1 + q_2); double bcoeff_1 = this->binomial_coeff(n_1, 2); double bcoeff_2 = this->binomial_coeff(n_2, 2); double num = (bcoeff_1 * pi_1) + (bcoeff_2 * pi_2); double den = pi_all * (bcoeff_1 + bcoeff_2); double Fst = 1 - (num / den); pair->alleles = tot_alleles; pair->fst = Fst; pair->pi = pi_all; this->fishers_exact_test(pair, p_1, q_1, p_2, q_2); // cerr << "Locus: " << locus << ", pos: " << pos << "\n" // << " p_1.nuc: " << s_1->nucs[pos].p_nuc << "; q_1.nuc: " << s_1->nucs[pos].q_nuc // << "; p_2.nuc: " << s_2->nucs[pos].p_nuc << "; q_2.nuc: " << s_2->nucs[pos].q_nuc << "\n" // << " Total alleles: " << tot_alleles << "; " << " s_1.p: " << s_1->nucs[pos].p // << "; s_2.p: " << s_2->nucs[pos].p << "\n" // << " p_1: " << p_1 << "; q_1: " << q_1 << " p_2: " << p_2 << "; q_2: " << q_2 << "\n" // << " Pi1: " << pi_1 << "; Pi2: " << pi_2 << "; PiAll: " << pi_all << "\n" // << " N1: " << n_1 << "; N1 choose 2: " << bcoeff_1 << "\n" // << " N2: " << n_2 << "; N2 choose 2: " << bcoeff_2 << "\n" // << " Fst: " << Fst << "\n"; // // Calculate Fst (corrected for different samples sizes) using an AMOVA method, // correcting for unequal sample sizes. 
// Derived from Weir, _Genetic Data Analysis II_, chapter 5, "F Statistics,", pp166-167. // double p_1_freq = s_1->nucs[pos].p; double q_1_freq = 1 - p_1_freq; double p_2_freq = s_1->nucs[pos].p_nuc == s_2->nucs[pos].p_nuc ? s_2->nucs[pos].p : (1 - s_2->nucs[pos].p); double q_2_freq = 1 - p_2_freq; double p_avg_cor = ( (s_1->nucs[pos].num_indv * p_1_freq) + (s_2->nucs[pos].num_indv * p_2_freq) ) / ( s_1->nucs[pos].num_indv + s_2->nucs[pos].num_indv ); double n_avg_cor = 2 * ((s_1->nucs[pos].num_indv / 2) + (s_2->nucs[pos].num_indv / 2)); pair->amova_fst = ( (s_1->nucs[pos].num_indv * pow((p_1_freq - p_avg_cor), 2) + s_2->nucs[pos].num_indv * pow((p_2_freq - p_avg_cor), 2)) / n_avg_cor ) / (p_avg_cor * (1 - p_avg_cor)); if (log_fst_comp) { pair->comp = new double[18]; pair->comp[0] = n_1; pair->comp[1] = n_2; pair->comp[2] = tot_alleles; pair->comp[3] = p_1; pair->comp[4] = q_1; pair->comp[5] = p_2; pair->comp[6] = q_2; pair->comp[7] = pi_1; pair->comp[8] = pi_2; pair->comp[9] = pi_all; pair->comp[10] = bcoeff_1; pair->comp[11] = bcoeff_2; pair->comp[12] = p_1_freq; pair->comp[13] = q_1_freq; pair->comp[14] = p_2_freq; pair->comp[15] = q_2_freq; pair->comp[16] = p_avg_cor; pair->comp[17] = n_avg_cor; } // // // // Calculate Fst using a pure parametric method (assumes allele counts are real, not // // samples). Jakobsson, Edge, and Rosenberg. "The Relationship Between Fst and the // // Frequency of the Most Frequent Allele." Genetics 193:515-528. Equation 4. 
// // // double sigma_1 = p_1_freq + q_1_freq; // double sigma_2 = p_2_freq + q_2_freq; // double delta_1 = fabs(p_1_freq - p_2_freq); // double delta_2 = fabs(q_1_freq - q_2_freq); // pair->jakob_fst = (pow(delta_1, 2) + pow(delta_2, 2)) / ( 4 - (pow(sigma_1, 2) + pow(sigma_2, 2)) ); return pair; } template int PopSum::tally_fixed_pos(LocusT *locus, Datum **d, LocSum *s, int pos, uint start, uint end) { double num_indv = 0.0; char p_nuc = 0; for (uint i = start; i <= end; i++) { if (d[i] == NULL || pos >= d[i]->len) continue; // // Before counting this individual, make sure the model definitively called this // position as hEterozygous or hOmozygous. // if (d[i]->model[pos] == 'E') { cerr << "Warning: heterozygous model call at fixed nucleotide position: " << "locus " << locus->id << " individual " << d[i]->id << "; position: " << pos << "\n"; } num_indv++; p_nuc = locus->con[pos]; } // // Record the results in the PopSum object. // s->nucs[pos].loc_id = locus->id; s->nucs[pos].bp = locus->sort_bp(pos); s->nucs[pos].fixed = true; s->nucs[pos].num_indv = num_indv; s->nucs[pos].alleles = 2 * num_indv; if (num_indv > 0) { s->nucs[pos].p = 1.0; s->nucs[pos].p_nuc = p_nuc; s->nucs[pos].obs_hom = 1.0; s->nucs[pos].obs_het = 0.0; s->nucs[pos].exp_hom = 1.0; s->nucs[pos].exp_het = 0.0; s->nucs[pos].stat[0] = 0.0; // pi s->nucs[pos].stat[1] = -7.0; // fis } return 0; } template int PopSum::tally_heterozygous_pos(LocusT *locus, Datum **d, LocSum *s, int pos, int snp_index, uint start, uint end) { // // Tally up the genotype frequencies. // int nucs[4] = {0}; uint i; char nuc; //cerr << " Calculating summary stats at het locus " << locus->id << " position " << pos << "; snp_index: " << snp_index << "\n"; // // Iterate over each individual in this sub-population. // for (i = start; i <= end; i++) { if (d[i] == NULL || pos >= d[i]->len || d[i]->model[pos] == 'U') continue; // // Pull each allele for this SNP from the observed haplotype. 
// for (uint j = 0; j < d[i]->obshap.size(); j++) { nuc = d[i]->obshap[j][snp_index]; switch(nuc) { case 'A': case 'a': nucs[0]++; break; case 'C': case 'c': nucs[1]++; break; case 'G': case 'g': nucs[2]++; break; case 'T': case 't': nucs[3]++; break; } } } // // Determine how many alleles are present at this position in this population. // We cannot deal with more than two alternative alleles, if there are more than two // in a single population, print a warning and exclude this nucleotide position. // int allele_cnt = 0; for (i = 0; i < 4; i++) if (nucs[i] > 0) allele_cnt++; if (allele_cnt > 2) return -1; // // Record which nucleotide is the P allele and which is the Q allele. // char p_allele = 0; char q_allele = 0; i = 0; while (p_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 0: p_allele = 'A'; break; case 1: p_allele = 'C'; break; case 2: p_allele = 'G'; break; case 3: p_allele = 'T'; break; } } i++; } while (q_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 1: q_allele = 'C'; break; case 2: q_allele = 'G'; break; case 3: q_allele = 'T'; break; } } i++; } //cerr << " P Allele: " << p_allele << "; Q Allele: " << q_allele << "\n"; // // Calculate observed genotype frequencies. // double num_indv = 0.0; double obs_het = 0.0; double obs_p = 0.0; double obs_q = 0.0; for (i = start; i <= end; i++) { if (d[i] == NULL || pos >= d[i]->len) continue; // // Before counting this individual, make sure the model definitively called this // position as hEterozygous or hOmozygous. 
// if (d[i]->model[pos] == 'E' || d[i]->model[pos] == 'O') num_indv++; else continue; if (d[i]->obshap.size() > 1 && this->tally_observed_haplotypes(d[i]->obshap, snp_index) == 2) obs_het++; else if (d[i]->obshap[0][snp_index] == p_allele) obs_p++; else if (d[i]->obshap[0][snp_index] == q_allele) obs_q++; } //cerr << " Num Individuals: " << num_indv << "; Obs Hets: " << obs_het << "; Obs P: " << obs_p << "; Obs Q: " << obs_q << "\n"; if (num_indv == 0) return 0; // // Calculate total number of alleles // double tot_alleles = num_indv * 2; double allele_p = obs_het + (2 * obs_p); double allele_q = obs_het + (2 * obs_q); // // Calculate Pi, equivalent to expected heterozygosity (exp_het) // s->nucs[pos].stat[0] = this->pi(tot_alleles, allele_p, allele_q); if (s->nucs[pos].stat[0] == 0.0) s->nucs[pos].fixed = true; // // Convert to allele frequencies // allele_p = allele_p / tot_alleles; allele_q = allele_q / tot_alleles; //cerr << " P allele frequency: " << allele_p << "; Q allele frequency: " << allele_q << "\n"; // // // // If the minor allele frequency is below the cutoff, set it to zero. // // // if (minor_allele_freq > 0) { // if (allele_p < allele_q) { // if (allele_p < minor_allele_freq) { // s->nucs[pos].pi = 0.0; // s->nucs[pos].fixed = true; // s->nucs[pos].filtered_site = true; // return 0; // } // } else { // if (allele_q < minor_allele_freq) { // s->nucs[pos].pi = 0.0; // s->nucs[pos].fixed = true; // s->nucs[pos].filtered_site = true; // return 0; // } // } // } // // Calculate expected genotype frequencies. 
// double exp_het = 2 * allele_p * allele_q; // 2pq // double exp_p = allele_p * allele_p; // p^2 // double exp_q = allele_q * allele_q; // q^2 //cerr << " Expected Het: " << exp_het << "; Expected P: " << exp_p << "; Expected Q: " << exp_q << "\n"; obs_het = obs_het / num_indv; obs_p = obs_p / num_indv; obs_q = obs_q / num_indv; //cerr << " Obs Hets Freq: " << obs_het << "; Obs P Freq: " << obs_p << "; Obs Q Freq: " << obs_q << "\n"; // // Record the results in the PopSum object. // s->nucs[pos].loc_id = locus->id; s->nucs[pos].bp = locus->sort_bp(pos); s->nucs[pos].num_indv = num_indv; s->nucs[pos].alleles = tot_alleles; s->nucs[pos].p = allele_p > allele_q ? allele_p : allele_q; s->nucs[pos].p_nuc = allele_p > allele_q ? p_allele : q_allele; s->nucs[pos].q_nuc = allele_p > allele_q ? q_allele : p_allele; s->nucs[pos].obs_hom = 1 - obs_het; s->nucs[pos].obs_het = obs_het; s->nucs[pos].exp_hom = 1 - exp_het; s->nucs[pos].exp_het = exp_het; // // Calculate F_is, the inbreeding coefficient of an individual (I) relative to the subpopulation (S): // Fis = (exp_het - obs_het) / exp_het // double fis = s->nucs[pos].pi == 0 ? -7 : (s->nucs[pos].pi - obs_het) / s->nucs[pos].pi; s->nucs[pos].stat[1] = fis; return 0; } template int PopSum::tally_observed_haplotypes(vector &obshap, int snp_index) { int nucs[4] = {0}; char nuc; // // Pull each allele for this SNP from the observed haplotype. 
// for (uint j = 0; j < obshap.size(); j++) { nuc = obshap[j][snp_index]; switch(nuc) { case 'A': case 'a': nucs[0]++; break; case 'C': case 'c': nucs[1]++; break; case 'G': case 'g': nucs[2]++; break; case 'T': case 't': nucs[3]++; break; } } int allele_cnt = 0; for (int i = 0; i < 4; i++) if (nucs[i] > 0) allele_cnt++; return allele_cnt; } template int PopSum::fishers_exact_test(PopPair *pair, double p_1, double q_1, double p_2, double q_2) { // | Allele1 | Allele2 | // Fisher's Exact Test: -----+---------+---------+ // Pop1 | p_1 | q_1 | // Pop2 | p_2 | q_2 | // Probability p: // p = ((p_1 + q_1)!(p_2 + q_2)!(p_1 + p_2)!(q_1 + q_2)!) / (n! p_1! q_1! p_2! q_2!) // // According to: // Jerrold H. Zar, "A fast and efficient algorithm for the Fisher exact test." // Behavior Research Methods, Instruments, & Computers 1987, 19(4): 43-44 // // Probability p can be calculated as three binomial coefficients: // Let p_1 + q_1 = r_1; p_2 + q_2 = r_2; p_1 + p_2 = c_1; q_1 + q_2 = c_2 // // p = (r_1 choose p_1)(r_2 choose p_2) / (n choose c_1) // // Fisher's Exact test algorithm implemented according to Sokal and Rohlf, _Biometry_, section 17.4. // double r_1 = p_1 + q_1; double r_2 = p_2 + q_2; double c_1 = p_1 + p_2; double d_1 = p_1 * q_2; double d_2 = p_2 * q_1; double n = r_1 + r_2; double p = 0.0; // char p1_str[32], p2_str[32], q1_str[32], q2_str[32]; // sprintf(p1_str, "% 3.0f", p_1); // sprintf(q1_str, "% 3.0f", q_1); // sprintf(p2_str, "% 3.0f", p_2); // sprintf(q2_str, "% 3.0f", q_2); // // cerr // << " | Allele1 | Allele2 | " << "\n" // << "-----+---------+---------+" << "\n" // << "Pop1 | " << p1_str << " | " << q1_str << " |" << "\n" // << "Pop2 | " << p2_str << " | " << q2_str << " |" << "\n\n"; // // Compute the first tail. // double p1 = p_1; double q1 = q_1; double p2 = p_2; double q2 = q_2; double tail_1 = 0.0; double den = this->binomial_coeff(n, c_1); // // If (p_1*q_2 - p_2*q_1) < 0 decrease cells p_1 and q_2 by one and add one to p_2 and q_1. 
// Compute p and repeat until one or more cells equal 0. // if (d_1 - d_2 < 0) { do { p = (this->binomial_coeff(r_1, p1) * this->binomial_coeff(r_2, p2)) / den; tail_1 += p; p1--; q2--; p2++; q1++; } while (p1 >= 0 && q2 >= 0); } else { // // Else, if (p_1*q_2 - p_2*q_1) > 0 decrease cells p_2 and q_1 by one and add one to p_1 and q_2. // Compute p and repeat until one or more cells equal 0. // do { p = (this->binomial_coeff(r_1, p1) * this->binomial_coeff(r_2, p2)) / den; tail_1 += p; p2--; q1--; p1++; q2++; } while (p2 >= 0 && q1 >= 0); } // // Compute the second tail. // double tail_2 = 0.0; p = 0; // // If (p_1*q_2 - p_2*q_1) < 0, set to zero the smaller of the two frequencies, adjusting the other values // to keep the marginals the same. // if (d_1 - d_2 < 0) { if (p2 < q1) { q2 += p2; p1 += p2; q1 -= p2; p2 = 0; } else { p1 += q1; q2 += q1; p2 -= q1; q1 = 0; } } else { if (p1 < q2) { q1 += p1; p2 += p1; q2 -= p1; p1 = 0; } else { p2 += q2; q1 += q2; p1 -= q2; q2 = 0; } } // // If (p_1*q_2 - p_2*q_1) < 0 decrease cells p_1 and q_2 by one and add one to p_2 and q_1. // Compute p and repeat until tail_2 > tail_1. // if (d_1 - d_2 < 0) { do { p = (this->binomial_coeff(r_1, p1) * this->binomial_coeff(r_2, p2)) / den; tail_2 += p; p1--; q2--; p2++; q1++; } while (tail_2 < tail_1 && p1 >= 0 && q2 >= 0); tail_2 -= p; } else { // // Else, if (p_1*q_2 - p_2*q_1) > 0 decrease cells p_2 and q_1 by one and add one to p_1 and q_2. // Compute p and repeat until one or more cells equal 0. // do { p = (this->binomial_coeff(r_1, p1) * this->binomial_coeff(r_2, p2)) / den; tail_2 += p; p2--; q1--; p1++; q2++; } while (tail_2 < tail_1 && p2 >= 0 && q1 >= 0); tail_2 -= p; } pair->fet_p = tail_1 + tail_2; if (pair->fet_p > 1.0) pair->fet_p = 1.0; // // Calculate the odds ratio. To account for possible cases were one allele frequency is // zero, we will increment all allele frequencies by one. 
// if (p_1 == 0 || q_1 == 0 || p_2 == 0 || q_2 == 0) { p_1++; q_1++; p_2++; q_2++; } pair->fet_or = (p_1 * q_2) / (q_1 * p_2); double ln_fet_or = pair->fet_or > 0 ? log(pair->fet_or) : 0.0; // // Calculate the standard error of the natural log of the odds ratio // double se = pair->fet_or > 0 ? sqrt((1 / p_1) + (1 / q_1) + (1 / p_2) + (1 / q_2)) : 0.0; // // Calculate the confidence intervals of the natural log of the odds ratio. // double ln_ci_low = pair->fet_or > 0 ? ln_fet_or - (1.96 * se) : 0; double ln_ci_high = pair->fet_or > 0 ? ln_fet_or + (1.96 * se) : 0; // // Convert the confidence intervals out of natural log space // pair->ci_low = pair->fet_or > 0 ? exp(ln_ci_low) : 0; pair->ci_high = pair->fet_or > 0 ? exp(ln_ci_high) : 0; pair->lod = fabs(log10(pair->fet_or)); return 0; } template double PopSum::pi(double tot_alleles, double p, double q) { // // Calculate Pi, equivalent to expected heterozygosity: // pi = 1 - Sum_i( (n_i choose 2) ) / (n choose 2) // double pi = this->binomial_coeff(p, 2) + this->binomial_coeff(q, 2); pi = pi / binomial_coeff(tot_alleles, 2); pi = 1 - pi; return pi; } template double PopSum::binomial_coeff(double n, double k) { if (n < k) return 0.0; // // Compute the binomial coefficient using the method of: // Y. Manolopoulos, "Binomial coefficient computation: recursion or iteration?", // ACM SIGCSE Bulletin, 34(4):65-67, 2002. // double r = 1.0; double s = (k < n - k) ? 
n - k + 1 : k + 1; for (double i = n; i >= s; i--) r = r * i / (n - i + 1); return r; } template LocSum **PopSum::locus(int locus) { return this->data[this->locus_order[locus]]; } template LocSum *PopSum::pop(int locus, int pop_id) { return this->data[this->locus_order[locus]][this->pop_order[pop_id]]; } template LocTally *PopSum::locus_tally(int locus) { return this->loc_tally[this->locus_order[locus]]; } #endif // __POPSUM_H__ stacks-1.35/src/populations.cc000644 000765 000024 00001014240 12571641525 017167 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2012-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // populations -- generate population genetic statistics and output // haplotypes in a population context. // #include "populations.h" // Global variables to hold command-line options. 
int num_threads = 1; int batch_id = -1; string in_path; string out_path; string out_file; string pmap_path; string bl_file; string wl_file; string bs_wl_file; string enz; double sigma = 150000.0; double sample_limit = 0.0; int population_limit = 1; bool calc_fstats = false; bool bootstrap = false; bool bootstrap_fst = false; bool bootstrap_pifis = false; bool bootstrap_phist = false; bool bootstrap_div = false; bs_type bootstrap_type = bs_exact; int bootstrap_reps = 100; bool bootstrap_wl = false; bool write_single_snp = false; bool write_random_snp = false; bool merge_sites = false; bool expand_id = false; bool sql_out = false; bool vcf_out = false; bool vcf_haplo_out = false; bool fasta_out = false; bool fasta_strict_out = false; bool genepop_out = false; bool genomic_out = false; bool structure_out = false; bool phase_out = false; bool fastphase_out = false; bool beagle_out = false; bool beagle_phased_out = false; bool plink_out = false; bool hzar_out = false; bool treemix_out = false; bool phylip_out = false; bool phylip_var = false; bool phylip_var_all = false; bool ordered_export = false; bool kernel_smoothed = false; bool loci_ordered = false; bool log_fst_comp = false; bool verbose = false; bool filter_lnl = false; double lnl_limit = 0.0; int min_stack_depth = 0; double merge_prune_lim = 1.0; double minor_allele_freq = 0.0; double max_obs_het = 1.0; double p_value_cutoff = 0.05; corr_type fst_correction = no_correction; map pop_key, grp_key; map > pop_indexes; map > grp_members; set blacklist, bootstraplist; map > whitelist; // // Hold information about restriction enzymes // map renz; map renz_cnt; map renz_len; map renz_olap; int main (int argc, char* argv[]) { initialize_renz(renz, renz_cnt, renz_len); initialize_renz_olap(renz_olap); parse_command_line(argc, argv); cerr << "Fst kernel smoothing: " << (kernel_smoothed == true ? "on" : "off") << "\n" << "Bootstrap resampling: "; if (bootstrap) cerr << "on, " << (bootstrap_type == bs_exact ? 
"exact; " : "approximate; ") << bootstrap_reps << " reptitions\n"; else cerr << "off\n"; cerr << "Percent samples limit per population: " << sample_limit << "\n" << "Locus Population limit: " << population_limit << "\n" << "Minimum stack depth: " << min_stack_depth << "\n" << "Log liklihood filtering: " << (filter_lnl == true ? "on" : "off") << "; threshold: " << lnl_limit << "\n" << "Minor allele frequency cutoff: " << minor_allele_freq << "\n" << "Maximum observed heterozygosity cutoff: " << max_obs_het << "\n" << "Applying Fst correction: "; switch(fst_correction) { case p_value: cerr << "P-value correction.\n"; break; case bonferroni_win: cerr << "Bonferroni correction within sliding window.\n"; break; case bonferroni_gen: cerr << "Bonferroni correction across genome wide sites.\n"; break; case no_correction: cerr << "none.\n"; break; } // // Set the number of OpenMP parallel threads to execute. // #ifdef _OPENMP omp_set_num_threads(num_threads); #endif // // Seed the random number generator // srandom(time(NULL)); vector > files; if (!build_file_list(files, pop_indexes, grp_members)) exit(1); if (wl_file.length() > 0) { load_marker_column_list(wl_file, whitelist); cerr << "Loaded " << whitelist.size() << " whitelisted markers.\n"; } if (bl_file.length() > 0) { load_marker_list(bl_file, blacklist); cerr << "Loaded " << blacklist.size() << " blacklisted markers.\n"; } if (bs_wl_file.length() > 0) { load_marker_list(bs_wl_file, bootstraplist); cerr << "Loaded " << bootstraplist.size() << " markers to include when bootstrapping.\n"; } // // Open the log file. 
// stringstream log; log << "batch_" << batch_id << ".populations.log"; string log_path = in_path + log.str(); ofstream log_fh(log_path.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening log file '" << log_path << "'\n"; exit(1); } init_log(log_fh, argc, argv); // // Load the catalog // stringstream catalog_file; map catalog; bool compressed = false; int res; catalog_file << in_path << "batch_" << batch_id << ".catalog"; if ((res = load_loci(catalog_file.str(), catalog, false, false, compressed)) == 0) { cerr << "Unable to load the catalog '" << catalog_file.str() << "'\n"; return 0; } // // Check the whitelist. // check_whitelist_integrity(catalog, whitelist); // // Implement the black/white list // reduce_catalog(catalog, whitelist, blacklist); // // If the catalog is not reference aligned, assign an arbitrary ordering to catalog loci. // loci_ordered = order_unordered_loci(catalog); // // Load matches to the catalog // vector > catalog_matches; map samples; vector sample_ids; for (int i = 0; i < (int) files.size(); i++) { vector m; load_catalog_matches(in_path + files[i].second, m); if (m.size() == 0) { cerr << "Warning: unable to find any matches in file '" << files[i].second << "', excluding this sample from population analysis.\n"; // // This case is generated by an existing, but empty file. // Remove this sample from the population index which was built from // existing files, but we couldn't yet check for empty files. 
// map >::iterator pit; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) if (i >= pit->second.first && i <= pit->second.second) { pit->second.second--; pit++; while (pit != pop_indexes.end()) { pit->second.first--; pit->second.second--; pit++; } break; } continue; } catalog_matches.push_back(m); if (samples.count(m[0]->sample_id) == 0) { samples[m[0]->sample_id] = files[i].second; sample_ids.push_back(m[0]->sample_id); } else { cerr << "Fatal error: sample ID " << m[0]->sample_id << " occurs twice in this data set, likely the pipeline was run incorrectly.\n"; exit(0); } } // // Create the population map // cerr << "Populating observed haplotypes for " << sample_ids.size() << " samples, " << catalog.size() << " loci.\n"; PopMap *pmap = new PopMap(sample_ids.size(), catalog.size()); pmap->populate(sample_ids, catalog, catalog_matches); // // Tabulate haplotypes present and in what combinations. // tabulate_haplotypes(catalog, pmap); // // Output a list of heterozygous loci and the associate haplotype frequencies. // if (sql_out) write_sql(catalog, pmap); log_fh << "# Distribution of population loci.\n"; log_haplotype_cnts(catalog, log_fh); apply_locus_constraints(catalog, pmap, pop_indexes, log_fh); log_fh << "# Distribution of population loci after applying locus constraints.\n"; log_haplotype_cnts(catalog, log_fh); cerr << "Loading model outputs for " << sample_ids.size() << " samples, " << catalog.size() << " loci.\n"; map::iterator it; map::iterator mit; Datum *d; CSLocus *loc; // // Load the output from the SNP calling model for each individual at each locus. 
This // model output string looks like this: // OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOUOOOOUOOOOOO // and records model calls for each nucleotide: O (hOmozygous), E (hEterozygous), U (Unknown) // for (uint i = 0; i < sample_ids.size(); i++) { map modres; load_model_results(in_path + samples[sample_ids[i]], modres); if (modres.size() == 0) { cerr << "Warning: unable to find any model results in file '" << samples[sample_ids[i]] << "', excluding this sample from population analysis.\n"; continue; } for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->datum(loc->id, sample_ids[i]); if (d != NULL) { if (modres.count(d->id) == 0) { cerr << "Fatal error: Unable to find model data for catalog locus " << loc->id << ", sample ID " << sample_ids[i] << ", sample locus " << d->id << "; likely IDs were mismatched when running pipeline.\n"; exit(0); } d->len = strlen(modres[d->id]->model); d->model = new char[d->len + 1]; strcpy(d->model, modres[d->id]->model); } } for (mit = modres.begin(); mit != modres.end(); mit++) delete mit->second; modres.clear(); } uint pop_id, start_index, end_index; map >::iterator pit; PopSum *psum = new PopSum(pmap->loci_cnt(), pop_indexes.size()); psum->initialize(pmap); for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start_index = pit->second.first; end_index = pit->second.second; pop_id = pit->first; cerr << "Generating nucleotide-level summary statistics for population '" << pop_key[pop_id] << "'\n"; psum->add_population(catalog, pmap, pop_id, start_index, end_index, verbose, log_fh); } cerr << "Tallying loci across populations..."; psum->tally(catalog); cerr << "done.\n"; // // We have removed loci that were below the -r and -p thresholds. Now we need to // identify individual SNPs that are below the -r threshold or the minor allele // frequency threshold (-a). In these cases we will remove the SNP, but keep the locus. 
// blacklist.clear(); int pruned_snps = prune_polymorphic_sites(catalog, pmap, psum, pop_indexes, whitelist, blacklist, log_fh); cerr << "Pruned " << pruned_snps << " variant sites due to filter constraints.\n"; if (!verbose) cerr << " (enable the --verbose flag to record the reason why each site was filtered in the batch_X.populations.log file.)\n"; // // Create an artificial whitelist if the user requested only the first or a random SNP per locus. // if (write_single_snp) implement_single_snp_whitelist(catalog, psum, whitelist); else if (write_random_snp) implement_random_snp_whitelist(catalog, psum, whitelist); // // Remove the accumulated SNPs // cerr << "Removing " << blacklist.size() << " additional loci for which all variant sites were filtered..."; set empty_list; reduce_catalog(catalog, empty_list, blacklist); reduce_catalog_snps(catalog, whitelist, pmap); int retained = pmap->prune(blacklist); cerr << " retained " << retained << " loci.\n"; // // Merge loci that overlap on a common restriction enzyme cut site. // map > merge_map; if (merge_sites && loci_ordered) merge_shared_cutsite_loci(catalog, pmap, psum, merge_map, log_fh); // // Regenerate summary statistics after pruning SNPs and merging loci. 
// delete psum; psum = new PopSum(pmap->loci_cnt(), pop_indexes.size()); psum->initialize(pmap); for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start_index = pit->second.first; end_index = pit->second.second; pop_id = pit->first; cerr << "Regenerating nucleotide-level summary statistics for population '" << pop_key[pop_id] << "'\n"; psum->add_population(catalog, pmap, pop_id, start_index, end_index, verbose, log_fh); } cerr << "Re-tallying loci across populations..."; psum->tally(catalog); cerr << "done.\n"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = pit->first; if (kernel_smoothed && loci_ordered) { cerr << " Generating kernel-smoothed population statistics...\n"; kernel_smoothed_popstats(catalog, pmap, psum, pop_id, log_fh); } } calculate_haplotype_stats(files, pop_indexes, catalog, pmap, psum); if (calc_fstats) { calculate_haplotype_divergence(files, pop_indexes, grp_members, catalog, pmap, psum); calculate_haplotype_divergence_pairwise(files, pop_indexes, grp_members, catalog, pmap, psum); } // // Calculate and output the locus-level summary statistics. // calculate_summary_stats(files, pop_indexes, catalog, pmap, psum); // // Output the observed haplotypes. 
// write_generic(catalog, pmap, samples, false); // // Output data in requested formats // if (fasta_out) write_fasta(catalog, pmap, samples, sample_ids); if (fasta_strict_out) write_strict_fasta(catalog, pmap, samples, sample_ids); if (genepop_out && ordered_export) write_genepop_ordered(catalog, pmap, psum, pop_indexes, samples, log_fh); else if (genepop_out) write_genepop(catalog, pmap, psum, pop_indexes, samples); if (structure_out && ordered_export) write_structure_ordered(catalog, pmap, psum, pop_indexes, samples, log_fh); else if (structure_out) write_structure(catalog, pmap, psum, pop_indexes, samples); if (fastphase_out) write_fastphase(catalog, pmap, psum, pop_indexes, samples); if (phase_out) write_phase(catalog, pmap, psum, pop_indexes, samples); if (beagle_out) write_beagle(catalog, pmap, psum, pop_indexes, samples); if (beagle_phased_out) write_beagle_phased(catalog, pmap, psum, pop_indexes, samples); if (plink_out) write_plink(catalog, pmap, psum, pop_indexes, samples); if (hzar_out) write_hzar(catalog, pmap, psum, pop_indexes, samples); if (treemix_out) write_treemix(catalog, pmap, psum, pop_indexes, samples); if (phylip_out || phylip_var) write_phylip(catalog, pmap, psum, pop_indexes, samples); if (phylip_var_all) write_fullseq_phylip(catalog, pmap, psum, pop_indexes, samples); if (vcf_haplo_out) write_vcf_haplotypes(catalog, pmap, psum, samples, sample_ids); if (vcf_out && ordered_export) write_vcf_ordered(catalog, pmap, psum, samples, sample_ids, merge_map, log_fh); else if (vcf_out) write_vcf(catalog, pmap, psum, samples, sample_ids, merge_map); // // Calculate and write Fst. // if (calc_fstats) write_fst_stats(files, pop_indexes, catalog, pmap, psum, log_fh); // // Output nucleotide-level genotype calls for each individual. 
// if (genomic_out) write_genomic(catalog, pmap); log_fh.close(); return 0; } int apply_locus_constraints(map &catalog, PopMap *pmap, map > &pop_indexes, ofstream &log_fh) { uint pop_id, start_index, end_index; CSLocus *loc; Datum **d; if (sample_limit == 0 && population_limit == 0 && min_stack_depth == 0) return 0; if (verbose) log_fh << "\n#\n# List of loci removed by first filtering stage of sample and population constraints\n#\n" << "# Action\tLocus ID\tChr\tBP\tColumn\tReason\n"; map::iterator it; map >::iterator pit; uint pop_cnt = pop_indexes.size(); int *pop_order = new int [pop_cnt]; // Which population each sample belongs to. int *samples = new int [pmap->sample_cnt()]; // For the current locus, how many samples in each population. int *pop_cnts = new int [pop_cnt]; // The total number of samples in each population. int *pop_tot = new int [pop_cnt]; pop_id = 0; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start_index = pit->second.first; end_index = pit->second.second; pop_tot[pop_id] = 0; for (uint i = start_index; i <= end_index; i++) { samples[i] = pop_id; pop_tot[pop_id]++; } pop_order[pop_id] = pit->first; pop_id++; } for (uint i = 0; i < pop_cnt; i++) pop_cnts[i] = 0; double pct = 0.0; bool pop_limit = false; int pops = 0; int below_stack_dep = 0; uint below_lnl_thresh = 0; set blacklist; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); for (int i = 0; i < pmap->sample_cnt(); i++) { // // Check that each sample is over the minimum stack depth for this locus. // if (d[i] != NULL && min_stack_depth > 0 && d[i]->tot_depth < min_stack_depth) { below_stack_dep++; delete d[i]; d[i] = NULL; loc->hcnt--; } // // Check that each sample is over the log likelihood threshold. // if (d[i] != NULL && filter_lnl && d[i]->lnl < lnl_limit) { below_lnl_thresh++; delete d[i]; d[i] = NULL; loc->hcnt--; } } // // Tally up the count of samples in this population. 
// for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] != NULL) pop_cnts[samples[i]]++; } // // Check that the counts for each population are over sample_limit. If not, zero out // the members of that population. // for (uint i = 0; i < pop_cnt; i++) { pct = (double) pop_cnts[i] / (double) pop_tot[i]; if (pop_cnts[i] > 0 && pct < sample_limit) { //cerr << "Removing population " << pop_order[i] << " at locus: " << loc->id << "; below sample limit: " << pct << "\n"; start_index = pop_indexes[pop_order[i]].first; end_index = pop_indexes[pop_order[i]].second; for (uint j = start_index; j <= end_index; j++) { if (d[j] != NULL) { delete d[j]; d[j] = NULL; loc->hcnt--; } } pop_cnts[i] = 0; } } // // Check that this locus is present in enough populations. // for (uint i = 0; i < pop_cnt; i++) if (pop_cnts[i] > 0) pops++; if (pops < population_limit) { //cerr << "Removing locus: " << loc->id << "; below population limit: " << pops << "\n"; pop_limit = true; } if (pop_limit) { blacklist.insert(loc->id); if (verbose) log_fh << "removed_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp() << "\t" << 0 << "\tfailed_population_limit\n"; } for (uint i = 0; i < pop_cnt; i++) pop_cnts[i] = 0; pop_limit = false; pops = 0; } // // Remove loci // if (min_stack_depth > 0) cerr << "Removed " << below_stack_dep << " samples from loci that are below the minimum stack depth of " << min_stack_depth << "x\n"; if (filter_lnl) cerr << "Removed " << below_lnl_thresh << " samples from loci that are below the log likelihood threshold of " << lnl_limit << "\n"; cerr << "Removing " << blacklist.size() << " loci that did not pass sample/population constraints..."; set whitelist; reduce_catalog(catalog, whitelist, blacklist); int retained = pmap->prune(blacklist); cerr << " retained " << retained << " loci.\n"; delete [] pop_cnts; delete [] pop_tot; delete [] pop_order; delete [] samples; if (retained == 0) exit(0); return 0; } int prune_polymorphic_sites(map &catalog, PopMap 
*pmap, PopSum *psum, map > &pop_indexes, map > &whitelist, set &blacklist, ofstream &log_fh) { map > new_wl; vector pop_prune_list; CSLocus *loc; LocTally *t; LocSum **s; Datum **d; bool sample_prune, maf_prune, het_prune, inc_prune; int size, pruned = 0; uint pop_id, start_index, end_index; if (verbose) log_fh << "\n#\n# List of pruned nucleotide sites\n#\n" << "# Action\tLocus ID\tChr\tBP\tColumn\tReason\n"; // // If the whitelist is populated, use it as a guide for what loci to consider. // // Construct a new whitelist along the way, that is a subset of the existing list. // if (whitelist.size() > 0) { map >::iterator it; for (it = whitelist.begin(); it != whitelist.end(); it++) { // // A locus on the whitelist may have already been filtered out. // if (catalog.count(it->first) == 0) continue; loc = catalog[it->first]; t = psum->locus_tally(loc->id); s = psum->locus(loc->id); // // Check that each SNP in this locus is above the sample_limit and that // each SNP is above the minor allele frequency. If so, add it back to // the whiteliest. // size = it->second.size(); for (uint i = 0; i < loc->snps.size(); i++) { // // If it is not already in the whitelist, ignore it. // if (size > 0 && it->second.count(loc->snps[i]->col) == 0) continue; // // If the site is fixed, ignore it. // if (t->nucs[loc->snps[i]->col].fixed == true) continue; sample_prune = false; maf_prune = false; het_prune = false; inc_prune = false; pop_prune_list.clear(); for (int j = 0; j < psum->pop_cnt(); j++) { pop_id = psum->rev_pop_index(j); if (s[j]->nucs[loc->snps[i]->col].incompatible_site) inc_prune = true; else if (s[j]->nucs[loc->snps[i]->col].num_indv == 0 || (double) s[j]->nucs[loc->snps[i]->col].num_indv / (double) psum->pop_size(pop_id) < sample_limit) pop_prune_list.push_back(pop_id); } // // Check how many populations have to be pruned out due to sample limit. If less than // population limit, prune them; if more than population limit, mark locus for deletion. 
// if ((psum->pop_cnt() - pop_prune_list.size()) < (uint) population_limit) { sample_prune = true; } else { for (uint j = 0; j < pop_prune_list.size(); j++) { if (s[psum->pop_index(pop_prune_list[j])]->nucs[loc->snps[i]->col].num_indv == 0) continue; start_index = pop_indexes[pop_prune_list[j]].first; end_index = pop_indexes[pop_prune_list[j]].second; d = pmap->locus(loc->id); for (uint k = start_index; k <= end_index; k++) { if (d[k] == NULL || loc->snps[i]->col >= (uint) d[k]->len) continue; if (d[k]->model != NULL) { d[k]->model[loc->snps[i]->col] = 'U'; } } } } if (t->nucs[loc->snps[i]->col].allele_cnt > 1) { // // Test for minor allele frequency. // if ((1 - t->nucs[loc->snps[i]->col].p_freq) < minor_allele_freq) maf_prune = true; // // Test for observed heterozygosity. // if (t->nucs[loc->snps[i]->col].obs_het > max_obs_het) het_prune = true; } if (maf_prune == false && het_prune == false && sample_prune == false && inc_prune == false) { new_wl[loc->id].insert(loc->snps[i]->col); } else { pruned++; if (verbose) { log_fh << "pruned_polymorphic_site\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(loc->snps[i]->col) << "\t" << loc->snps[i]->col << "\t"; if (inc_prune) log_fh << "incompatible_site\n"; else if (sample_prune) log_fh << "sample_limit\n"; else if (maf_prune) log_fh << "maf_limit\n"; else if (het_prune) log_fh << "obshet_limit\n"; else log_fh << "unknown_reason\n"; } } } // // If no SNPs were retained for this locus, then mark it to be removed entirely. // if (new_wl.count(loc->id) == 0) { if (verbose) log_fh << "removed_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp() << "\t" << 0 << "\tno_snps_remaining\n"; blacklist.insert(loc->id); } } } else { // // Otherwise, just iterate over the catalog. // map::iterator it; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; // // If this locus is fixed, don't try to filter it out. 
// if (loc->snps.size() == 0) { new_wl.insert(make_pair(loc->id, std::set())); continue; } t = psum->locus_tally(loc->id); s = psum->locus(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { // // If the site is fixed, ignore it. // if (t->nucs[loc->snps[i]->col].fixed == true) continue; sample_prune = false; maf_prune = false; het_prune = false; inc_prune = false; pop_prune_list.clear(); for (int j = 0; j < psum->pop_cnt(); j++) { pop_id = psum->rev_pop_index(j); if (s[j]->nucs[loc->snps[i]->col].incompatible_site) inc_prune = true; else if (s[j]->nucs[loc->snps[i]->col].num_indv == 0 || (double) s[j]->nucs[loc->snps[i]->col].num_indv / (double) psum->pop_size(pop_id) < sample_limit) pop_prune_list.push_back(pop_id); } // // Check how many populations have to be pruned out due to sample limit. If less than // population limit, prune them; if more than population limit, mark locus for deletion. // if ((psum->pop_cnt() - pop_prune_list.size()) < (uint) population_limit) { sample_prune = true; } else { for (uint j = 0; j < pop_prune_list.size(); j++) { if (s[psum->pop_index(pop_prune_list[j])]->nucs[loc->snps[i]->col].num_indv == 0) continue; start_index = pop_indexes[pop_prune_list[j]].first; end_index = pop_indexes[pop_prune_list[j]].second; d = pmap->locus(loc->id); for (uint k = start_index; k <= end_index; k++) { if (d[k] == NULL || loc->snps[i]->col >= (uint) d[k]->len) continue; if (d[k]->model != NULL) { d[k]->model[loc->snps[i]->col] = 'U'; } } } } if (t->nucs[loc->snps[i]->col].allele_cnt > 1) { // // Test for minor allele frequency. // if ((1 - t->nucs[loc->snps[i]->col].p_freq) < minor_allele_freq) maf_prune = true; // // Test for observed heterozygosity. 
// if (t->nucs[loc->snps[i]->col].obs_het > max_obs_het) het_prune = true; } if (maf_prune == false && het_prune == false && sample_prune == false && inc_prune == false) { new_wl[loc->id].insert(loc->snps[i]->col); } else { pruned++; if (verbose) { log_fh << "pruned_polymorphic_site\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(loc->snps[i]->col) << "\t" << loc->snps[i]->col << "\t"; if (inc_prune) log_fh << "incompatible_site\n"; else if (sample_prune) log_fh << "sample_limit\n"; else if (maf_prune) log_fh << "maf_limit\n"; else if (het_prune) log_fh << "obshet_limit\n"; else log_fh << "unknown_reason\n"; } } } // // If no SNPs were retained for this locus, then mark it to be removed entirely. // if (new_wl.count(loc->id) == 0) { if (verbose) log_fh << "removed_locus\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp() << "\t" << 0 << "\tno_snps_remaining\n"; blacklist.insert(loc->id); } } } whitelist = new_wl; return pruned; } bool order_unordered_loci(map &catalog) { map::iterator it; CSLocus *loc; set chrs; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (strlen(loc->loc.chr) > 0) chrs.insert(loc->loc.chr); } // // This data is already reference aligned. 
// if (chrs.size() > 0) return true; cerr << "Catalog is not reference aligned, arbitrarily ordering catalog loci.\n"; uint bp = 1; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; loc->loc.chr = new char[3]; strcpy(loc->loc.chr, "un"); loc->loc.bp = bp; bp += strlen(loc->con); } return false; } int log_haplotype_cnts(map &catalog, ofstream &log_fh) { map::iterator it; map valid, absent, confounded; CSLocus *loc; int missing; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; if (valid.count(loc->hcnt) == 0) valid[loc->hcnt] = 1; else valid[loc->hcnt]++; if (confounded.count(loc->confounded_cnt) == 0) confounded[loc->confounded_cnt] = 1; else confounded[loc->confounded_cnt]++; missing = loc->cnt - loc->hcnt; if (absent.count(missing) == 0) absent[missing] = 1; else absent[missing]++; } map::iterator cnt_it; log_fh << "# Distribution of valid loci matched to catalog locus.\n" << "# Valid samples at locus\tCount\n"; for (cnt_it = valid.begin(); cnt_it != valid.end(); cnt_it++) log_fh << cnt_it->first << "\t" << cnt_it->second << "\n"; log_fh << "# Distribution of confounded loci at catalog locus.\n" << "# Confounded samples at locus\tCount\n"; for (cnt_it = confounded.begin(); cnt_it != confounded.end(); cnt_it++) log_fh << cnt_it->first << "\t" << cnt_it->second << "\n"; log_fh << "# Distribution of missing loci at catalog loci.\n" << "# Absent samples at locus\tCount\n"; for (cnt_it = absent.begin(); cnt_it != absent.end(); cnt_it++) log_fh << cnt_it->first << "\t" << cnt_it->second << "\n"; return 0; } int tabulate_haplotypes(map &catalog, PopMap *pmap) { map::iterator it; vector::iterator hit; Datum **d; CSLocus *loc; double mean, cnt; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); mean = 0.0; cnt = 0.0; for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() > 1) loc->marker = "heterozygous"; mean += d[i]->lnl; cnt++; } 
if (loc->marker.length() > 0) { create_genotype_map(loc, pmap); call_population_genotypes(loc, pmap); } loc->lnl = mean / cnt; } return 0; } int merge_shared_cutsite_loci(map &catalog, PopMap *pmap, PopSum *psum, map > &merge_map, ofstream &log_fh) { map >::iterator it; CSLocus *cur, *next; Datum **d_1, **d_2; double prune_pct; uint unmergable, tot_loci, tot_samp; uint success = 0; uint failure = 0; uint overlap = 0; uint simple_merge_cnt = 0; uint complex_merge_cnt = 0; uint missing_samps_cnt = 0; uint phase_fail_cnt = 0; uint nomapping_cnt = 0; uint multimapping_cnt = 0; uint multifails_cnt = 0; tot_loci = pmap->loci_cnt(); set loci_to_destroy; map missing_samps_dist; cerr << "To merge adjacent loci at least " << merge_prune_lim * 100 << "% of samples must have both adjacent loci;" << " the remaining " << 100 - (merge_prune_lim * 100) << "% of individuals will be pruned.\n" << "Attempting to merge adjacent loci that share a cutsite..."; if (verbose) log_fh << "\n#\n# List of locus pairs that share a cutsite that failed to merge because they could not be phased.\n#\n"; // // Iterate over each chromosome. // for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { // // Iterate over each ordered locus on this chromosome. // next = it->second[0]; for (uint pos = 1; pos < it->second.size(); pos++) { cur = next; next = it->second[pos]; // // Do these two loci overlap? // +Must occur on opposite strands // +Must overlap according to the length of the cutsite. // if (((cur->loc.strand == minus && next->loc.strand == plus) && ((int) (cur->loc.bp - next->loc.bp + 1) == renz_olap[enz])) || ((cur->loc.strand == plus && next->loc.strand == minus) && ((int) (next->loc.bp - cur->loc.bp + 1) == renz_olap[enz]))) { overlap++; d_1 = pmap->locus(cur->id); d_2 = pmap->locus(next->id); unmergable = 0; tot_samp = 0; // // Check if all members of the population contain these two loci (or are missing both). 
// for (int i = 0; i < pmap->sample_cnt(); i++) { if (d_1[i] != NULL || d_2[i] != NULL) tot_samp++; if ((d_1[i] != NULL && d_2[i] == NULL) || (d_1[i] == NULL && d_2[i] != NULL)) unmergable++; } prune_pct = (double) (tot_samp - unmergable) / (double) tot_samp; // // If some of the individuals only have one locus and not the other, prune them out. // if (prune_pct < 1.0 && prune_pct >= merge_prune_lim) { for (int i = 0; i < pmap->sample_cnt(); i++) if (d_1[i] != NULL && d_2[i] == NULL) { delete d_1[i]; d_1[i] = NULL; } else if (d_1[i] == NULL && d_2[i] != NULL) { delete d_2[i]; d_2[i] = NULL; } } // // If possible, merge the two loci together. // if (prune_pct < merge_prune_lim) { int pct = (int) (prune_pct * 100); missing_samps_dist[pct]++; if (verbose) log_fh << "Missing samples, Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << pct << "% present (" << 100 - pct << "% missing)\n"; missing_samps_cnt++; failure++; continue; } phaset res = merge_and_phase_loci(pmap, cur, next, loci_to_destroy, log_fh); switch(res) { case multiple_fails: if (verbose) log_fh << "Failed to phase, Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << "multiple failures\n"; multifails_cnt++; phase_fail_cnt++; failure++; break; case multimapping_fail: if (verbose) log_fh << "Failed to phase, Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << "multimapping in one or more individuals\n"; multimapping_cnt++; phase_fail_cnt++; failure++; break; case nomapping_fail: if (verbose) log_fh << "Failed to phase, Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << "no mapping in one or more individuals\n"; nomapping_cnt++; phase_fail_cnt++; failure++; break; case complex_phase: if (verbose) log_fh << "Phased Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << "a complex phasing operation.\n"; complex_merge_cnt++; success++; merge_map[cur->id] = make_pair(merge_sink, next->id); merge_map[next->id] = 
make_pair(merge_src, cur->id); break; case simple_merge: if (verbose) log_fh << "Phased Sink Locus: " << cur->id << "; Source Locus: " << next->id << "; " << "a simple merge operation.\n"; simple_merge_cnt++; success++; merge_map[cur->id] = make_pair(merge_sink, next->id); merge_map[next->id] = make_pair(merge_src, cur->id); break; default: cerr << "Warning: Merge failure.\n"; break; } } } } // // Remove those loci that have been merged from both the popualtion map and catalog. // set emptyset; pmap->prune(loci_to_destroy); reduce_catalog(catalog, emptyset, loci_to_destroy); cerr << "done.\n" << "Of " << tot_loci << " loci, " << overlap << " pairs share a cutsite; " << success << " pairs were merged; " << failure << " pairs failed to merge; " << pmap->loci_cnt() << " remaining loci.\n" << " Of those merged, " << simple_merge_cnt << " required only a simple merge without phasing; " << "while " << complex_merge_cnt << " required phasing.\n" << " Of those that failed to merge, " << missing_samps_cnt << " were missing one of the two haplotypes in one or more samples; " << "while " << phase_fail_cnt << " failed to be phased.\n" << " Of those that failed to phase, " << nomapping_cnt << " failed due to a lack of haplotype mappings; " << multimapping_cnt << " failed due to multiple haplotype mappings; " << multifails_cnt << " failed due to both.\n"; log_fh << "\n#\n# Merging adjacent loci with a shared restriction enzyme cutsite\n#\n" << "Of " << tot_loci << " loci, " << overlap << " pairs share a cutsite; " << success << " pairs were merged; " << failure << " pairs failed to merge; " << pmap->loci_cnt() << " remaining loci.\n" << " Of those merged, " << simple_merge_cnt << " required only a simple merge without phasing; " << "while " << complex_merge_cnt << " required phasing.\n" << " Of those that failed to merge, " << missing_samps_cnt << " were missing one of the two haplotypes in one or more samples; " << "while " << phase_fail_cnt << " failed to be phased.\n" << " Of 
those that failed to phase, " << nomapping_cnt << " failed due to a lack of haplotype mappings; " << multimapping_cnt << " failed due to multiple haplotype mappings; " << multifails_cnt << " failed due to both.\n"; log_fh << "#\n# Distribution of loci with samples missing one of two loci to be merged\n" << "# Percent samples with both loci present\tNumber of cases\n"; map::iterator mit; for (mit = missing_samps_dist.begin(); mit != missing_samps_dist.end(); mit++) log_fh << mit->first << "\t" << mit->second << "\n"; log_fh << "\n"; return 0; } phaset merge_and_phase_loci(PopMap *pmap, CSLocus *cur, CSLocus *next, set &loci_to_destroy, ofstream &log_fh) { Datum **d_1 = pmap->locus(cur->id); Datum **d_2 = pmap->locus(next->id); set phased_results; set phased_haplotypes; string merged_hap; char *h_1, *h_2; int merge_type; if (verbose) log_fh << "Attempting to phase source locus " << cur->id << " with sink locus " << next->id << "\n"; int sample_cnt = 0; int phased_sample_cnt = 0; // // Take a census of the already phased haplotypes. We have phased haplotypes // if for individual i: // 1. d_1 has a single haplotype and d_2 has a single haplotype // 2. d_1 has a single haplotpye and d_2 has multiple haplotypes // 3. d_1 has multiple haplotpyes and d_2 has a single haplotype // // If one or both of the loci have no SNPs, then the haplotype is // recorded as "consensus." Check that condition before we start merging. 
// if (cur->snps.size() > 0 && next->snps.size() > 0) merge_type = 0; else if (cur->snps.size() == 0) merge_type = 1; else if (next->snps.size() == 0) merge_type = 2; else merge_type = 3; for (int i = 0; i < pmap->sample_cnt(); i++) { if (d_1[i] == NULL || d_2[i] == NULL) continue; else if (d_1[i]->obshap.size() > 1 && d_2[i]->obshap.size() > 1) continue; else { for (uint j = 0; j < d_1[i]->obshap.size(); j++) { for (uint k = 0; k < d_2[i]->obshap.size(); k++) { switch (merge_type) { case 0: merged_hap = string(d_1[i]->obshap[j]) + string(d_2[i]->obshap[k]); break; case 1: merged_hap = string(d_2[i]->obshap[k]); break; case 2: merged_hap = string(d_1[i]->obshap[j]); break; case 3: default: merged_hap = "consensus"; break; } phased_haplotypes.insert(merged_hap); // cerr << "Phasing: '" << d_1[i]->obshap[j] << "' + '" << d_2[i]->obshap[k] << "' => '" << merged_hap << "'\n"; } } phased_sample_cnt++; sample_cnt++; } } // // Indicate that these two loci had a simple merge, with no phasing necessary. // phased_results.insert(simple_merge); // // Now we need to check if we can phase the remaining haplotypes. // for (int i = 0; i < pmap->sample_cnt(); i++) { if (d_1[i] == NULL || d_2[i] == NULL) continue; else if (d_1[i]->obshap.size() > 1 && d_2[i]->obshap.size() > 1) { // cerr << "Attempting to phase individual " << i << ": " << d_1[i]->id << " / " << d_2[i]->id << "\n"; sample_cnt++; // // We should be able to find a sinlge phasing mapping for each haplotype from d_1 to d_2 // that includes all the haplotypes in these two loci. // vector > seen_phased; uint tot_obshap = d_1[i]->obshap.size() + d_2[i]->obshap.size(); uint phased_cnt = 0; for (uint j = 0; j < d_1[i]->obshap.size(); j++) { for (uint k = 0; k < d_2[i]->obshap.size(); k++) { // cerr << " " << d_1[i]->obshap[j] << " + " << d_2[i]->obshap[k]; // // Record each pair of haplotypes that has been seen phased previously. 
// if (phased_haplotypes.count(string(d_1[i]->obshap[j]) + string(d_2[i]->obshap[k]))) { seen_phased.push_back(make_pair(d_1[i]->obshap[j], d_2[i]->obshap[k])); // cerr << " => " << d_1[i]->obshap[j] << d_2[i]->obshap[k]; } // cerr << "\n"; } } // // Now, we will iterate over all sets of phased haplotypes and look // for combinations that use all four individual haplotypes. // for (uint j = 0; j < seen_phased.size(); j++) { for (uint k = j; k < seen_phased.size(); k++) { set incorporated_haplotypes; // // Count the number of distinct char pointers. If this combination // of haplotypes includes all unphased haplotypes, count it. // incorporated_haplotypes.insert(seen_phased[j].first); incorporated_haplotypes.insert(seen_phased[j].second); incorporated_haplotypes.insert(seen_phased[k].first); incorporated_haplotypes.insert(seen_phased[k].second); if (incorporated_haplotypes.size() == tot_obshap) phased_cnt++; } } // // If one pair of haplotypes is mapped, but the other is not, assume the second pair or // haplotypes must be phased by process of elimination. // if (phased_cnt == 0 && seen_phased.size() == 1) { h_1 = seen_phased[0].first == d_1[i]->obshap[1] ? d_1[i]->obshap[0] : d_1[i]->obshap[1]; h_2 = seen_phased[0].second == d_2[i]->obshap[1] ? 
d_2[i]->obshap[0] : d_2[i]->obshap[1]; phased_haplotypes.insert(string(h_1) + string(h_2)); phased_cnt++; // cerr << " Phasing: '" << hap_1 << "' + '" << hap_2 << "' => '" << string(hap_1) + string(hap_2) << "'\n"; } if (phased_cnt == 0) { phased_results.insert(nomapping_fail); if (verbose) log_fh << " Locus NOT phased in individual " << i << "; loci " << d_1[i]->id << " / " << d_2[i]->id << " no mapping found.\n"; } else if (phased_cnt == 1) { phased_sample_cnt++; phased_results.insert(complex_phase); } else { phased_results.insert(multimapping_fail); if (verbose) log_fh << " Locus NOT phased in individual " << i << "; loci " << d_1[i]->id << " / " << d_2[i]->id << " multiple mappings found.\n"; } } } if (phased_sample_cnt != sample_cnt) { if (phased_results.count(nomapping_fail) > 0 && phased_results.count(multimapping_fail) > 0) return multiple_fails; else if (phased_results.count(nomapping_fail) > 0) return nomapping_fail; else if (phased_results.count(multimapping_fail) > 0) return multimapping_fail; else { cerr << "WE SHOULD NOT GET HERE\n"; return merge_failure; } } // // Okay, merge these two loci together. // if (!merge_datums(pmap->sample_cnt(), cur->len, d_1, d_2, phased_haplotypes, merge_type)) return merge_failure; // // Merge the catalog entries together. // if (!merge_csloci(cur, next, phased_haplotypes)) return merge_failure; // // Mark the merged locus for destruction. // loci_to_destroy.insert(next->id); if (phased_results.count(complex_phase) > 0) return complex_phase; return simple_merge; } int merge_csloci(CSLocus *sink, CSLocus *src, set &phased_haplotypes) { // // We assume that we are merging two loci: one on the negative strand, one on the // positive. We will keep the sink cslocus and delete the src cslocus. // -> The sink cslocus is assumed to be on the negative strand. // // // 1. Reverse complement the SNP coordinates in the sink locus so that they are // enumerated on the positive strand. Complement the alleles as well. 
// for (uint j = 0; j < sink->snps.size(); j++) { sink->snps[j]->col = sink->len - sink->snps[j]->col - 1; sink->snps[j]->rank_1 = reverse(sink->snps[j]->rank_1); sink->snps[j]->rank_2 = reverse(sink->snps[j]->rank_2); sink->snps[j]->rank_3 = reverse(sink->snps[j]->rank_3); sink->snps[j]->rank_4 = reverse(sink->snps[j]->rank_4); } // // 2. Adjust the SNP coordinates in the src locus to account for the now, longer length. // for (uint j = 0; j < src->snps.size(); j++) src->snps[j]->col = sink->len + src->snps[j]->col - renz_olap[enz]; // // 3. Combine SNPs between the two catalog loci: add the SNPs from the sink (formerly on the // negative strand) in reverse order, followed by the SNPs from the src. // vector tmpsnp; for (int j = (int) sink->snps.size() - 1; j >= 0; j--) tmpsnp.push_back(sink->snps[j]); for (uint j = 0; j < src->snps.size(); j++) tmpsnp.push_back(src->snps[j]); sink->snps.clear(); for (uint j = 0; j < tmpsnp.size(); j++) sink->snps.push_back(tmpsnp[j]); // // 4. Adjust the genomic location of the sink locus. // uint bp = sink->sort_bp(); sink->loc.bp = bp; sink->loc.strand = plus; // // 5. Adjust the length of the sequence. // sink->len += src->len - renz_olap[enz]; // // 6. Merge the consensus sequence together. // char *new_con = rev_comp(sink->con); delete [] sink->con; sink->con = new_con; new_con = new char[sink->len + 1]; strcpy(new_con, sink->con); delete [] sink->con; sink->con = new_con; new_con += src->len - renz_olap[enz]; strcpy(new_con, src->con); // // 7. Record the now phased haplotypes. // sink->alleles.clear(); set::iterator it; for (it = phased_haplotypes.begin(); it != phased_haplotypes.end(); it++) sink->alleles[*it] = 0; // cerr << "CSLocus " << sink->id << ":\n" // << "Length: " << sink->len << "; Chr: " << sink->loc.chr << "; BP: " << sink->sort_bp() << "; strand: " << (sink->loc.strand == plus ? 
"+" : "-") << "\n" // << " SNPs:\n"; // for (uint j = 0; j < sink->snps.size(); j++) // cerr << " Col: " << sink->snps[j]->col // << " Rank 1: " << sink->snps[j]->rank_1 // << " Rank 2: " << sink->snps[j]->rank_2 << "\n"; // cerr << " Alleles:\n"; // map::iterator ait; // for (ait = sink->alleles.begin(); ait != sink->alleles.end(); ait++) // cerr << " " << ait->first << "\n"; return 1; } int merge_datums(int sample_cnt, int sink_locus_len, Datum **sink, Datum **src, set &phased_haplotypes, int merge_type) { char tmphap[id_len], *new_hap; uint haplen, model_len, offset; vector tmpsnp; vector tmpobshap; vector tmpobsdep; // // We assume that we are merging two loci: one on the negative strand, one on the // positive. We will keep the sink datum and delete the src datum. // -The sink datum is assumed to be on the negative strand. // for (int i = 0; i < sample_cnt; i++) { if (sink[i] == NULL && src[i] == NULL) continue; else if (sink[i] == NULL || src[i] == NULL) cerr << "Unexpected condition in merging datums: one datum is NULL while the other is not.\n"; // // 1. Reverse complement the SNP coordinates in the sink locus so that they are // enumerated on the positive strand. Complement the alleles as well. // for (uint j = 0; j < sink[i]->snps.size(); j++) { sink[i]->snps[j]->col = sink[i]->len - sink[i]->snps[j]->col - 1; sink[i]->snps[j]->rank_1 = reverse(sink[i]->snps[j]->rank_1); sink[i]->snps[j]->rank_2 = reverse(sink[i]->snps[j]->rank_2); sink[i]->snps[j]->rank_3 = reverse(sink[i]->snps[j]->rank_3); sink[i]->snps[j]->rank_4 = reverse(sink[i]->snps[j]->rank_4); } // // 2. Adjust the SNP coordinates in the src locus to account for the now, longer length. // for (uint j = 0; j < src[i]->snps.size(); j++) src[i]->snps[j]->col = sink[i]->len + src[i]->snps[j]->col - renz_olap[enz]; // // 3. Reverse complement the observed haplotypes in the sink locus. 
// haplen = strlen(sink[i]->obshap[0]); for (uint j = 0; j < sink[i]->obshap.size(); j++) { for (uint k = 0; k < haplen; k++) tmphap[k] = reverse(sink[i]->obshap[j][haplen - k - 1]); tmphap[haplen] = '\0'; strcpy(sink[i]->obshap[j], tmphap); } // // 4. Combine SNPs between the two datums: add the SNPs from the sink (formerly on the // negative strand) in reverse order, followed by the SNPs from the src. // tmpsnp.clear(); for (int j = (int) sink[i]->snps.size() - 1; j >= 0; j--) tmpsnp.push_back(sink[i]->snps[j]); for (uint j = 0; j < src[i]->snps.size(); j++) tmpsnp.push_back(src[i]->snps[j]); sink[i]->snps.clear(); for (uint j = 0; j < tmpsnp.size(); j++) sink[i]->snps.push_back(tmpsnp[j]); } // // 5. Combine observed haplotypes between the two datums while phasing them. // 5.1 First combine the haplotypes from samples that are already in phase. // string merged_hap; vector to_be_phased; phased_haplotypes.clear(); for (int i = 0; i < sample_cnt; i++) { if (sink[i] == NULL && src[i] == NULL) continue; if (sink[i]->obshap.size() > 1 && src[i]->obshap.size() > 1) { to_be_phased.push_back(i); continue; } else { tmpobshap.clear(); tmpobsdep.clear(); for (uint j = 0; j < sink[i]->obshap.size(); j++) { for (uint k = 0; k < src[i]->obshap.size(); k++) { switch (merge_type) { case 0: merged_hap = string(sink[i]->obshap[j]) + string(src[i]->obshap[k]); break; case 1: merged_hap = string(src[i]->obshap[j]); break; case 2: merged_hap = string(sink[i]->obshap[j]); break; case 3: default: merged_hap = "consensus"; break; } phased_haplotypes.insert(merged_hap); tmpobshap.push_back(merged_hap); tmpobsdep.push_back((sink[i]->depth[j] + src[i]->depth[k]) / 2); } } sink[i]->depth.clear(); for (uint j = 0; j < sink[i]->obshap.size(); j++) delete [] sink[i]->obshap[j]; sink[i]->obshap.clear(); for (uint j = 0; j < tmpobshap.size(); j++) { new_hap = new char[tmpobshap[j].length() + 1]; strcpy(new_hap, tmpobshap[j].c_str()); sink[i]->obshap.push_back(new_hap); 
sink[i]->depth.push_back(tmpobsdep[j]); } } } // // 5.2 Phase and combine the haplotypes from the remaining samples. // int index; for (uint i = 0; i < to_be_phased.size(); i++) { index = to_be_phased[i]; tmpobshap.clear(); tmpobsdep.clear(); vector > seen_phased; uint tot_obshap = sink[index]->obshap.size() + src[index]->obshap.size(); for (uint j = 0; j < sink[index]->obshap.size(); j++) { for (uint k = 0; k < src[index]->obshap.size(); k++) { if (phased_haplotypes.count(string(sink[index]->obshap[j]) + string(src[index]->obshap[k]))) seen_phased.push_back(make_pair(sink[index]->obshap[j], src[index]->obshap[k])); } } for (uint j = 0; j < seen_phased.size(); j++) { for (uint k = j; k < seen_phased.size(); k++) { set incorporated_haplotypes; incorporated_haplotypes.insert(seen_phased[j].first); incorporated_haplotypes.insert(seen_phased[j].second); incorporated_haplotypes.insert(seen_phased[k].first); incorporated_haplotypes.insert(seen_phased[k].second); if (incorporated_haplotypes.size() == tot_obshap) { tmpobshap.push_back(string(seen_phased[j].first) + string(seen_phased[j].second)); tmpobshap.push_back(string(seen_phased[k].first) + string(seen_phased[k].second)); //tmpobsdep.push_back((sink[index]->depth[j] + src[index]->depth[k]) / 2); } } } sink[index]->depth.clear(); for (uint j = 0; j < sink[index]->obshap.size(); j++) delete [] sink[index]->obshap[j]; sink[index]->obshap.clear(); for (uint j = 0; j < tmpobshap.size(); j++) { new_hap = new char[tmpobshap[j].length() + 1]; strcpy(new_hap, tmpobshap[j].c_str()); sink[index]->obshap.push_back(new_hap); // sink[index]->depth.push_back(tmpobsdep[j]); } } // // 6. Merge model calls; Set the length; combine the two depth and lnl measures together. // string model_calls; char *p; for (int i = 0; i < sample_cnt; i++) { if (sink[i] == NULL && src[i] == NULL) continue; // // Merge the two strings of model calls together. // We need to check if the locus for this individual is shorter than the catalog // locus. 
If so, we need to expand out the model call array to be the proper length. // reverse_string(sink[i]->model); offset = 0; model_calls.clear(); if (sink_locus_len > sink[i]->len) { offset = sink_locus_len - sink[i]->len; model_calls.assign(offset, 'N'); } model_len = offset + sink[i]->len + src[i]->len - renz_olap[enz]; model_calls.append(sink[i]->model); delete [] sink[i]->model; sink[i]->model = new char[model_len + 1]; strcpy(sink[i]->model, model_calls.c_str()); p = sink[i]->model; p += offset + sink[i]->len - renz_olap[enz]; strcpy(p, src[i]->model); sink[i]->len = model_len; sink[i]->tot_depth = (sink[i]->tot_depth + src[i]->tot_depth) / 2; sink[i]->lnl = (sink[i]->lnl + src[i]->lnl) / 2.0; // // Record which datum was merged into this one. // sink[i]->merge_partner = src[i]->id; } return 1; } int datum_adjust_snp_positions(map > &merge_map, CSLocus *loc, Datum *datum, map &snpres) { // // We will start with the 'sink' locus, which was originally on the negative strand: // 1. If the locus was shorter than the catalog locus, pad the difference. // 2. Convert to positive strand: Reverse the order, complement the alleles, // alter the internal column position. // SNP *snp; SNPRes *snpr = snpres[datum->id]; int index = 0; int stop_pos = renz_olap[enz] - 1; // // We know the catalog was padded since we already padded hte model call string // if it was necessary when originally merging. 
// while (datum->model[index] == 'N') { snp = new SNP; snp->col = index; snp->lratio = 0.0; snp->rank_1 = 'N'; snp->type = snp_type_unk; datum->snps.push_back(snp); index++; } for (int j = snpr->snps.size() - 1; j > stop_pos; j--) { snp = new SNP; snp->col = index; snp->lratio = snpr->snps[j]->lratio; snp->rank_1 = reverse(snpr->snps[j]->rank_1); snp->rank_2 = reverse(snpr->snps[j]->rank_2); snp->rank_3 = reverse(snpr->snps[j]->rank_3); snp->rank_4 = reverse(snpr->snps[j]->rank_4); datum->snps.push_back(snp); index++; } // // Now we fetch the former locus, the 'src', which was originally on the positive strand. // All we have to do is adjust the column position of each SNP. // snpr = snpres[datum->merge_partner]; for (uint j = 0; j < snpres[datum->id]->snps.size(); j++) { snp = new SNP; snp->col = index; snp->lratio = snpr->snps[j]->lratio; snp->rank_1 = snpr->snps[j]->rank_1; snp->rank_2 = snpr->snps[j]->rank_2; snp->rank_3 = snpr->snps[j]->rank_3; snp->rank_4 = snpr->snps[j]->rank_4; datum->snps.push_back(snp); index++; } return 0; } int create_genotype_map(CSLocus *locus, PopMap *pmap) { // // Create a genotype map. For any set of haplotypes, this routine will // assign each haplotype to a genotype, e.g. given the haplotypes // 'AC' and 'GT' in the population, this routine will assign 'AC' == 'a' // and 'GT' == 'b'. If an individual is homozygous for 'AC', they will be // assigned an 'aa' genotype. // //cerr << "Creating genotype map for catalog ID " << locus->id << ", marker: " << locus->marker << ".\n"; char gtypes[26] ={'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; Datum **d; map haplotypes; map::iterator k; vector > sorted_haplotypes; d = pmap->locus(locus->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] != NULL) for (uint n = 0; n < d[i]->obshap.size(); n++) haplotypes[d[i]->obshap[n]]++; } // // Check that there are not more haplotypes than we have encodings. 
// if (haplotypes.size() > 26) return 0; // // Sort the haplotypes map by value // for (k = haplotypes.begin(); k != haplotypes.end(); k++) sorted_haplotypes.push_back(*k); sort(sorted_haplotypes.begin(), sorted_haplotypes.end(), hap_compare); for (uint n = 0, index = 0; n < sorted_haplotypes.size() && index <= 26; n++, index++) { locus->gmap[sorted_haplotypes[n].first] = gtypes[index]; //cerr << "GMAP: " << sorted_haplotypes[n].first << " == " << gtypes[index] << "\n"; } return 0; } int call_population_genotypes(CSLocus *locus, PopMap *pmap) { // // Fetch the array of observed haplotypes from the population // Datum **d = pmap->locus(locus->id); for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; vector gtypes; string gtype; //cerr << "Sample Id: " << pmap->rev_sample_index(i) << "\n"; for (uint j = 0; j < d[i]->obshap.size(); j++) { // // Impossible allele encountered. // if (locus->gmap.count(d[i]->obshap[j]) == 0) { gtypes.clear(); gtypes.push_back("-"); goto impossible; } gtypes.push_back(locus->gmap[d[i]->obshap[j]]); //cerr << " Observed Haplotype: " << d[i]->obshap[j] << ", Genotype: " << locus->gmap[d[i]->obshap[j]] << "\n"; } impossible: sort(gtypes.begin(), gtypes.end()); for (uint j = 0; j < gtypes.size(); j++) { gtype += gtypes[j]; //cerr << " Adding genotype to string: " << gtypes[j] << "; " << gtype << "\n"; } string m = gtype.length() == 1 ? 
gtype + gtype : gtype; d[i]->gtype = new char[m.length() + 1]; strcpy(d[i]->gtype, m.c_str()); if (m != "-") locus->gcnt++; //cerr << "Assigning datum, marker: " << locus->marker << ", string: " << m << ", haplotype: " << d[i]->obshap[0] << ", gtype: " << gtype << "\n"; } return 0; } int tally_haplotype_freq(CSLocus *locus, PopMap *pmap, int &total, double &max, string &freq_str) { map freq; Datum **d = pmap->locus(locus->id); total = 0; max = 0; //cerr << "Examining marker: " << locus->id << "\n"; for (int i = 0; i < pmap->sample_cnt(); i++) { if (d[i] == NULL) continue; //cerr << " Sample: " << i << "; Haplotype: " << d[i]->obshap[0] << "; Genotype: " << d[i]->gtype << "\n"; if (d[i]->gtype[0] != '-') { freq[d[i]->gtype]++; total++; } } if (total == 0) return 0; double frac; stringstream s; char f[id_len]; map::iterator it; for (it = freq.begin(); it != freq.end(); it++) { frac = (double) it->second / (double) total * 100; if (frac > max) max = frac; sprintf(f, "(%0.1f%%);", frac); s << it->first << ":" << it->second << f; } freq_str = s.str(); return 0; } int write_genomic(map &catalog, PopMap *pmap) { stringstream pop_name; pop_name << "batch_" << batch_id << ".genomic.tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening genomic output file '" << file << "'\n"; exit(1); } // // Count the number of markers that have enough samples to output. // map::iterator cit; CSLocus *loc; int num_loci = 0; for (cit = catalog.begin(); cit != catalog.end(); cit++) { loc = cit->second; num_loci += loc->len - renz_len[enz]; } cerr << "Writing " << num_loci << " nucleotide positions to genomic file, '" << file << "'\n"; // // Write the header // fh << num_loci << "\t" << pmap->sample_cnt() << "\n"; // // Output each locus. // map >::iterator it; int a, b; uint rcnt = enz.length() ? renz_cnt[enz] : 0; uint rlen = enz.length() ? 
renz_len[enz] : 0; char *p; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint i = 0; i < it->second.size(); i++) { loc = it->second[i]; Datum **d = pmap->locus(loc->id); set snp_locs; string obshap; for (uint i = 0; i < loc->snps.size(); i++) snp_locs.insert(loc->snps[i]->col); uint start = 0; uint end = loc->len; // // Check for the existence of the restriction enzyme cut site, mask off // its output. // for (uint n = 0; n < rcnt; n++) if (strncmp(loc->con, renz[enz][n], rlen) == 0) start += renz_len[enz]; if (start == 0) { p = loc->con + (loc->len - rlen); for (uint n = rcnt; n < rcnt + rcnt; n++) if (strncmp(p, renz[enz][n], rlen) == 0) end -= renz_len[enz]; } uint k = 0; for (uint n = start; n < end; n++) { fh << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(n); if (snp_locs.count(n) == 0) { for (int j = 0; j < pmap->sample_cnt(); j++) { a = encode_gtype(loc->con[n]); fh << "\t" << encoded_gtypes[a][a]; } } else { for (int j = 0; j < pmap->sample_cnt(); j++) { fh << "\t"; if (d[j] == NULL) fh << "0"; else switch (d[j]->obshap.size()) { case 1: a = encode_gtype(d[j]->obshap[0][k]); fh << encoded_gtypes[a][a]; break; case 2: a = encode_gtype(d[j]->obshap[0][k]); b = encode_gtype(d[j]->obshap[1][k]); fh << encoded_gtypes[a][b]; break; default: fh << "0"; break; } } k++; } fh << "\n"; } } } fh.close(); return 0; } int calculate_haplotype_stats(vector > &files, map > &pop_indexes, map &catalog, PopMap *pmap, PopSum *psum) { map >::iterator it; CSLocus *loc; Datum **d; LocStat *l; // // Instantiate the kernel smoothing and bootstrap objects if requested. // KSmooth *ks; OHaplotypes *ord; Bootstrap *bs; if (kernel_smoothed && loci_ordered) { ks = new KSmooth(2); ord = new OHaplotypes(); } // // Open output file and print header. 
// stringstream pop_name; pop_name << "batch_" << batch_id << ".hapstats" << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening haplotype stats file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); map >::iterator pit; int start, end, pop_id; // // Write the population members. // for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start = pit->second.first; end = pit->second.second; fh << "# " << pop_key[pit->first] << "\t"; for (int i = start; i <= end; i++) { fh << files[i].second; if (i < end) fh << ","; } fh << "\n"; } fh << "# Batch ID " << "\t" << "Locus ID" << "\t" << "Chr" << "\t" << "BP" << "\t" << "Pop ID" << "\t" << "N" << "\t" << "Haplotype Cnt" << "\t" << "Gene Diversity" << "\t" << "Smoothed Gene Diversity" << "\t" << "Smoothed Gene Diversity P-value" << "\t" << "Haplotype Diversity" << "\t" << "Smoothed Haplotype Diversity" << "\t" << "Smoothed Haplotype Diversity P-value" << "\t" << "Haplotypes" << "\n"; // // Iterate over the members of each population. 
// for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start = pit->second.first; end = pit->second.second; pop_id = pit->first; cerr << "Generating haplotype-level summary statistics for population '" << pop_key[pop_id] << "'\n"; map > genome_locstats; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { if (bootstrap_div) bs = new Bootstrap(2); vector &locstats = genome_locstats[it->first]; map locstats_key; ord->order(locstats, locstats_key, it->second); for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; d = pmap->locus(loc->id); if (loc->snps.size() == 0) continue; // cerr << "Looking at locus " << loc->id << "\n"; l = haplotype_diversity(start, end, d); if (l != NULL) { l->loc_id = loc->id; l->bp = loc->sort_bp(); locstats[locstats_key[l->bp]] = l; } } if (kernel_smoothed && loci_ordered) { cerr << " Generating kernel-smoothed statistics on chromosome " << it->first << "\n"; ks->smooth(locstats); } if (bootstrap_div) bs->add_data(locstats); } for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &locstats = genome_locstats[it->first]; if (bootstrap_div) bs->execute(locstats); // // Write results. 
// for (uint k = 0; k < locstats.size(); k++) { l = locstats[k]; if (l == NULL) continue; fh << batch_id << "\t" << l->loc_id << "\t" << it->first << "\t" << l->bp + 1 << "\t" << pop_key[pop_id] << "\t" << (int) l->alleles << "\t" << l->hap_cnt << "\t" << l->stat[0] << "\t" << l->smoothed[0] << "\t" << l->bs[0] << "\t" << l->stat[1] << "\t" << l->smoothed[1] << "\t" << l->bs[1] << "\t" << l->hap_str << "\n"; } for (uint k = 0; k < locstats.size(); k++) delete locstats[k]; } if (bootstrap_div) delete bs; } if (kernel_smoothed && loci_ordered) { delete ks; delete ord; } fh.close(); return 0; } int nuc_substitution_dist(map &hap_index, double **hdists) { vector haplotypes; map::iterator it; uint i, j; for (it = hap_index.begin(); it != hap_index.end(); it++) haplotypes.push_back(it->first); const char *p, *q; double dist; for (i = 0; i < haplotypes.size(); i++) { for (j = i; j < haplotypes.size(); j++) { dist = 0.0; p = haplotypes[i].c_str(); q = haplotypes[j].c_str(); while (*p != '\0' && *q != '\0') { if (*p != *q) dist++; p++; q++; } hdists[i][j] = dist; hdists[j][i] = dist; } } // // // // Print the distance matrix. 
// // // cerr << " "; // for (hit = loc_hap_index.begin(); hit != loc_hap_index.end(); hit++) // cerr << "\t" << hit->first; // cerr << "\n"; // for (hit = loc_hap_index.begin(); hit != loc_hap_index.end(); hit++) { // cerr << " " << hit->first; // for (hit_2 = loc_hap_index.begin(); hit_2 != loc_hap_index.end(); hit_2++) // cerr << "\t" << hdists[hit->second][hit_2->second]; // cerr << "\n"; // } // cerr << "\n"; return 0; } int nuc_substitution_identity(map &hap_index, double **hdists) { vector haplotypes; map::iterator it; uint i, j; for (it = hap_index.begin(); it != hap_index.end(); it++) haplotypes.push_back(it->first); double dist; for (i = 0; i < haplotypes.size(); i++) { for (j = i; j < haplotypes.size(); j++) { if (haplotypes[i] == haplotypes[j]) dist = 0.0; else dist = 1.0; hdists[i][j] = dist; hdists[j][i] = dist; } } return 0; } int nuc_substitution_identity_max(map &hap_index, double **hdists) { vector haplotypes; map::iterator it; uint i, j; for (it = hap_index.begin(); it != hap_index.end(); it++) haplotypes.push_back(it->first); for (i = 0; i < haplotypes.size(); i++) { for (j = i; j < haplotypes.size(); j++) { hdists[i][j] = 1.0; hdists[j][i] = 1.0; } } return 0; } int calculate_haplotype_divergence(vector > &files, map > &pop_indexes, map > &master_grp_members, map &catalog, PopMap *pmap, PopSum *psum) { map >::iterator it; if (bootstrap_phist) cerr << "Calculating halotype F statistics across all populations/groups and bootstrap resampling...\n"; else cerr << "Calculating haplotype F statistics across all populations/groups...\n"; // // Create a list of all the groups we have. // map >::iterator git; map pop_grp_key; for (git = master_grp_members.begin(); git != master_grp_members.end(); git++) for (uint i = 0; i < git->second.size(); i++) pop_grp_key[git->second[i]] = git->first; // // Create a list of all the populations we have. 
// vector pop_ids; map >::iterator pit; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) pop_ids.push_back(pit->first); // // Instantiate the kernel smoothing object and associated ordering object if requested. // KSmooth *ks; OHaplotypes *ord; Bootstrap *bs; if (kernel_smoothed && loci_ordered) { ks = new KSmooth(5); ord = new OHaplotypes(); } if (bootstrap_phist) bs = new Bootstrap(5); map > genome_hapstats; uint cnt = 0; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; cerr << " Generating haplotype F statistics for " << chr << "..."; map hapstats_key; vector &hapstats = genome_hapstats[chr]; ord->order(hapstats, hapstats_key, it->second); #pragma omp parallel { CSLocus *loc; LocSum **s; Datum **d; HapStat *h; #pragma omp for schedule(dynamic, 1) reduction(+:cnt) for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); d = pmap->locus(loc->id); if (loc->snps.size() == 0) continue; // // If this locus only appears in one population or there is only a single haplotype, // do not calculate haplotype F stats. // if (fixed_locus(pop_indexes, d, pop_ids)) continue; cnt++; // cerr << "Processing locus " << loc->id << "\n"; h = haplotype_amova(pop_grp_key, pop_indexes, d, s, pop_ids); if (h != NULL) { h->stat[4] = haplotype_d_est(pop_indexes, d, s, pop_ids); h->loc_id = loc->id; h->bp = loc->sort_bp(); hapstats[hapstats_key[h->bp]] = h; } } } if (bootstrap_phist) bs->add_data(hapstats); cerr << "done.\n"; // // Calculate kernel-smoothed Fst values. 
// if (kernel_smoothed && loci_ordered) { cerr << " Generating kernel-smoothed haplotype F statistics for " << it->first << "..."; ks->smooth(hapstats); cerr << "done.\n"; } } if (bootstrap_phist) { for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) bs->execute(genome_hapstats[it->first]); } cerr << "done.\n"; if (kernel_smoothed && loci_ordered) { delete ks; delete ord; } if (bootstrap_phist) delete bs; cerr << "Writing haplotype F statistics... "; stringstream pop_name; pop_name << "batch_" << batch_id << ".phistats" << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening haplotype Phi_st file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); // // Write the population members. // int start, end; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start = pit->second.first; end = pit->second.second; fh << "# Population " << pop_key[pit->first] << "\t"; for (int k = start; k <= end; k++) { fh << files[k].second; if (k < end) fh << ","; } fh << "\n"; } // // Write the group members. 
// for (git = grp_members.begin(); git != grp_members.end(); git++) { end = git->second.size(); fh << "# Group " << grp_key[git->first] << "\t"; for (int k = 0; k < end; k++) { fh << pop_key[git->second[k]]; if (k < end - 1) fh << ","; } fh << "\n"; } fh << "# Batch ID " << "\t" << "Locus ID" << "\t" << "Chr" << "\t" << "BP" << "\t" << "PopCnt" << "\t"; if (log_fst_comp) fh << "SSD(WP)" << "\t" << "SSD(AP/WG)" << "\t" << "SSD(AG)" << "\t" << "SSD(TOTAL)" << "\t" << "MSD(WP)" << "\t" << "MSD(AP/WG)" << "\t" << "MSD(AG)" << "\t" << "MSD(TOTAL)" << "\t" << "n" << "\t" << "n'" << "\t" << "n''" << "\t" << "Sigma2_a" << "\t" << "Sigma2_b" << "\t" << "Sigma2_c" << "\t" << "Sigma_Total" << "\t"; fh << "phi_st" << "\t" << "Smoothed Phi_st" << "\t" << "Smoothed Phi_st P-value" << "\t" << "Phi_ct" << "\t" << "Smoothed Phi_ct" << "\t" << "Smoothed Phi_ct P-value" << "\t" << "Phi_sc" << "\t" << "Smoothed Phi_sc" << "\t" << "Smoothed Phi_sc P-value" << "\t" << "Fst'" << "\t" << "Smoothed Fst'" << "\t" << "Smoothed Fst' P-value" << "\t" << "D_est" << "\t" << "Smoothed D_est" << "\t" << "Smoothed D_est P-value" << "\n"; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; vector &hapstats = genome_hapstats[chr]; for (uint k = 0; k < hapstats.size(); k++) { if (hapstats[k] == NULL) continue; fh << batch_id << "\t" << hapstats[k]->loc_id << "\t" << chr << "\t" << hapstats[k]->bp << "\t" << hapstats[k]->popcnt << "\t"; if (log_fst_comp) fh << hapstats[k]->comp[0] << "\t" << hapstats[k]->comp[1] << "\t" << hapstats[k]->comp[2] << "\t" << hapstats[k]->comp[3] << "\t" << hapstats[k]->comp[4] << "\t" << hapstats[k]->comp[5] << "\t" << hapstats[k]->comp[6] << "\t" << hapstats[k]->comp[7] << "\t" << hapstats[k]->comp[8] << "\t" << hapstats[k]->comp[9] << "\t" << hapstats[k]->comp[10] << "\t" << hapstats[k]->comp[11] << "\t" << hapstats[k]->comp[12] << "\t" << hapstats[k]->comp[13] << "\t" << hapstats[k]->comp[14] << "\t"; fh << 
hapstats[k]->stat[0] << "\t" << hapstats[k]->smoothed[0] << "\t" << hapstats[k]->bs[0] << "\t" << hapstats[k]->stat[1] << "\t" << hapstats[k]->smoothed[1] << "\t" << hapstats[k]->bs[1] << "\t" << hapstats[k]->stat[2] << "\t" << hapstats[k]->smoothed[2] << "\t" << hapstats[k]->bs[2] << "\t" << hapstats[k]->stat[3] << "\t" << hapstats[k]->smoothed[3] << "\t" << hapstats[k]->bs[3] << "\t" << hapstats[k]->stat[4] << "\t" << hapstats[k]->smoothed[4] << "\t" << hapstats[k]->bs[4] << "\n"; delete hapstats[k]; } } fh.close(); cerr << "wrote " << cnt << " loci to haplotype Phi_st file, '" << file << "'\n"; return 0; } int calculate_haplotype_divergence_pairwise(vector > &files, map > &pop_indexes, map > &master_grp_members, map &catalog, PopMap *pmap, PopSum *psum) { map >::iterator it; if (bootstrap_phist) cerr << "Calculating pairwise halotype F statistics and bootstrap resampling...\n"; else cerr << "Calculating pairwise haplotype F statistics...\n"; // // Assign all individuals to one group for the pairwise calculations. // map >::iterator git; map pop_grp_key; for (git = master_grp_members.begin(); git != master_grp_members.end(); git++) for (uint i = 0; i < git->second.size(); i++) pop_grp_key[git->second[i]] = 1; map >::iterator pit; vector pop_ids; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) pop_ids.push_back(pit->first); // // Instantiate the kernel smoothing object if requested. 
// KSmooth *ks; OHaplotypes *ord; Bootstrap *bs; if (kernel_smoothed && loci_ordered) { ks = new KSmooth(5); ord = new OHaplotypes(); } for (uint i = 0; i < pop_ids.size(); i++) { for (uint j = i + 1; j < pop_ids.size(); j++) { if (bootstrap_phist) bs = new Bootstrap(5); map > genome_hapstats; vector subpop_ids; subpop_ids.push_back(pop_ids[i]); subpop_ids.push_back(pop_ids[j]); cerr << " Processing populations '" << pop_key[pop_ids[i]] << "' and '" << pop_key[pop_ids[j]] << "'\n"; uint cnt = 0; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; cerr << " Generating pairwise haplotype F statistics for " << chr << "..."; map hapstats_key; vector &hapstats = genome_hapstats[chr]; ord->order(hapstats, hapstats_key, it->second); #pragma omp parallel { CSLocus *loc; LocSum **s; Datum **d; HapStat *h; #pragma omp for schedule(dynamic, 1) reduction(+:cnt) for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); d = pmap->locus(loc->id); if (loc->snps.size() == 0) continue; // // If this locus only appears in one population or there is only a single haplotype, // do not calculate haplotype F stats. // if (fixed_locus(pop_indexes, d, subpop_ids)) continue; cnt++; // cerr << "Processing locus " << loc->id << "\n"; h = haplotype_amova(pop_grp_key, pop_indexes, d, s, subpop_ids); if (h != NULL) { h->stat[4] = haplotype_d_est(pop_indexes, d, s, subpop_ids); h->loc_id = loc->id; h->bp = loc->sort_bp(); hapstats[hapstats_key[h->bp]] = h; } } } if (bootstrap_phist) bs->add_data(hapstats); cerr << "done.\n"; // // Calculate kernel-smoothed Fst values. 
// if (kernel_smoothed && loci_ordered) { cerr << " Generating kernel-smoothed Phi_st for " << it->first << "..."; ks->smooth(hapstats); cerr << "done.\n"; } } if (bootstrap_phist) { for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) bs->execute(genome_hapstats[it->first]); } cerr << "done.\n"; if (bootstrap_phist) delete bs; cerr << "Writing haplotype F statistics... "; stringstream pop_name; pop_name << "batch_" << batch_id << ".phistats_" << pop_key[pop_ids[i]] << "-" << pop_key[pop_ids[j]] << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening haplotype Phi_st file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); // // Write the population members. // int start, end; for (uint k = 0; k < subpop_ids.size(); k++) { start = pop_indexes[subpop_ids[k]].first; end = pop_indexes[subpop_ids[k]].second; fh << "# Population " << pop_key[subpop_ids[k]] << "\t"; for (int n = start; n <= end; n++) { fh << files[n].second; if (n < end) fh << ","; } fh << "\n"; } fh << "# Batch ID " << "\t" << "Locus ID" << "\t" << "Pop 1 ID" << "\t" << "Pop 2 ID" << "\t" << "Chr" << "\t" << "BP" << "\t"; if (log_fst_comp) fh << "SSD(WP)" << "\t" << "SSD(AP/WG)" << "\t" << "SSD(AG)" << "\t" << "SSD(TOTAL)" << "\t" << "MSD(WP)" << "\t" << "MSD(AP/WG)" << "\t" << "MSD(AG)" << "\t" << "MSD(TOTAL)" << "\t" << "n" << "\t" << "n'" << "\t" << "n''" << "\t" << "Sigma2_a" << "\t" << "Sigma2_b" << "\t" << "Sigma2_c" << "\t" << "Sigma_Total" << "\t"; fh << "phi_st" << "\t" << "Smoothed Phi_st" << "\t" << "Smoothed Phi_st P-value" << "\t" << "Fst'" << "\t" << "Smoothed Fst'" << "\t" << "Smoothed Fst' P-value" << "\t" << "D_est" << "\t" << "Smoothed D_est" << "\t" << "Smoothed D_est P-value" << "\n"; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; vector &hapstats = genome_hapstats[chr]; for (uint k = 0; k < 
hapstats.size(); k++) { if (hapstats[k] == NULL) continue; fh << batch_id << "\t" << hapstats[k]->loc_id << "\t" << pop_key[pop_ids[i]] << "\t" << pop_key[pop_ids[j]] << "\t" << chr << "\t" << hapstats[k]->bp << "\t"; if (log_fst_comp) fh << hapstats[k]->comp[0] << "\t" << hapstats[k]->comp[1] << "\t" << hapstats[k]->comp[2] << "\t" << hapstats[k]->comp[3] << "\t" << hapstats[k]->comp[4] << "\t" << hapstats[k]->comp[5] << "\t" << hapstats[k]->comp[6] << "\t" << hapstats[k]->comp[7] << "\t" << hapstats[k]->comp[8] << "\t" << hapstats[k]->comp[9] << "\t" << hapstats[k]->comp[10] << "\t" << hapstats[k]->comp[11] << "\t" << hapstats[k]->comp[12] << "\t" << hapstats[k]->comp[13] << "\t" << hapstats[k]->comp[14] << "\t"; fh << hapstats[k]->stat[0] << "\t" << hapstats[k]->smoothed[0] << "\t" << hapstats[k]->bs[0] << "\t" << hapstats[k]->stat[3] << "\t" << hapstats[k]->smoothed[3] << "\t" << hapstats[k]->bs[3] << "\t" << hapstats[k]->stat[4] << "\t" << hapstats[k]->smoothed[4] << "\t" << hapstats[k]->bs[4] << "\n"; delete hapstats[k]; } } fh.close(); cerr << "wrote " << cnt << " loci to pairwise haplotype file, '" << file << "'\n"; } } if (kernel_smoothed && loci_ordered) { delete ks; delete ord; } return 0; } bool fixed_locus(map > &pop_indexes, Datum **d, vector &pop_ids) { set loc_haplotypes; map > pop_haplotypes; int start, end, pop_id; int pop_cnt = pop_ids.size(); for (int p = 0; p < pop_cnt; p++) { start = pop_indexes[pop_ids[p]].first; end = pop_indexes[pop_ids[p]].second; pop_id = pop_ids[p]; for (int i = start; i <= end; i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() > 2) { continue; } else if (d[i]->obshap.size() == 1) { if (!uncalled_haplotype(d[i]->obshap[0])) { loc_haplotypes.insert(d[i]->obshap[0]); pop_haplotypes[pop_id].push_back(d[i]->obshap[0]); pop_haplotypes[pop_id].push_back(d[i]->obshap[0]); } } else { for (uint j = 0; j < d[i]->obshap.size(); j++) { if (!uncalled_haplotype(d[i]->obshap[0])) { loc_haplotypes.insert(d[i]->obshap[j]); 
pop_haplotypes[pop_id].push_back(d[i]->obshap[j]); } } } } } uint valid_pops = 0; for (int p = 0; p < pop_cnt; p++) { pop_id = pop_ids[p]; if (pop_haplotypes[pop_id].size() > 0) valid_pops++; } // // Check that more than one population has data for this locus. // if (valid_pops <= 1) return true; // // Check that there is more than one haplotype at this locus. // if (loc_haplotypes.size() == 1) return true; return false; } inline bool uncalled_haplotype(const char *haplotype) { for (const char *p = haplotype; *p != '\0'; p++) if (*p == 'N' || *p == 'n') return true; return false; } inline double count_haplotypes_at_locus(int start, int end, Datum **d, map &hap_cnts) { double n = 0.0; for (int i = start; i <= end; i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() > 2) { continue; } else if (d[i]->obshap.size() == 1) { if(!uncalled_haplotype(d[i]->obshap[0])) { n += 2; hap_cnts[d[i]->obshap[0]] += 2; } } else { for (uint j = 0; j < d[i]->obshap.size(); j++) { if(!uncalled_haplotype(d[i]->obshap[0])) { n++; hap_cnts[d[i]->obshap[j]]++; } } } } return n; } LocStat * haplotype_diversity(int start, int end, Datum **d) { map::iterator hit; vector haplotypes; map hap_freq; map hap_index; double n = 0.0; double gene_diversity = 0.0; double hapl_diversity = 0.0; LocStat *lstat; // // Tabulate the haplotypes in this population. // n = count_haplotypes_at_locus(start, end, d, hap_freq); // cerr << " " << n << " total haplotypes observed.\n"; // // If this haplotype is fixed, don't calculate any statistics. // if (n == 0) return NULL; lstat = new LocStat; // // Store a summary of the haplotype counts to output below. // stringstream sstr; for (hit = hap_freq.begin(); hit != hap_freq.end(); hit++) sstr << hit->first << ":" << hit->second << ";"; lstat->hap_str = sstr.str().substr(0, sstr.str().length() - 1); // // Determine an ordering for the haplotypes. Convert haplotype counts into frequencies. 
// uint k = 0; for (hit = hap_freq.begin(); hit != hap_freq.end(); hit++) { hap_index[hit->first] = k; haplotypes.push_back(hit->first); k++; // cerr << " Haplotype '" << hit->first << "' occured " << hit->second << " times; "; hit->second = hit->second / n; // cerr << " frequency of " << hit->second << "%\n"; } // // Initialize a two-dimensional array to hold distances between haplotyes. // double **hdists = new double *[hap_index.size()]; for (k = 0; k < hap_index.size(); k++) { hdists[k] = new double[hap_index.size()]; memset(hdists[k], 0, hap_index.size()); } // // Calculate the distances between haplotypes. // nuc_substitution_dist(hap_index, hdists); // // Calculate haplotype diversity, Pi. // for (uint i = 0; i < haplotypes.size(); i++) { for (uint j = 0; j < haplotypes.size(); j++) { hapl_diversity += hap_freq[haplotypes[i]] * hap_freq[haplotypes[j]] * hdists[hap_index[haplotypes[i]]][hap_index[haplotypes[j]]]; } } hapl_diversity = (n / (n-1)) * hapl_diversity; // // Calculate gene diversity. // for (uint i = 0; i < haplotypes.size(); i++) { gene_diversity += hap_freq[haplotypes[i]] * hap_freq[haplotypes[i]]; } gene_diversity = (n / (n - 1)) * (1 - gene_diversity); lstat->alleles = n; lstat->stat[0] = gene_diversity; lstat->stat[1] = hapl_diversity; lstat->hap_cnt = haplotypes.size(); // cerr << " Population " << pop_id << " has haplotype diversity (pi) of " << s[pop_index]->pi << "\n"; for (k = 0; k < hap_index.size(); k++) delete hdists[k]; delete hdists; return lstat; } HapStat * haplotype_amova(map &pop_grp_key, map > &pop_indexes, Datum **d, LocSum **s, vector &pop_ids) { map loc_hap_index; vector loc_haplotypes; map > pop_haplotypes; map > grp_members; vector grps; map::iterator hit, hit_2; map >::iterator pit; int start, end, pop_id, pop_id_1; HapStat *h; int pop_cnt = pop_ids.size(); // // Tabulate the occurences of haplotypes at this locus. 
// for (int p = 0; p < pop_cnt; p++) { start = pop_indexes[pop_ids[p]].first; end = pop_indexes[pop_ids[p]].second; pop_id = pop_ids[p]; for (int i = start; i <= end; i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() > 2) { continue; } else if (d[i]->obshap.size() == 1) { if(!uncalled_haplotype(d[i]->obshap[0])) { loc_hap_index[d[i]->obshap[0]]++; loc_haplotypes.push_back(d[i]->obshap[0]); loc_haplotypes.push_back(d[i]->obshap[0]); pop_haplotypes[pop_id].push_back(d[i]->obshap[0]); pop_haplotypes[pop_id].push_back(d[i]->obshap[0]); } } else { for (uint j = 0; j < d[i]->obshap.size(); j++) { if(!uncalled_haplotype(d[i]->obshap[0])) { loc_hap_index[d[i]->obshap[j]]++; loc_haplotypes.push_back(d[i]->obshap[j]); pop_haplotypes[pop_id].push_back(d[i]->obshap[j]); } } } } } // // What is the total number of populations that had valid haplotypes. // double valid_pop_cnt = 0.0; for (int p = 0; p < pop_cnt; p++) { pop_id = pop_ids[p]; if (pop_haplotypes[pop_id].size() > 0) valid_pop_cnt++; } // // If we filtered a population out at this locus make sure that we still have at least one // representative present in each group. // set uniq_grps; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = pit->first; if (pop_haplotypes.count(pop_id) > 0) { uniq_grps.insert(pop_grp_key[pop_id]); grp_members[pop_grp_key[pop_id]].push_back(pop_id); } } set::iterator uit; for (uit = uniq_grps.begin(); uit != uniq_grps.end(); uit++) grps.push_back(*uit); if (grps.size() == 0) return NULL; // cerr << "Groups: "; // for (uint i = 0; i < grps.size(); i++) // cerr << grps[i] << ", "; // cerr << "\n"; // for (git = grp_members.begin(); git != grp_members.end(); git++) { // cerr << "Group " << git->first << ": "; // for (uint i = 0; i < git->second.size(); i++) // cerr << git->second[i] << ", "; // cerr << "\n"; // } // // Determine an ordering for the haplotypes. 
// uint m = 0; for (hit = loc_hap_index.begin(); hit != loc_hap_index.end(); hit++) { loc_hap_index[hit->first] = m; m++; } // // Initialize a two-dimensional array to hold distances between haplotyes. // double **hdists = new double *[loc_hap_index.size()]; double **hdists_max = new double *[loc_hap_index.size()]; for (uint k = 0; k < loc_hap_index.size(); k++) { hdists[k] = new double[loc_hap_index.size()]; memset(hdists[k], 0, loc_hap_index.size()); hdists_max[k] = new double[loc_hap_index.size()]; memset(hdists_max[k], 0, loc_hap_index.size()); } // // Calculate the distances between haplotypes. // nuc_substitution_dist(loc_hap_index, hdists); // // Calculate the sum of squared distances in each subset: total, within populations, across populations // and withing groups, and across groups. // double ssd_total = amova_ssd_total(loc_haplotypes, loc_hap_index, hdists); double ssd_wp = amova_ssd_wp(grps, grp_members, loc_hap_index, pop_haplotypes, hdists); double ssd_ap_wg = amova_ssd_ap_wg(grps, grp_members, loc_hap_index, pop_haplotypes, hdists, hdists); double ssd_ag = grps.size() > 1 ? 
amova_ssd_ag(grps, grp_members, loc_hap_index, pop_haplotypes, hdists, ssd_total) : 0.0; // // Calculate n // double n = 0.0; double n_1 = 0.0; double n_2 = 0.0; double s_g = 0.0; double tot_cnt = 0.0; double grp_cnt = 0.0; double num_grps = grps.size(); double a = 0.0; double b = 0.0; for (uint g = 0; g < num_grps; g++) { for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; tot_cnt += (double) pop_haplotypes[pop_id_1].size(); } } for (uint g = 0; g < num_grps; g++) { grp_cnt = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; grp_cnt += (double) pop_haplotypes[pop_id_1].size(); } a = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; a += (double) (pop_haplotypes[pop_id_1].size() * pop_haplotypes[pop_id_1].size()) / grp_cnt; } s_g += a; } n = (tot_cnt - s_g) / (double) (valid_pop_cnt - num_grps); // cerr << " n: "<< n << "\n"; if (num_grps > 1) { // // Calculate n' // a = 0.0; for (uint g = 0; g < num_grps; g++) { for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; a += ((double) (pop_haplotypes[pop_id_1].size() * pop_haplotypes[pop_id_1].size()) / tot_cnt); } } n_1 = (s_g - a) / (double) (num_grps - 1.0); // cerr << " n': "<< n_1 << "\n"; // // Calculate n'' // for (uint g = 0; g < num_grps; g++) { a = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; a += pop_haplotypes[pop_id_1].size(); } b += ((a * a) / tot_cnt); } n_2 = (tot_cnt - b) / (double) (num_grps - 1); // cerr << " n'': "<< n_2 << "\n"; } // // Calculate the mean square deviations, equal to SSD divided by degrees of freedom. // double msd_ag = num_grps > 1 ? 
ssd_ag / (double) (num_grps - 1) : 0.0; double msd_ap_wg = ssd_ap_wg / ((double) (valid_pop_cnt - num_grps)); double msd_wp = ssd_wp / ((double) (loc_haplotypes.size() - valid_pop_cnt)); double msd_total = ssd_total / ((double) (loc_haplotypes.size() - 1)); double sigma_c = msd_wp; double sigma_b = n > 0 ? (msd_ap_wg - sigma_c) / n : 0.0; double sigma_a = 0.0; if (grps.size() > 1) sigma_a = (msd_ag - sigma_c - (n_1 * sigma_b)) / n_2; // Arlequin seems to sum the variance components instead of independently calculating sigma_total: MSD(total) = SSD(total)/degrees.of.freedom double sigma_total = sigma_a + sigma_b + sigma_c; // msd_total; double phi_st = 0.0; double phi_ct = 0.0; double phi_sc = 0.0; if (grps.size() > 1) { phi_st = sigma_total > 0.0 ? (sigma_a + sigma_b) / sigma_total : 0.0; phi_ct = sigma_total > 0.0 ? sigma_a / sigma_total : 0.0; phi_sc = (sigma_a + sigma_b) > 0.0 ? sigma_b / (sigma_b + sigma_c) : 0.0; } else { phi_st = sigma_total > 0.0 ? sigma_b / sigma_total : 0.0; } // cerr << " MSD(AG): " << msd_ag << "; MSD(AP/WG): " << msd_ap_wg << "; MSD(WP): " << msd_wp << "; MSD(TOTAL): " << msd_total << "\n" // << " Sigma_a: " << sigma_a << "; Sigma_b: " << sigma_b << "; Sigma_c: " << sigma_c << "; Sigma_Total: " << sigma_total << "\n" // << " Phi_st: " << phi_st << "; Phi_ct: " << phi_ct << "; Phi_sc: " << phi_sc << "\n"; // // Calculate Fst' = Fst / Fst_max // // First calculate Fst. // // To calculate Fst instead of Phi_st, we need to reset our distance matrix to return 1 if haplotypes are different, 0 otherwise. // nuc_substitution_identity(loc_hap_index, hdists); ssd_wp = amova_ssd_wp(grps, grp_members, loc_hap_index, pop_haplotypes, hdists); ssd_ap_wg = amova_ssd_ap_wg(grps, grp_members, loc_hap_index, pop_haplotypes, hdists, hdists); // // Calculate the mean square deviations, equal to SSD divided by degrees of freedom. 
// msd_ap_wg = ssd_ap_wg / ((double) (valid_pop_cnt - num_grps)); msd_wp = ssd_wp / ((double) (loc_haplotypes.size() - valid_pop_cnt)); sigma_c = msd_wp; sigma_b = n > 0 ? (msd_ap_wg - sigma_c) / n : 0.0; sigma_total = sigma_b + sigma_c; double fst = sigma_total > 0.0 ? sigma_b / sigma_total : 0.0; // // Now calculate Fst_max. // // Reset our distance matrix to give maximum possible distance between haplotypes // and recalculate sum of squared deviations across groups. // nuc_substitution_identity_max(loc_hap_index, hdists_max); ssd_ap_wg = amova_ssd_ap_wg(grps, grp_members, loc_hap_index, pop_haplotypes, hdists, hdists_max); // // Recalculate the mean square deviations, given maximum divergence between populations. // msd_ap_wg = ssd_ap_wg / ((double) (valid_pop_cnt - num_grps)); sigma_b = n > 0 ? (msd_ap_wg - sigma_c) / n : 0.0; double fst_max = sigma_total > 0.0 ? sigma_b / sigma_total : 0.0; double fst_1 = fst_max > 0.0 ? fst / fst_max : 0.0; // // Cache the results so we can print them in order below, once the parallel code has executed. 
// h = new HapStat; h->alleles = tot_cnt; h->popcnt = valid_pop_cnt; if (log_fst_comp) { h->comp = new double[15]; h->comp[0] = ssd_wp; h->comp[1] = ssd_ap_wg; h->comp[2] = ssd_ag; h->comp[3] = ssd_total; h->comp[4] = msd_wp; h->comp[5] = msd_ap_wg; h->comp[6] = msd_ag; h->comp[7] = msd_total; h->comp[8] = n; h->comp[9] = n_1; h->comp[10] = n_2; h->comp[11] = sigma_a; h->comp[12] = sigma_b; h->comp[13] = sigma_c; h->comp[14] = sigma_total; } h->stat[0] = phi_st; h->stat[1] = phi_ct; h->stat[2] = phi_sc; h->stat[3] = fst_1; for (uint k = 0; k < loc_hap_index.size(); k++) { delete [] hdists[k]; delete [] hdists_max[k]; } delete [] hdists; delete [] hdists_max; return h; } double amova_ssd_total(vector &loc_haplotypes, map &loc_hap_index, double **hdists) { // // Calculate sum of squared deviations for the total sample, SSD(Total) // double ssd_total = 0.0; for (uint j = 0; j < loc_haplotypes.size(); j++) { for (uint k = 0; k < loc_haplotypes.size(); k++) { ssd_total += hdists[loc_hap_index[loc_haplotypes[j]]][loc_hap_index[loc_haplotypes[k]]]; // cerr << j << "\t" // << k << "\t" // << loc_haplotypes[j] << "\t" // << loc_haplotypes[k] << "\t" // << hdists[loc_hap_index[loc_haplotypes[j]]][loc_hap_index[loc_haplotypes[k]]] << "\n"; } } ssd_total = (1.0 / (double) (2*loc_haplotypes.size())) * ssd_total; // cerr << " ssd_total: "<< ssd_total << "\n"; return ssd_total; } double amova_ssd_wp(vector &grps, map > &grp_members, map &loc_hap_index, map > &pop_haplotypes, double **hdists) { // // Calculate the sum of squared deviations within populations, SSD(WP) // double ssd_wp = 0.0; double ssd = 0.0; int pop_id; for (uint g = 0; g < grps.size(); g++) { for (uint i = 0; i < grp_members[grps[g]].size(); i++) { pop_id = grp_members[grps[g]][i]; ssd = 0.0; for (uint j = 0; j < pop_haplotypes[pop_id].size(); j++) { for (uint k = 0; k < pop_haplotypes[pop_id].size(); k++) { ssd += hdists[loc_hap_index[pop_haplotypes[pop_id][j]]][loc_hap_index[pop_haplotypes[pop_id][k]]]; // cerr 
<< pop_id << "\t" // << j << "\t" // << k << "\t" // << loc_haplotypes[j] << "\t" // << loc_haplotypes[k] << "\t" // << hdists[loc_hap_index[loc_haplotypes[j]]][loc_hap_index[loc_haplotypes[k]]] << "\n"; } } if (pop_haplotypes[pop_id].size() > 0) ssd_wp += (1.0 / (double) (2*pop_haplotypes[pop_id].size())) * ssd; } } // cerr << " ssd_wp: "<< ssd_wp << "\n"; return ssd_wp; } double amova_ssd_ap_wg(vector &grps, map > &grp_members, map &loc_hap_index, map > &pop_haplotypes, double **hdists_1, double **hdists_2) { // // Calculate the sum of squared deviations across populations and within groups, SSD(AP/WG) // double ssd_ap_wg = 0.0; double ssd = 0.0; double ssd_1 = 0.0; double ssd_2 = 0.0; double den = 0.0; int pop_id, pop_id_1, pop_id_2; for (uint g = 0; g < grps.size(); g++) { ssd_1 = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; for (uint j = 0; j < pop_haplotypes[pop_id_1].size(); j++) { for (uint s = 0; s < grp_members[grps[g]].size(); s++) { pop_id_2 = grp_members[grps[g]][s]; for (uint k = 0; k < pop_haplotypes[pop_id_2].size(); k++) { if (pop_id_1 == pop_id_2) ssd_1 += hdists_1[loc_hap_index[pop_haplotypes[pop_id_1][j]]][loc_hap_index[pop_haplotypes[pop_id_2][k]]]; else ssd_1 += hdists_2[loc_hap_index[pop_haplotypes[pop_id_1][j]]][loc_hap_index[pop_haplotypes[pop_id_2][k]]]; } } } } den = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; den += 2 * pop_haplotypes[pop_id_1].size(); } ssd_1 = ssd_1 / den; ssd_2 = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id = grp_members[grps[g]][r]; ssd = 0.0; for (uint j = 0; j < pop_haplotypes[pop_id].size(); j++) { for (uint k = 0; k < pop_haplotypes[pop_id].size(); k++) { ssd += hdists_1[loc_hap_index[pop_haplotypes[pop_id][j]]][loc_hap_index[pop_haplotypes[pop_id][k]]]; } } if (pop_haplotypes[pop_id].size() > 0) ssd_2 += (1.0 / (double) (2*pop_haplotypes[pop_id].size())) * ssd; } ssd_ap_wg += ssd_1 - 
ssd_2; } // cerr << " ssd_ap_wg: "<< ssd_ap_wg << "\n"; return ssd_ap_wg; } double amova_ssd_ag(vector &grps, map > &grp_members, map &loc_hap_index, map > &pop_haplotypes, double **hdists, double ssd_total) { // // Calculate the sum of squared deviations across groups, SSD(AG) // int pop_id_1, pop_id_2; double ssd_ag = 0.0; double ssd = 0.0; double ssd_1 = 0.0; double den = 0.0; for (uint g = 0; g < grps.size(); g++) { ssd_1 = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; for (uint j = 0; j < pop_haplotypes[pop_id_1].size(); j++) { for (uint s = 0; s < grp_members[grps[g]].size(); s++) { pop_id_2 = grp_members[grps[g]][s]; for (uint k = 0; k < pop_haplotypes[pop_id_2].size(); k++) { ssd_1 += hdists[loc_hap_index[pop_haplotypes[pop_id_1][j]]][loc_hap_index[pop_haplotypes[pop_id_2][k]]]; } } } } den = 0.0; for (uint r = 0; r < grp_members[grps[g]].size(); r++) { pop_id_1 = grp_members[grps[g]][r]; den += 2 * pop_haplotypes[pop_id_1].size(); } ssd += ssd_1 / den; } ssd_ag = ssd_total - ssd; // cerr << " ssd_ag: "<< ssd_ag << "\n"; return ssd_ag; } double haplotype_d_est(map > &pop_indexes, Datum **d, LocSum **s, vector &pop_ids) { // // Calculate D_est, fixation index, as described by // Bird, et al., 2011, Detecting and measuring genetic differentiation // +-Equation 11 // and // Jost, 2008, GST and its relatives do not measure differentiation, Molecular Ecology // +- Equation 13, D_est_chao // map loc_haplotypes; map > pop_haplotypes; map pop_totals; map::iterator it; int start, end, pop_id; uint pop_cnt = pop_ids.size(); // // Tabulate the occurences of haplotypes at this locus. 
// for (uint p = 0; p < pop_cnt; p++) { start = pop_indexes[pop_ids[p]].first; end = pop_indexes[pop_ids[p]].second; pop_id = pop_ids[p]; for (int i = start; i <= end; i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() > 2) { continue; } else if (d[i]->obshap.size() == 1) { loc_haplotypes[d[i]->obshap[0]] += 2; pop_haplotypes[pop_id][d[i]->obshap[0]] += 2; } else { for (uint j = 0; j < d[i]->obshap.size(); j++) { loc_haplotypes[d[i]->obshap[j]]++; pop_haplotypes[pop_id][d[i]->obshap[j]]++; } } } for (it = pop_haplotypes[pop_id].begin(); it != pop_haplotypes[pop_id].end(); it++) pop_totals[pop_id] += it->second; } double x = 0.0; for (it = loc_haplotypes.begin(); it != loc_haplotypes.end(); it++) { double freq_sum_sq = 0.0; double freq_sq_sum = 0.0; for (uint p = 0; p < pop_cnt; p++) { pop_id = pop_ids[p]; freq_sum_sq += (pop_haplotypes[pop_id][it->first] / pop_totals[pop_id]); freq_sq_sum += pow((pop_haplotypes[pop_id][it->first] / pop_totals[pop_id]), 2); } freq_sum_sq = pow(freq_sum_sq, 2); x += (freq_sum_sq - freq_sq_sum) / (pop_cnt - 1); } double y = 0.0; for (it = loc_haplotypes.begin(); it != loc_haplotypes.end(); it++) { for (uint p = 0; p < pop_cnt; p++) { pop_id = pop_ids[p]; y += (pop_haplotypes[pop_id][it->first] * (pop_haplotypes[pop_id][it->first] - 1)) / (pop_totals[pop_id] * (pop_totals[pop_id] - 1)); } } double d_est = 1.0 - (x / y); return d_est; } int calculate_summary_stats(vector > &files, map > &pop_indexes, map &catalog, PopMap *pmap, PopSum *psum) { map >::iterator it; CSLocus *loc; LocSum **s; LocTally *t; int len; int pop_cnt = psum->pop_cnt(); // // Calculate the means for each summary statistic. 
// int *private_cnt; double *num_indv_mean, *p_mean, *obs_het_mean, *obs_hom_mean, *exp_het_mean, *exp_hom_mean, *pi_mean, *fis_mean; double *num_indv_var, *p_var, *obs_het_var, *obs_hom_var, *exp_het_var, *exp_hom_var, *pi_var, *fis_var; double *num_indv_mean_all, *p_mean_all, *obs_het_mean_all, *obs_hom_mean_all, *exp_het_mean_all, *exp_hom_mean_all, *pi_mean_all, *fis_mean_all; double *num_indv_var_all, *p_var_all, *obs_het_var_all, *obs_hom_var_all, *exp_het_var_all, *exp_hom_var_all, *pi_var_all, *fis_var_all; double *n, *n_all, *var_sites; private_cnt = new int[pop_cnt]; n = new double[pop_cnt]; var_sites = new double[pop_cnt]; num_indv_mean = new double[pop_cnt]; num_indv_var = new double[pop_cnt]; p_mean = new double[pop_cnt]; p_var = new double[pop_cnt]; obs_het_mean = new double[pop_cnt]; obs_het_var = new double[pop_cnt]; obs_hom_mean = new double[pop_cnt]; obs_hom_var = new double[pop_cnt]; exp_het_mean = new double[pop_cnt]; exp_het_var = new double[pop_cnt]; exp_hom_mean = new double[pop_cnt]; exp_hom_var = new double[pop_cnt]; pi_mean = new double[pop_cnt]; pi_var = new double[pop_cnt]; fis_mean = new double[pop_cnt]; fis_var = new double[pop_cnt]; n_all = new double[pop_cnt]; num_indv_mean_all = new double[pop_cnt]; num_indv_var_all = new double[pop_cnt]; p_mean_all = new double[pop_cnt]; p_var_all = new double[pop_cnt]; obs_het_mean_all = new double[pop_cnt]; obs_het_var_all = new double[pop_cnt]; obs_hom_mean_all = new double[pop_cnt]; obs_hom_var_all = new double[pop_cnt]; exp_het_mean_all = new double[pop_cnt]; exp_het_var_all = new double[pop_cnt]; exp_hom_mean_all = new double[pop_cnt]; exp_hom_var_all = new double[pop_cnt]; pi_mean_all = new double[pop_cnt]; pi_var_all = new double[pop_cnt]; fis_mean_all = new double[pop_cnt]; fis_var_all = new double[pop_cnt]; for (int j = 0; j < pop_cnt; j++) { private_cnt[j] = 0; n[j] = 0.0; var_sites[j] = 0.0; num_indv_mean[j] = 0.0; num_indv_var[j] = 0.0; p_mean[j] = 0.0; p_var[j] = 0.0; obs_het_mean[j] 
= 0.0; obs_het_var[j] = 0.0; obs_hom_mean[j] = 0.0; obs_hom_var[j] = 0.0; exp_het_mean[j] = 0.0; exp_het_var[j] = 0.0; exp_hom_mean[j] = 0.0; exp_hom_var[j] = 0.0; pi_mean[j] = 0.0; pi_var[j] = 0.0; fis_mean[j] = 0.0; fis_var[j] = 0.0; n_all[j] = 0.0; num_indv_mean_all[j] = 0.0; num_indv_var_all[j] = 0.0; p_mean_all[j] = 0.0; p_var_all[j] = 0.0; obs_het_mean_all[j] = 0.0; obs_het_var_all[j] = 0.0; obs_hom_mean_all[j] = 0.0; obs_hom_var_all[j] = 0.0; exp_het_mean_all[j] = 0.0; exp_het_var_all[j] = 0.0; exp_hom_mean_all[j] = 0.0; exp_hom_var_all[j] = 0.0; pi_mean_all[j] = 0.0; pi_var_all[j] = 0.0; fis_mean_all[j] = 0.0; fis_var_all[j] = 0.0; } for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); len = strlen(loc->con); for (int i = 0; i < len; i++) { // // Compile private alleles // if (t->nucs[i].priv_allele >= 0) private_cnt[t->nucs[i].priv_allele]++; if (t->nucs[i].allele_cnt == 2) { for (int j = 0; j < pop_cnt; j++) { if (s[j]->nucs[i].num_indv == 0) continue; n[j]++; if (s[j]->nucs[i].pi > 0) var_sites[j]++; num_indv_mean[j] += s[j]->nucs[i].num_indv; p_mean[j] += s[j]->nucs[i].p; obs_het_mean[j] += s[j]->nucs[i].obs_het; obs_hom_mean[j] += s[j]->nucs[i].obs_hom; exp_het_mean[j] += s[j]->nucs[i].exp_het; exp_hom_mean[j] += s[j]->nucs[i].exp_hom; pi_mean[j] += s[j]->nucs[i].stat[0]; fis_mean[j] += s[j]->nucs[i].stat[1] != -7.0 ? s[j]->nucs[i].stat[1] : 0.0; n_all[j]++; num_indv_mean_all[j] += s[j]->nucs[i].num_indv; p_mean_all[j] += s[j]->nucs[i].p; obs_het_mean_all[j] += s[j]->nucs[i].obs_het; obs_hom_mean_all[j] += s[j]->nucs[i].obs_hom; exp_het_mean_all[j] += s[j]->nucs[i].exp_het; exp_hom_mean_all[j] += s[j]->nucs[i].exp_hom; pi_mean_all[j] += s[j]->nucs[i].stat[0]; fis_mean_all[j] += s[j]->nucs[i].stat[1] != -7.0 ? 
s[j]->nucs[i].stat[1] : 0.0; } } else if (t->nucs[i].allele_cnt == 1) { for (int j = 0; j < pop_cnt; j++) { if (s[j]->nucs[i].num_indv == 0) continue; n_all[j]++; num_indv_mean_all[j] += s[j]->nucs[i].num_indv; p_mean_all[j] += s[j]->nucs[i].p; obs_het_mean_all[j] += s[j]->nucs[i].obs_het; obs_hom_mean_all[j] += s[j]->nucs[i].obs_hom; exp_het_mean_all[j] += s[j]->nucs[i].exp_het; exp_hom_mean_all[j] += s[j]->nucs[i].exp_hom; pi_mean_all[j] += s[j]->nucs[i].stat[0]; fis_mean_all[j] += s[j]->nucs[i].stat[1] != -7.0 ? s[j]->nucs[i].stat[1] : 0.0; } } } } } for (int j = 0; j < pop_cnt; j++) { num_indv_mean[j] = num_indv_mean[j] / n[j]; p_mean[j] = p_mean[j] / n[j]; obs_het_mean[j] = obs_het_mean[j] / n[j]; obs_hom_mean[j] = obs_hom_mean[j] / n[j]; exp_het_mean[j] = exp_het_mean[j] / n[j]; exp_hom_mean[j] = exp_hom_mean[j] / n[j]; pi_mean[j] = pi_mean[j] / n[j]; fis_mean[j] = fis_mean[j] / n[j]; num_indv_mean_all[j] = num_indv_mean_all[j] / n_all[j]; p_mean_all[j] = p_mean_all[j] / n_all[j]; obs_het_mean_all[j] = obs_het_mean_all[j] / n_all[j]; obs_hom_mean_all[j] = obs_hom_mean_all[j] / n_all[j]; exp_het_mean_all[j] = exp_het_mean_all[j] / n_all[j]; exp_hom_mean_all[j] = exp_hom_mean_all[j] / n_all[j]; pi_mean_all[j] = pi_mean_all[j] / n_all[j]; fis_mean_all[j] = fis_mean_all[j] / n_all[j]; } stringstream pop_name; pop_name << "batch_" << batch_id << ".sumstats" << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening sumstats file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); double p_freq; int start, end; // // Write the population members. 
// map >::iterator pit; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { start = pit->second.first; end = pit->second.second; fh << "# " << pit->first << "\t"; for (int i = start; i <= end; i++) { fh << files[i].second; if (i < end) fh << ","; } fh << "\n"; } cerr << "Writing " << catalog.size() << " loci to summary statistics file, '" << file << "'\n"; fh << "# Batch ID " << "\t" << "Locus ID" << "\t" << "Chr" << "\t" << "BP" << "\t" << "Col" << "\t" << "Pop ID" << "\t" << "P Nuc" << "\t" << "Q Nuc" << "\t" << "N" << "\t" << "P" << "\t" << "Obs Het" << "\t" << "Obs Hom" << "\t" << "Exp Het" << "\t" << "Exp Hom" << "\t" << "Pi" << "\t" << "Smoothed Pi" << "\t" << "Smoothed Pi P-value" << "\t" << "Fis" << "\t" << "Smoothed Fis" << "\t" << "Smoothed Fis P-value" << "\t" << "Private" << "\n"; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); len = strlen(loc->con); for (int i = 0; i < len; i++) { // // If this site is fixed in all populations, DON'T output it. If it is variable, // or fixed within populations but variable among, DO output it. // if (t->nucs[i].allele_cnt == 2) { for (int j = 0; j < pop_cnt; j++) { if (s[j]->nucs[i].num_indv == 0) continue; fh << batch_id << "\t" << loc->id << "\t" << loc->loc.chr << "\t" << loc->sort_bp(i) + 1 << "\t" << i << "\t" << pop_key[psum->rev_pop_index(j)] << "\t"; // // Output the p and q alleles in the same order in each population. 
// if (t->nucs[i].p_allele == s[j]->nucs[i].p_nuc) { if (s[j]->nucs[i].q_nuc == 0) fh << s[j]->nucs[i].p_nuc << "\t" << "-"; else fh << s[j]->nucs[i].p_nuc << "\t" << s[j]->nucs[i].q_nuc; p_freq = s[j]->nucs[i].p; } else { if (s[j]->nucs[i].q_nuc == 0) fh << "-\t" << s[j]->nucs[i].p_nuc; else fh << s[j]->nucs[i].q_nuc << "\t" << s[j]->nucs[i].p_nuc; p_freq = 1 - s[j]->nucs[i].p; } fh << "\t" << (int) s[j]->nucs[i].num_indv << "\t" << std::setprecision(8) << p_freq << "\t" << std::setprecision(fieldw) << s[j]->nucs[i].obs_het << "\t" << s[j]->nucs[i].obs_hom << "\t" << s[j]->nucs[i].exp_het << "\t" << s[j]->nucs[i].exp_hom << "\t" << s[j]->nucs[i].stat[0] << "\t" // Pi << s[j]->nucs[i].smoothed[0] << "\t" // Smoothed Pi << s[j]->nucs[i].bs[0] << "\t" // Pi bootstrapped p-value << (s[j]->nucs[i].stat[1] == -7.0 ? 0.0 : s[j]->nucs[i].stat[1]) << "\t" // Fis << s[j]->nucs[i].smoothed[1] << "\t" // Smoothed Fis << s[j]->nucs[i].bs[1] << "\t"; // Fis bootstrapped p-value. (t->nucs[i].priv_allele == j) ? fh << "1\n" : fh << "0\n"; // // Tabulate the residuals to calculate the variance. 
// num_indv_var[j] += pow((s[j]->nucs[i].num_indv - num_indv_mean[j]), 2); p_var[j] += pow((s[j]->nucs[i].p - p_mean[j]), 2); obs_het_var[j] += pow((s[j]->nucs[i].obs_het - obs_het_mean[j]), 2); obs_hom_var[j] += pow((s[j]->nucs[i].obs_hom - obs_hom_mean[j]), 2); exp_het_var[j] += pow((s[j]->nucs[i].exp_het - exp_het_mean[j]), 2); exp_hom_var[j] += pow((s[j]->nucs[i].exp_hom - exp_hom_mean[j]), 2); pi_var[j] += pow((s[j]->nucs[i].stat[0] - pi_mean[j]), 2); fis_var[j] += pow((s[j]->nucs[i].stat[1] - fis_mean[j]), 2); num_indv_var_all[j] += pow((s[j]->nucs[i].num_indv - num_indv_mean_all[j]), 2); p_var_all[j] += pow((s[j]->nucs[i].p - p_mean_all[j]), 2); obs_het_var_all[j] += pow((s[j]->nucs[i].obs_het - obs_het_mean_all[j]), 2); obs_hom_var_all[j] += pow((s[j]->nucs[i].obs_hom - obs_hom_mean_all[j]), 2); exp_het_var_all[j] += pow((s[j]->nucs[i].exp_het - exp_het_mean_all[j]), 2); exp_hom_var_all[j] += pow((s[j]->nucs[i].exp_hom - exp_hom_mean_all[j]), 2); pi_var_all[j] += pow((s[j]->nucs[i].stat[0] - pi_mean_all[j]), 2); fis_var_all[j] += pow((s[j]->nucs[i].stat[1] - fis_mean_all[j]), 2); } } else if (t->nucs[i].allele_cnt == 1) { for (int j = 0; j < pop_cnt; j++) { if (s[j]->nucs[i].num_indv == 0) continue; num_indv_var_all[j] += pow((s[j]->nucs[i].num_indv - num_indv_mean_all[j]), 2); p_var_all[j] += pow((s[j]->nucs[i].p - p_mean_all[j]), 2); obs_het_var_all[j] += pow((s[j]->nucs[i].obs_het - obs_het_mean_all[j]), 2); obs_hom_var_all[j] += pow((s[j]->nucs[i].obs_hom - obs_hom_mean_all[j]), 2); exp_het_var_all[j] += pow((s[j]->nucs[i].exp_het - exp_het_mean_all[j]), 2); exp_hom_var_all[j] += pow((s[j]->nucs[i].exp_hom - exp_hom_mean_all[j]), 2); pi_var_all[j] += pow((s[j]->nucs[i].stat[0] - pi_mean_all[j]), 2); fis_var_all[j] += pow((s[j]->nucs[i].stat[1] - fis_mean_all[j]), 2); } } } } } // // Calculate the variance. 
// for (int j = 0; j < pop_cnt; j++) { num_indv_var[j] = num_indv_var[j] / (n[j] - 1); p_var[j] = p_var[j] / (n[j] - 1); obs_het_var[j] = obs_het_var[j] / (n[j] - 1); obs_hom_var[j] = obs_hom_var[j] / (n[j] - 1); exp_het_var[j] = exp_het_var[j] / (n[j] - 1); exp_hom_var[j] = exp_hom_var[j] / (n[j] - 1); pi_var[j] = pi_var[j] / (n[j] - 1); fis_var[j] = fis_var[j] / (n[j] - 1); num_indv_var_all[j] = num_indv_var_all[j] / (n_all[j] - 1); p_var_all[j] = p_var_all[j] / (n_all[j] - 1); obs_het_var_all[j] = obs_het_var_all[j] / (n_all[j] - 1); obs_hom_var_all[j] = obs_hom_var_all[j] / (n_all[j] - 1); exp_het_var_all[j] = exp_het_var_all[j] / (n_all[j] - 1); exp_hom_var_all[j] = exp_hom_var_all[j] / (n_all[j] - 1); pi_var_all[j] = pi_var_all[j] / (n_all[j] - 1); fis_var_all[j] = fis_var_all[j] / (n_all[j] - 1); } fh.close(); pop_name.str(""); pop_name << "batch_" << batch_id << ".sumstats_summary" << ".tsv"; file = in_path + pop_name.str(); fh.open(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening sumstats summary file '" << file << "'\n"; exit(1); } // // Write out summary statistics of the summary statistics. 
// fh << "# Variant positions\n" << "# Pop ID\t" << "Private\t" << "Num Indv\t" << "Var\t" << "StdErr\t" << "P\t" << "Var\t" << "StdErr\t" << "Obs Het\t" << "Var\t" << "StdErr\t" << "Obs Hom\t" << "Var\t" << "StdErr\t" << "Exp Het\t" << "Var\t" << "StdErr\t" << "Exp Hom\t" << "Var\t" << "StdErr\t" << "Pi\t" << "Var\t" << "StdErr\t" << "Fis\t" << "Var\t" << "StdErr\n"; double *sq_n = new double[pop_cnt]; double *sq_n_all = new double[pop_cnt]; for (int j = 0; j < pop_cnt; j++) { sq_n[j] = sqrt(n[j]); sq_n_all[j] = sqrt(n_all[j]); } for (int j = 0; j < pop_cnt; j++) fh << pop_key[psum->rev_pop_index(j)] << "\t" << private_cnt[j] << "\t" << num_indv_mean[j] << "\t" << num_indv_var[j] << "\t" << sqrt(num_indv_var[j]) / sq_n[j] << "\t" << p_mean[j] << "\t" << p_var[j] << "\t" << sqrt(p_var[j]) / sq_n[j] << "\t" << obs_het_mean[j] << "\t" << obs_het_var[j] << "\t" << sqrt(obs_het_var[j]) / sq_n[j] << "\t" << obs_hom_mean[j] << "\t" << obs_hom_var[j] << "\t" << sqrt(obs_hom_var[j]) / sq_n[j] << "\t" << exp_het_mean[j] << "\t" << exp_het_var[j] << "\t" << sqrt(exp_het_var[j]) / sq_n[j] << "\t" << exp_hom_mean[j] << "\t" << exp_hom_var[j] << "\t" << sqrt(exp_hom_var[j]) / sq_n[j] << "\t" << pi_mean[j] << "\t" << pi_var[j] << "\t" << sqrt(pi_var[j]) / sq_n[j] << "\t" << fis_mean[j] << "\t" << fis_var[j] << "\t" << sqrt(num_indv_var[j]) / sq_n[j] << "\n"; fh << "# All positions (variant and fixed)\n" << "# Pop ID\t" << "Private\t" << "Sites\t" << "Variant Sites\t" << "Polymorphic Sites\t" << "% Polymorphic Loci\t" << "Num Indv\t" << "Var\t" << "StdErr\t" << "P\t" << "Var\t" << "StdErr\t" << "Obs Het\t" << "Var\t" << "StdErr\t" << "Obs Hom\t" << "Var\t" << "StdErr\t" << "Exp Het\t" << "Var\t" << "StdErr\t" << "Exp Hom\t" << "Var\t" << "StdErr\t" << "Pi\t" << "Var\t" << "StdErr\t" << "Fis\t" << "Var\t" << "StdErr\n"; for (int j = 0; j < pop_cnt; j++) { fh << pop_key[psum->rev_pop_index(j)] << "\t" << private_cnt[j] << "\t" << n_all[j] << "\t" << n[j] << "\t" << var_sites[j] << 
"\t" << var_sites[j] / n_all[j] * 100 << "\t" << num_indv_mean_all[j] << "\t" << num_indv_var_all[j] << "\t" << sqrt(num_indv_var_all[j]) / sq_n_all[j] << "\t" << p_mean_all[j] << "\t" << p_var_all[j] << "\t" << sqrt(p_var_all[j]) / sq_n_all[j] << "\t" << obs_het_mean_all[j] << "\t" << obs_het_var_all[j] << "\t" << sqrt(obs_het_var_all[j]) / sq_n_all[j] << "\t" << obs_hom_mean_all[j] << "\t" << obs_hom_var_all[j] << "\t" << sqrt(obs_hom_var_all[j]) / sq_n_all[j] << "\t" << exp_het_mean_all[j] << "\t" << exp_het_var_all[j] << "\t" << sqrt(exp_het_var_all[j]) / sq_n_all[j] << "\t" << exp_hom_mean_all[j] << "\t" << exp_hom_var_all[j] << "\t" << sqrt(exp_hom_var_all[j]) / sq_n_all[j] << "\t" << pi_mean_all[j] << "\t" << pi_var_all[j] << "\t" << sqrt(pi_var_all[j]) / sq_n_all[j] << "\t" << fis_mean_all[j] << "\t" << fis_var_all[j] << "\t" << sqrt(num_indv_var_all[j]) / sq_n_all[j] << "\n"; } delete [] private_cnt; delete [] n; delete [] var_sites; delete [] sq_n; delete [] num_indv_mean; delete [] num_indv_var; delete [] p_mean; delete [] p_var; delete [] obs_het_mean; delete [] obs_het_var; delete [] obs_hom_mean; delete [] obs_hom_var; delete [] exp_het_mean; delete [] exp_het_var; delete [] exp_hom_mean; delete [] exp_hom_var; delete [] pi_mean; delete [] pi_var; delete [] fis_mean; delete [] fis_var; delete [] n_all; delete [] sq_n_all; delete [] num_indv_mean_all; delete [] num_indv_var_all; delete [] p_mean_all; delete [] p_var_all; delete [] obs_het_mean_all; delete [] obs_het_var_all; delete [] obs_hom_mean_all; delete [] obs_hom_var_all; delete [] exp_het_mean_all; delete [] exp_het_var_all; delete [] exp_hom_mean_all; delete [] exp_hom_var_all; delete [] pi_mean_all; delete [] pi_var_all; delete [] fis_mean_all; delete [] fis_var_all; fh.close(); return 0; } int write_fst_stats(vector > &files, map > &pop_indexes, map &catalog, PopMap *pmap, PopSum *psum, ofstream &log_fh) { // // We want to iterate over each pair of populations and calculate Fst at each // 
nucleotide of each locus. // vector means; vector pops; map >::iterator pit; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) pops.push_back(pit->first); if (pops.size() == 1) return 0; // // Instantiate the kernel smoothing object if requested. // OPopPair *ord = new OPopPair(psum, log_fh); KSmooth *ks; Bootstrap *bs; if (kernel_smoothed && loci_ordered) ks = new KSmooth(2); for (uint i = 0; i < pops.size(); i++) { for (uint j = i + 1; j < pops.size(); j++) { int pop_1 = pops[i]; int pop_2 = pops[j]; double sum = 0.0; double cnt = 0.0; stringstream pop_name; pop_name << "batch_" << batch_id << ".fst_" << pop_key[pop_1] << "-" << pop_key[pop_2] << ".tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Fst output file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); cerr << "Calculating Fst for populations '" << pop_key[pop_1] << "' and '" << pop_key[pop_2] << "' and writing it to file, '" << file << "'\n"; fh << "# Batch ID" << "\t" << "Locus ID" << "\t" << "Pop 1 ID" << "\t" << "Pop 2 ID" << "\t" << "Chr" << "\t" << "BP" << "\t" << "Column" << "\t" << "Overall Pi" << "\t" << "Fst" << "\t" << "Fisher's P" << "\t" << "Odds Ratio" << "\t" << "CI Low" << "\t" << "CI High" << "\t" << "LOD" << "\t" << "Corrected Fst" << "\t" << "Smoothed Fst" << "\t" << "AMOVA Fst" << "\t" << "Corrected AMOVA Fst" << "\t" << "Smoothed AMOVA Fst" << "\t" << "Smoothed AMOVA Fst P-value" << "\t" << "Window SNP Count"; // // If requested, log Fst component calculations to a file. 
// if (log_fst_comp) { fh << "\t" << "n_1" << "\t" << "n_2" << "\t" << "tot_alleles" << "\t" << "p_1" << "\t" << "q_1" << "\t" << "p_2" << "\t" << "q_2" << "\t" << "pi_1" << "\t" << "pi_2" << "\t" << "pi_all" << "\t" << "bcoeff_1" << "\t" << "bcoeff_2" << "\t" << "binomial_fst" << "\t" << "p_1_freq" << "\t" << "q_1_freq" << "\t" << "p_2_freq" << "\t" << "q_2_freq" << "\t" << "p_avg_cor" << "\t" << "n_avg_cor" << "\t" << "amova_fst" << "\n"; } else { fh << "\n"; } if (bootstrap_fst) bs = new Bootstrap(2); map >::iterator it; map > genome_pairs; // int snp_dist[max_snp_dist] = {0}; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; map pairs_key; vector &pairs = genome_pairs[chr]; // // Order loci between the two populations and calculate Fst // ord->order(pairs, pairs_key, it->second, pop_1, pop_2); // // Apply user-selected correction to the Fst values. // double correction; switch(fst_correction) { case p_value: for (uint i = 0; i < pairs.size(); i++) { if (pairs[i] != NULL) { pairs[i]->stat[0] = pairs[i]->fet_p < p_value_cutoff ? pairs[i]->fst : 0; pairs[i]->stat[1] = pairs[i]->fet_p < p_value_cutoff ? pairs[i]->amova_fst : 0; } } break; case bonferroni_win: correct_fst_bonferroni_win(pairs); break; case bonferroni_gen: correction = p_value_cutoff / catalog.size(); for (uint i = 0; i < pairs.size(); i++) { if (pairs[i] != NULL) { pairs[i]->stat[0] = pairs[i]->fet_p < correction ? pairs[i]->fst : 0; pairs[i]->stat[1] = pairs[i]->fet_p < correction ? pairs[i]->amova_fst : 0; } } break; case no_correction: for (uint i = 0; i < pairs.size(); i++) { if (pairs[i] != NULL) { pairs[i]->stat[0] = pairs[i]->fst; pairs[i]->stat[1] = pairs[i]->amova_fst; } } break; } // // If bootstrapping is enabled, record all Fst values. // if (bootstrap_fst) bs->add_data(pairs); // // Calculate kernel-smoothed Fst values. 
// if (kernel_smoothed && loci_ordered) { cerr << " Generating kernel-smoothed Fst for " << it->first << ".\n"; ks->smooth(pairs); } } // // If bootstrap resampling method is approximate, generate our single, empirical distribution. // map > approx_fst_dist; // if (bootstrap_fst && bootstrap_type == bs_approx) // bootstrap_fst_approximate_dist(fst_samples, allele_depth_samples, weights, snp_dist, approx_fst_dist); for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { string chr = it->first; vector &pairs = genome_pairs[chr]; // // Bootstrap resample this chromosome. // if (bootstrap_fst && bootstrap_type == bs_exact) { cerr << " Bootstrap resampling kernel-smoothed Fst for " << it->first << ".\n"; bs->execute(pairs); } for (uint i = 0; i < pairs.size(); i++) { if (pairs[i] == NULL) continue; // // Calculate Fst P-value from approximate distribution. // // if (bootstrap_fst && bootstrap_type == bs_approx) // pairs[i]->bs[0] = bootstrap_approximate_pval(pairs[i]->snp_cnt, pairs[i]->stat[0], approx_fst_dist); cnt++; sum += pairs[i]->stat[1]; // Corrected AMOVA Fst fh << batch_id << "\t" << pairs[i]->loc_id << "\t" << pop_key[pop_1] << "\t" << pop_key[pop_2] << "\t" << chr << "\t" << pairs[i]->bp << "\t" << pairs[i]->col << "\t" << pairs[i]->pi << "\t" << pairs[i]->fst << "\t" << std::setprecision(9) << pairs[i]->fet_p << "\t" << pairs[i]->fet_or << "\t" << pairs[i]->ci_low << "\t" << pairs[i]->ci_high << "\t" << pairs[i]->lod << "\t" << pairs[i]->stat[0] << "\t" << pairs[i]->smoothed[0] << "\t" << pairs[i]->amova_fst << "\t" << pairs[i]->stat[1] << "\t" << pairs[i]->smoothed[1] << "\t" << pairs[i]->bs[1] << "\t" << pairs[i]->snp_cnt; if (log_fst_comp) { fh << "\t" << pairs[i]->comp[0] << "\t" << pairs[i]->comp[1] << "\t" << pairs[i]->comp[2] << "\t" << pairs[i]->comp[3] << "\t" << pairs[i]->comp[4] << "\t" << pairs[i]->comp[5] << "\t" << pairs[i]->comp[6] << "\t" << pairs[i]->comp[7] << "\t" << pairs[i]->comp[8] << "\t" << pairs[i]->comp[9] << 
"\t" << pairs[i]->comp[10] << "\t" << pairs[i]->comp[11] << "\t" << pairs[i]->fst << "\t" << pairs[i]->comp[12] << "\t" << pairs[i]->comp[13] << "\t" << pairs[i]->comp[14] << "\t" << pairs[i]->comp[15] << "\t" << pairs[i]->comp[16] << "\t" << pairs[i]->comp[17] << "\t" << pairs[i]->amova_fst << "\n"; } else { fh << "\n"; } delete pairs[i]; } } cerr << "Pop 1: " << pop_key[pop_1] << "; Pop 2: " << pop_key[pop_2] << "; mean Fst: " << (sum / cnt) << "\n"; means.push_back(sum / cnt); cerr << "Pooled populations '" << pop_key[pop_1] << "' and '" << pop_key[pop_2] << "' contained: " << ord->incompatible_loci << " incompatible loci; " << ord->multiple_loci << " nucleotides covered by more than one RAD locus.\n"; fh.close(); if (bootstrap_fst) delete bs; } } // // Write out the mean Fst measure of each pair of populations. // stringstream pop_name; pop_name << "batch_" << batch_id << ".fst_summary.tsv"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening generic output file '" << file << "'\n"; exit(1); } // // Write out X-axis header. // for (uint i = 0; i < pops.size(); i++) fh << "\t" << pop_key[pops[i]]; fh << "\n"; uint n = 0; for (uint i = 0; i < pops.size() - 1; i++) { fh << pop_key[pops[i]]; for (uint k = 0; k <= i; k++) fh << "\t"; for (uint j = i + 1; j < pops.size(); j++) { fh << "\t" << means[n]; n++; } fh << "\n"; } fh.close(); delete ord; if (kernel_smoothed && loci_ordered) { delete ks; } return 0; } int correct_fst_bonferroni_win(vector &pairs) { int limit = 3 * sigma; int limit_l, limit_u; double correction; uint cnt, pos_l, pos_u; pos_l = 0; pos_u = 0; for (uint pos_c = 0; pos_c < pairs.size(); pos_c++) { if (pairs[pos_c] == NULL) continue; limit_l = pairs[pos_c]->bp - limit > 0 ? 
                      pairs[pos_c]->bp - limit : 0;
        limit_u = pairs[pos_c]->bp + limit;

        //
        // Advance the lower window pointer to the first non-null pair at or
        // beyond limit_l. pos_l/pos_u only move forward across the whole scan.
        //
        while (pos_l < pairs.size()) {
            if (pairs[pos_l] == NULL) {
                pos_l++;
            } else {
                if (pairs[pos_l]->bp < limit_l)
                    pos_l++;
                else
                    break;
            }
        }

        //
        // Advance the upper window pointer to the first non-null pair at or
        // beyond limit_u.
        //
        while (pos_u < pairs.size()) {
            if (pairs[pos_u] == NULL) {
                pos_u++;
            } else {
                if (pairs[pos_u]->bp < limit_u)
                    pos_u++;
                else
                    break;
            }
        }

        //
        // Count the tests inside the window; the center site itself is always in
        // [pos_l, pos_u), so cnt >= 1 and the division below is safe.
        //
        cnt = 0;
        for (uint i = pos_l; i < pos_u; i++) {
            if (pairs[i] == NULL) continue;
            cnt++;
        }

        // Bonferroni: divide the cutoff by the number of tests in the window.
        correction = p_value_cutoff / cnt;

        pairs[pos_c]->stat[0] = pairs[pos_c]->fet_p < correction ? pairs[pos_c]->fst : 0;
    }

    return 0;
}

//
// Kernel-smooth Pi and Fis for a single population across each ordered
// chromosome, optionally bootstrapping the smoothed values (exact method only).
//
int kernel_smoothed_popstats(map &catalog, PopMap *pmap, PopSum *psum, int pop_id, ofstream &log_fh)
{
    // int snp_dist[max_snp_dist] = {0};
    // int sites_per_snp = 0;
    // int tot_windows = 0;
    map >::iterator it;
    map > genome_sites;   // chromosome => ordered sites for this population

    //
    // Instantiate the kernel smoothing object if requested.
    //
    KSmooth *ks = new KSmooth(2);
    OSumStat *ord = new OSumStat(psum, log_fh);
    Bootstrap *bs;                        // allocated/used only when bootstrap_pifis is set
    if (bootstrap_pifis)
        bs = new Bootstrap(2);

    //
    // Order the sites on each chromosome and, if requested, record them for
    // bootstrap resampling.
    //
    for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) {
        vector &sites = genome_sites[it->first];
        ord->order(sites, it->second, pop_id);
        if (bootstrap_pifis) bs->add_data(sites);
    }

    cerr << " Population '" << pop_key[pop_id] << "' contained " << ord->multiple_loci << " nucleotides covered by more than one RAD locus.\n";

    //
    // Smooth each chromosome; bootstrap the smoothed statistics when the exact
    // resampling method was selected.
    //
    for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) {
        if (bootstrap_pifis)
            cerr << " Smoothing and bootstrapping chromosome " << it->first << "\n";
        else
            cerr << " Smoothing chromosome " << it->first << "\n";

        vector &sites = genome_sites[it->first];

        ks->smooth(sites);

        if (bootstrap_pifis && bootstrap_type == bs_exact)
            bs->execute_mixed(sites);
    }

    delete ks;
    delete ord;
    if (bootstrap_pifis) delete bs;

    // //
    // // If bootstrap resampling method is approximate, generate our single, empirical distribution.
// // // map > approx_fis_dist; // map > approx_pi_dist; // if (bootstrap && bootstrap_type == bs_approx) { // sites_per_snp = sites_per_snp / tot_windows; // // cerr << "Sites per snp: " << sites_per_snp << "\n"; // bootstrap_popstats_approximate_dist(fis_samples, pi_samples, allele_depth_samples, // weights, snp_dist, sites_per_snp, // approx_fis_dist, approx_pi_dist); // for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { // for (uint pos = 0; pos < it->second.size(); pos++) { // loc = it->second[pos]; // len = strlen(loc->con); // lsum = psum->pop(loc->id, pop_id); // for (int k = 0; k < len; k++) // if (lsum->nucs[k].num_indv > 0 && bootstrap && lsum->nucs[k].pi > 0) { // // // // Calculate Fis/Pi p-values from approximate distribution. // // // lsum->nucs[k].wFis_pval = bootstrap_approximate_pval(lsum->nucs[k].snp_cnt, lsum->nucs[k].wFis, approx_fis_dist); // lsum->nucs[k].wPi_pval = bootstrap_approximate_pval(lsum->nucs[k].snp_cnt, lsum->nucs[k].wPi, approx_pi_dist); // } // } // } // } return 0; } int bootstrap_popstats_approximate_dist(vector &fis_samples, vector &pi_samples, vector &allele_samples, double *weights, int *snp_dist, int sites_per_snp, map > &approx_fis_dist, map > &approx_pi_dist) { // // Allocate an array of bootstrap resampling objects. // int win_size = 6 * sigma + 1; int win_cntr = win_size / 2; // // Initialize the Fst distribution map. 
// for (int i = 0; i < max_snp_dist; i++) { if (snp_dist[i] == 0.0) continue; // cerr << "SNP Dist: " << i << " snps occurred " << snp_dist[i] << "\n"; approx_fis_dist[i] = vector (); approx_fis_dist[i].reserve(bootstrap_reps); approx_pi_dist[i] = vector (); approx_pi_dist[i].reserve(bootstrap_reps); } vector poss; poss.reserve(max_snp_dist); double weighted_fis, weighted_pi, sum_fis, sum_pi, final_weight_fis, final_weight_pi; // int index_1, index_2; int pos, index_3, dist, start, end; int half = sites_per_snp / 2; for (int i = 0; i < max_snp_dist; i++) { if (snp_dist[i] == 0.0) continue; cerr << " Generating NULL distribution for " << i << " SNPs...\n"; // #pragma omp parallel private(poss, pos, index_1, index_2, index_3, dist, sum_fis, sum_pi, weighted_fis, weighted_pi, final_weight_fis, final_weight_pi) #pragma omp parallel private(poss, pos, index_3, dist, sum_fis, sum_pi, weighted_fis, weighted_pi, final_weight_fis, final_weight_pi) { BSample *bs = new BSample[win_size]; // // Populate the BSample objects. // for (int n = 0; n < win_size; n++) bs[n].bp = n + 1; vector fiss, pis; // // Bootstrap this bitch. // #pragma omp for schedule(dynamic, 1) for (int j = 0; j < bootstrap_reps; j++) { // cerr << " Bootsrap rep " << j << "\n"; // // First SNP is always placed at the center of the window. // pos = win_cntr; // index_1 = (int) (fis_samples.size() * (random() / (RAND_MAX + 1.0))); // index_2 = (int) (pi_samples.size() * (random() / (RAND_MAX + 1.0))); index_3 = (int) (allele_samples.size() * (random() / (RAND_MAX + 1.0))); // // Fill in the area around the SNP with fixed sites. // start = pos - half > 0 ? pos - half : 0; end = pos + half < win_size ? 
pos + half : win_size; for (int n = start; n < end; n++) { // bs[n].f = 0; // bs[n].pi = 0; bs[n].alleles = bs[pos].alleles; poss.push_back(n); } // bs[pos].f = fis_samples[index_1]; // bs[pos].pi = pi_samples[index_2]; bs[pos].alleles = allele_samples[index_3]; // cerr << " Placing SNP at position: " << pos << "; with data from " << index_1 << " filling area from " << start << " to " << end << "\n"; // // Randomly select the positions and values for each SNP to populate the window // for (int k = 0; k < i - 1; k++) { pos = (int) (win_size * (random() / (RAND_MAX + 1.0))); // index_1 = (int) (fis_samples.size() * (random() / (RAND_MAX + 1.0))); // index_2 = (int) (pi_samples.size() * (random() / (RAND_MAX + 1.0))); index_3 = (int) (allele_samples.size() * (random() / (RAND_MAX + 1.0))); poss.push_back(pos); // // Fill in the area around the SNP with fixed sites. // start = pos - half > 0 ? pos - half : 0; end = pos + half < win_size ? pos + half : win_size; for (int n = start; n < end; n++) { // bs[n].f = 0; // bs[n].pi = 0; bs[n].alleles = bs[pos].alleles; poss.push_back(n); } // bs[pos].f = fis_samples[index_1]; // bs[pos].pi = pi_samples[index_2]; bs[pos].alleles = allele_samples[index_3]; // cerr << " Placing SNP at position: " << pos << "; with data from " << index_1 << " filling area from " << start << " to " << end << "\n"; } weighted_fis = 0.0; sum_fis = 0.0; weighted_pi = 0.0; sum_pi = 0.0; for (int n = 0; n < win_size; n++) { // if (bs[n].pi < 0.0) // continue; // // Calculate weighted Fst at this position. // dist = bs[n].bp > bs[win_cntr].bp ? 
// NOTE(review): this region was recovered from a whitespace-flattened extraction;
// template arguments (e.g. vector<double>, map<int, vector<double> >) appear to have
// been stripped by the extraction. Tokens are preserved exactly as found — restore
// the template parameters against the upstream Stacks 1.35 sources before compiling.
//
// Tail of the preceding Fis/Pi bootstrap routine (its opening lies before this
// chunk): second half of the ternary computing the distance of window position
// n from the window center.
            bs[n].bp - bs[win_cntr].bp : bs[win_cntr].bp - bs[n].bp;
        // Accumulate the kernel weight for this window position, scaled by the
        // number of alleles sampled there. The Fis/Pi numerators are commented
        // out; only the weight sums are maintained here.
        final_weight_fis = (bs[n].alleles - 1) * weights[dist];
        // weighted_fis += bs[n].f * final_weight_fis;
        sum_fis += final_weight_fis;
        final_weight_pi = (bs[n].alleles - 1) * weights[dist];
        // weighted_pi += bs[n].pi * final_weight_pi;
        sum_pi += final_weight_pi;
    }
    // Record the weighted statistics for this bootstrap replicate.
    // NOTE(review): weighted_fis / weighted_pi are never accumulated above (those
    // lines are commented out), so these pushes record 0-divided-by-weight-sum —
    // confirm against upstream whether this is intentional dead code.
    fiss.push_back(weighted_fis / sum_fis);
    pis.push_back(weighted_pi / sum_pi);
    // cerr << " New weighted fis value: " << weighted_fis / sum_fis << "; size: " << fiss.size() << "\n";
    // Reset per-replicate window state (the per-position resets are disabled).
    for (uint n = 0; n < poss.size(); n++) {
        // bs[poss[n]].f = 0.0;
        // bs[poss[n]].pi = -1.0;
    }
    poss.clear();
}
// Merging of per-thread results into the shared distributions is disabled.
// #pragma omp critical
// {
// vector &f = approx_fis_dist[i];
// for (uint n = 0; n < fiss.size(); n++)
// f.push_back(fiss[n]);
// vector &p = approx_pi_dist[i];
// for (uint n = 0; n < pis.size(); n++)
// p.push_back(pis[n]);
// }
delete [] bs;
}
// Sort each null distribution so p-values can be found with upper_bound.
sort(approx_fis_dist[i].begin(), approx_fis_dist[i].end());
sort(approx_pi_dist[i].begin(), approx_pi_dist[i].end());
}
return 0;
}

//
// Build, by bootstrap resampling, an approximate null distribution of the
// kernel-smoothed Fst statistic for each observed SNP count. For every SNP
// count i that occurs in snp_dist, bootstrap_reps replicates are generated
// (in parallel via OpenMP) by drawing random entries from allele_samples into
// a smoothing window of win_size = 6 * sigma + 1 positions and accumulating
// the kernel weights (weights[] is presumably the Gaussian kernel indexed by
// distance from the window center — confirm against the caller). The sorted
// replicates are stored in approx_fst_dist[i].
//
int bootstrap_fst_approximate_dist(vector &fst_samples, vector &allele_samples, double *weights, int *snp_dist, map > &approx_fst_dist) {
    //
    // Allocate an array of bootstrap resampling objects.
    //
    int win_size = 6 * sigma + 1;
    int win_cntr = win_size / 2;

    //
    // Initialize the Fst distribution map.
    //
    for (int i = 0; i < max_snp_dist; i++) {
        // NOTE(review): snp_dist is an int array compared against 0.0 — harmless,
        // but the literal should probably be 0.
        if (snp_dist[i] == 0.0) continue;
        // cerr << "SNP Dist: " << i << " snps occurred " << snp_dist[i] << "\n";
        approx_fst_dist[i] = vector ();
        approx_fst_dist[i].reserve(bootstrap_reps);
    }

    vector poss;
    poss.reserve(max_snp_dist);
    double weighted_fst, sum, final_weight;
    //int index_1;
    int pos, index_2, dist;

    for (int i = 0; i < max_snp_dist; i++) {
        if (snp_dist[i] == 0.0) continue;

        cerr << " Generating NULL distribution for " << i << " SNPs...\n";

        // Each thread gets private window/accumulator state; bs is allocated
        // per-thread inside the parallel region.
        // #pragma omp parallel private(poss, pos, index_1, index_2, dist, sum, weighted_fst, final_weight)
        #pragma omp parallel private(poss, pos, index_2, dist, sum, weighted_fst, final_weight)
        {
            BSample *bs = new BSample[win_size];

            //
            // Populate the BSample objects.
            //
            for (int n = 0; n < win_size; n++)
                bs[n].bp = n + 1;

            vector fsts;

            //
            // Bootstrap this bitch.
            //
            #pragma omp for schedule(dynamic, 1)
            for (int j = 0; j < bootstrap_reps; j++) {
                // cerr << "Bootsrap rep " << j << "\n";

                //
                // First SNP is always placed at the center of the window.
                //
                pos = win_cntr;
                // index_1 = (int) (fst_samples.size() * (random() / (RAND_MAX + 1.0)));
                index_2 = (int) (allele_samples.size() * (random() / (RAND_MAX + 1.0)));
                // bs[pos].f = fst_samples[index_1];
                bs[pos].alleles = allele_samples[index_2];

                //
                // Randomly select the positions and values for each SNP to populate the window
                //
                for (int k = 0; k < i - 1; k++) {
                    pos = (int) (win_size * (random() / (RAND_MAX + 1.0)));
                    // index_1 = (int) (fst_samples.size() * (random() / (RAND_MAX + 1.0)));
                    index_2 = (int) (allele_samples.size() * (random() / (RAND_MAX + 1.0)));
                    // bs[pos].f = fst_samples[index_1];
                    // NOTE(review): the assignment below is commented out, so window
                    // positions other than the center keep whatever allele count was
                    // left from a previous replicate, while index_2 is still drawn —
                    // confirm against upstream whether this is intentional.
                    // bs[pos].alleles = allele_samples[index_2];
                    // cerr << " " << j << ": Placing SNP at position: " << pos << " with data from index " << index_1 << "\n";
                    poss.push_back(pos);
                }

                weighted_fst = 0.0;
                sum = 0.0;

                for (int n = 0; n < win_size; n++) {
                    // if (bs[n].f == 0.0)
                    // continue;
                    //
                    // Calculate weighted Fst at this position.
                    //
                    dist = bs[n].bp > bs[win_cntr].bp ? bs[n].bp - bs[win_cntr].bp : bs[win_cntr].bp - bs[n].bp;
                    final_weight = (bs[n].alleles - 1) * weights[dist];
                    // weighted_fst += bs[n].f * final_weight;
                    sum += final_weight;
                }

                fsts.push_back(weighted_fst / sum);
                // cerr << " New weighted Fst value: " << weighted_fst / sum << "; size: " << fsts.size() << "\n";

                // for (uint n = 0; n < poss.size(); n++)
                // bs[poss[n]].f = 0.0;
                poss.clear();
            }

            // Merging of per-thread results is disabled here as well.
            // #pragma omp critical
            // {
            // vector &f = approx_fst_dist[i];
            // for (uint n = 0; n < fsts.size(); n++)
            // f.push_back(fsts[n]);
            // }
            delete [] bs;
        }

        // Sort so bootstrap_approximate_pval() can binary-search this distribution.
        sort(approx_fst_dist[i].begin(), approx_fst_dist[i].end());
    }

    return 0;
}

//
// Look up an approximate one-sided p-value for `stat` against the pre-computed,
// sorted null distribution for loci containing `snp_cnt` SNPs. Returns 1.0 when
// no distribution exists for that SNP count. pos is the 1-based rank of the
// first element greater than `stat` (clamped to [1, size]); the p-value is the
// fraction of the null distribution above that rank.
//
double bootstrap_approximate_pval(int snp_cnt, double stat, map > &approx_dist) {
    if (approx_dist.count(snp_cnt) == 0)
        return 1.0;

    vector::iterator up;
    vector &dist = approx_dist[snp_cnt];
    double pos;

    up = upper_bound(dist.begin(), dist.end(), stat);

    if (up == dist.begin())
        pos = 1;
    else if (up == dist.end())
        pos = dist.size();
    else
        pos = up - dist.begin() + 1;

    double res = 1.0 - (pos / (double) dist.size());

    // cerr << "Generated Approx Smoothed Fst Distribution:\n";
    // for (uint n = 0; n < dist.size(); n++)
    // cerr << " n: " << n << "; Fst: " << dist[n] << "\n";
    // cerr << "Comparing Fst value: " << stat
    // << " at position " << (up - dist.begin()) << " out of "
    // << dist.size() << " positions (converted position: " << pos << "); pvalue: " << res << ".\n";

    return res;
}

//
// Open batch_<batch_id>.genotypes.tsv (write_gtypes == true) or
// batch_<batch_id>.haplotypes.tsv (false) under in_path; exits the program if
// the file cannot be opened. (The function body continues beyond this chunk.)
//
int write_generic(map &catalog, PopMap *pmap, map &samples, bool write_gtypes) {
    stringstream pop_name;
    pop_name << "batch_" << batch_id;
    if (write_gtypes)
        pop_name << ".genotypes.tsv";
    else
        pop_name << ".haplotypes.tsv";

    string file = in_path + pop_name.str();

    ofstream fh(file.c_str(), ofstream::out);

    if (fh.fail()) {
        cerr << "Error opening generic output file '" << file << "'\n";
        exit(1);
    }

    //
    // Count the number of markers that have enough samples to output.
// map::iterator it; CSLocus *loc; int num_loci = catalog.size(); cerr << "Writing " << num_loci << " loci to " << (write_gtypes ? "genotype" : "observed haplotype") << " file, '" << file << "'\n"; // // Write the header // fh << "Catalog ID\t"; if (expand_id) fh << "\t"; if (write_gtypes) fh << "Marker\t"; fh << "Cnt\t"; for (int i = 0; i < pmap->sample_cnt(); i++) { fh << samples[pmap->rev_sample_index(i)]; if (i < pmap->sample_cnt() - 1) fh << "\t"; } fh << "\n"; // // Output each locus. // for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; stringstream id; loc->annotation.length() > 0 ? id << loc->id << "|" << loc->annotation : id << loc->id; fh << id.str(); if (expand_id) { if (loc->annotation.length() > 0) id << "\t" << loc->id << "\t" << loc->annotation; else if (strlen(loc->loc.chr) > 0) id << "\t" << loc->id << "\t" << loc->loc.chr << "_" << loc->loc.bp; else id << "\t" << loc->id << "\t"; } if (write_gtypes) fh << "\t" << loc->marker; write_gtypes ? fh << "\t" << loc->gcnt : fh << "\t" << loc->hcnt; Datum **d = pmap->locus(loc->id); string obshap; for (int i = 0; i < pmap->sample_cnt(); i++) { fh << "\t"; if (d[i] == NULL) fh << "-"; else if (write_gtypes) { fh << d[i]->gtype; } else { obshap = ""; for (uint j = 0; j < d[i]->obshap.size(); j++) obshap += string(d[i]->obshap[j]) + "/"; obshap = obshap.substr(0, obshap.length()-1); fh << obshap; } } fh << "\n"; } fh.close(); return 0; } int write_sql(map &catalog, PopMap *pmap) { stringstream pop_name; pop_name << "batch_" << batch_id << ".markers.tsv"; string file = in_path + pop_name.str(); cerr << "Writing SQL markers file to '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening markers SQL file '" << file << "'\n"; exit(1); } fh.precision(fieldw); fh.setf(std::ios::fixed); fh << "# SQL ID" << "\t" << "Batch ID" << "\t" << "Catalog Locus ID" << "\t" << "\t" << "Total Genotypes" << "\t" << "Max" << "\t" << "Genotype Freqs" << "\t" << 
"F" << "\t" << "Mean Log Likelihood" << "\t" << "Genotype Map" << "\t" << "\n"; map::iterator it; CSLocus *loc; stringstream gtype_map; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; string freq = ""; double max = 0.0; int total = 0; gtype_map.str(""); if (loc->marker.length() > 0) { tally_haplotype_freq(loc, pmap, total, max, freq); // // Record the haplotype to genotype map. // map::iterator j; for (j = loc->gmap.begin(); j != loc->gmap.end(); j++) gtype_map << j->first << ":" << j->second << ";"; } fh << 0 << "\t" << batch_id << "\t" << loc->id << "\t" << "\t" // Marker << total << "\t" << max << "\t" << freq << "\t" << loc->f << "\t" << loc->lnl << "\t" << gtype_map.str() << "\t" << "\n"; } fh.close(); return 0; } int write_fasta(map &catalog, PopMap *pmap, map &samples, vector &sample_ids) { // // Write a FASTA file containing each allele from each locus from // each sample in the population. // stringstream pop_name; pop_name << "batch_" << batch_id << ".fa"; string file = in_path + pop_name.str(); cerr << "Writing population alleles to FASTA file '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening FASTA file '" << file << "'\n"; exit(1); } map >::iterator it; CSLocus *loc; Datum **d; char *seq; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; d = pmap->locus(loc->id); seq = new char[loc->len + 1]; strcpy(seq, loc->con); for (int j = 0; j < pmap->sample_cnt(); j++) { if (d[j] == NULL) continue; for (uint k = 0; k < d[j]->obshap.size(); k++) { for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; seq[col] = col < loc->len ? 
d[j]->obshap[k][i] : loc->con[col]; } fh << ">CLocus_" << loc->id << "_Sample_" << pmap->rev_sample_index(j) << "_Locus_" << d[j]->id << "_Allele_" << k << " [" << samples[pmap->rev_sample_index(j)]; if (strcmp(loc->loc.chr, "un") != 0) fh << "; " << loc->loc.chr << ", " << loc->sort_bp() + 1 << ", " << (loc->loc.strand == plus ? "+" : "-"); fh << "]\n" << seq << "\n"; } } delete [] seq; } } fh.close(); return 0; } int write_strict_fasta(map &catalog, PopMap *pmap, map &samples, vector &sample_ids) { // // Write a FASTA file containing each allele from each locus from // each sample in the population. // stringstream pop_name; pop_name << "batch_" << batch_id << ".strict.fa"; string file = in_path + pop_name.str(); cerr << "Writing strict population alleles to FASTA file '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening strict FASTA file '" << file << "'\n"; exit(1); } map >::iterator it; CSLocus *loc; Datum **d; char *seq; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; d = pmap->locus(loc->id); seq = new char[loc->len + 1]; strcpy(seq, loc->con); for (int j = 0; j < pmap->sample_cnt(); j++) { if (d[j] == NULL) continue; if (d[j]->obshap.size() > 2) continue; if (d[j]->obshap.size() == 1) { for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; seq[col] = col < loc->len ? d[j]->obshap[0][i] : loc->con[col]; } fh << ">CLocus_" << loc->id << "_Sample_" << pmap->rev_sample_index(j) << "_Locus_" << d[j]->id << "_Allele_" << 0 << " [" << samples[pmap->rev_sample_index(j)]; if (strcmp(loc->loc.chr, "un") != 0) fh << "; " << loc->loc.chr << ", " << loc->sort_bp() + 1 << ", " << (loc->loc.strand == plus ? 
"+" : "-"); fh << "]\n" << seq << "\n"; fh << ">CLocus_" << loc->id << "_Sample_" << pmap->rev_sample_index(j) << "_Locus_" << d[j]->id << "_Allele_" << 1 << " [" << samples[pmap->rev_sample_index(j)]; if (strcmp(loc->loc.chr, "un") != 0) fh << "; " << loc->loc.chr << ", " << loc->sort_bp() + 1 << ", " << (loc->loc.strand == plus ? "+" : "-"); fh << "]\n" << seq << "\n"; } else { for (uint k = 0; k < d[j]->obshap.size(); k++) { for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; seq[col] = col < loc->len ? d[j]->obshap[k][i] : loc->con[col]; } fh << ">CLocus_" << loc->id << "_Sample_" << pmap->rev_sample_index(j) << "_Locus_" << d[j]->id << "_Allele_" << k << " [" << samples[pmap->rev_sample_index(j)]; if (strcmp(loc->loc.chr, "un") != 0) fh << "; " << loc->loc.chr << ", " << loc->sort_bp() + 1 << ", " << (loc->loc.strand == plus ? "+" : "-"); fh << "]\n" << seq << "\n"; } } } delete [] seq; } } fh.close(); return 0; } int write_vcf_ordered(map &catalog, PopMap *pmap, PopSum *psum, map &samples, vector &sample_ids, map > &merge_map, ofstream &log_fh) { // // Write a VCF file as defined here: http://www.1000genomes.org/node/101 // stringstream pop_name; pop_name << "batch_" << batch_id << ".vcf"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening VCF file '" << file << "'\n"; exit(1); } // // Load SNP data so that model likelihoods can be output to VCF file. // cerr << "In preparation for VCF export, loading SNP data for " << samples.size() << " samples.\n"; populate_snp_calls(catalog, pmap, samples, sample_ids, merge_map); cerr << "Writing population data to VCF file '" << file << "'\n"; log_fh << "\n#\n# Generating SNP-based VCF export.\n#\n"; // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%Y%m%d", timeinfo); // // Output the header. 
// fh << "##fileformat=VCFv4.0\n" << "##fileDate=" << date << "\n" << "##source=\"Stacks v" << VERSION << "\"\n" << "##INFO=\n" << "##INFO=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "#CHROM" << "\t" << "POS" << "\t" << "ID" << "\t" << "REF" << "\t" << "ALT" << "\t" << "QUAL" << "\t" << "FILTER" << "\t" << "INFO" << "\t" << "FORMAT"; for (int i = 0; i < pmap->sample_cnt(); i++) fh << "\t" << samples[pmap->rev_sample_index(i)]; fh << "\n"; map >::iterator it; CSLocus *loc; Datum **d; int gt_1, gt_2, dp_1, dp_2; char p_allele, q_allele, p_str[32], q_str[32]; uint16_t col; int snp_index; // // We need to order the SNPs taking into account overlapping loci. // OLocTally *ord = new OLocTally(psum, log_fh); for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector sites; ord->order(sites, it->second); for (uint pos = 0; pos < sites.size(); pos++) { if (catalog.count(sites[pos]->loc_id) == 0) { cerr << "Unable to find locus id " << sites[pos]->loc_id << "\n"; continue; } loc = catalog[sites[pos]->loc_id]; col = sites[pos]->col; sprintf(p_str, "%0.3f", sites[pos]->p_freq); sprintf(q_str, "%0.3f", 1 - sites[pos]->p_freq); // // If on the negative strand, complement the alleles. // p_allele = loc->loc.strand == minus ? reverse(sites[pos]->p_allele) : sites[pos]->p_allele; q_allele = loc->loc.strand == minus ? reverse(sites[pos]->q_allele) : sites[pos]->q_allele; fh << loc->loc.chr << "\t" << loc->sort_bp(col) + 1 << "\t" << loc->id << "\t" << p_allele << "\t" // REFerence allele << q_allele << "\t" // ALTernate allele << "." 
<< "\t" // QUAL << "PASS" << "\t" // FILTER << "NS=" << sites[pos]->num_indv << ";" // INFO << "AF=" << p_str << "," << q_str << "\t" // INFO << "GT:DP:AD:GL"; // FORMAT snp_index = loc->snp_index(col); if (snp_index < 0) { cerr << "Warning, unable to locate SNP call in column " << col << " for catalog locus " << loc->id << "\n"; fh << "\n"; continue; } d = pmap->locus(loc->id); for (int j = 0; j < pmap->sample_cnt(); j++) { fh << "\t"; if (d[j] == NULL) { // // Data does not exist. // fh << "./.:0:.,.:.,.,."; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "./.:" << d[j]->tot_depth << ":.,.:.,.,."; } else { // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) { // More than two potential alleles. fh << "./.:" << d[j]->tot_depth << ":.,.:.,.,."; } else { find_datum_allele_depths(d[j], snp_index, sites[pos]->p_allele, sites[pos]->q_allele, p_allele+q_allele, dp_1, dp_2); if (p_allele == 0) { gt_1 = q_allele == sites[pos]->p_allele ? 0 : 1; fh << gt_1 << "/" << gt_1 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } else if (q_allele == 0) { gt_1 = p_allele == sites[pos]->p_allele ? 0 : 1; fh << gt_1 << "/" << gt_1 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } else { gt_1 = p_allele == sites[pos]->p_allele ? 0 : 1; gt_2 = q_allele == sites[pos]->p_allele ? 0 : 1; fh << gt_1 << "/" << gt_2 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } // // Output the likelihood for this model call. 
// if (col < d[j]->snps.size()) { fh << ":.," << d[j]->snps[col]->lratio << ",."; } else { cerr << "Warning, unable to locate SNP call in column " << col << " for catalog locus " << loc->id << ", tag ID " << d[j]->id << "\n"; fh << ":.,.,."; } } } } fh << "\n"; } } fh.close(); return 0; } int write_vcf(map &catalog, PopMap *pmap, PopSum *psum, map &samples, vector &sample_ids, map > &merge_map) { // // Write a VCF file as defined here: http://www.1000genomes.org/node/101 // stringstream pop_name; pop_name << "batch_" << batch_id << ".vcf"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening VCF file '" << file << "'\n"; exit(1); } cerr << "In preparation for VCF export, loading SNP data for " << samples.size() << " samples.\n"; // // Load SNP data so that model likelihoods can be output to VCF file. // populate_snp_calls(catalog, pmap, samples, sample_ids, merge_map); // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%Y%m%d", timeinfo); cerr << "Writing population data to VCF file '" << file << "'\n"; // // Output the header. 
// fh << "##fileformat=VCFv4.0\n" << "##fileDate=" << date << "\n" << "##source=\"Stacks v" << VERSION << "\"\n" << "##INFO=\n" << "##INFO=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "#CHROM" << "\t" << "POS" << "\t" << "ID" << "\t" << "REF" << "\t" << "ALT" << "\t" << "QUAL" << "\t" << "FILTER" << "\t" << "INFO" << "\t" << "FORMAT"; for (int i = 0; i < pmap->sample_cnt(); i++) fh << "\t" << samples[pmap->rev_sample_index(i)]; fh << "\n"; map >::iterator it; CSLocus *loc; Datum **d; LocTally *t; int gt_1, gt_2, dp_1, dp_2; double num_indv; char p_allele, q_allele, p_str[32], q_str[32]; int snp_index; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { // // We need to order the SNPs so negative and positive strand SNPs are properly ordered. // vector ordered_loci; uint col; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) ordered_loci.push_back(GenPos(loc->id, i, loc->sort_bp(col))); } } sort(ordered_loci.begin(), ordered_loci.end(), compare_genpos); for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; col = loc->snps[ordered_loci[pos].snp_index]->col; t = psum->locus_tally(loc->id); num_indv = (double) t->nucs[col].num_indv; sprintf(p_str, "%0.3f", t->nucs[col].p_freq); sprintf(q_str, "%0.3f", 1 - t->nucs[col].p_freq); // // If on the negative strand, complement the alleles. // p_allele = loc->loc.strand == minus ? reverse(t->nucs[col].p_allele) : t->nucs[col].p_allele; q_allele = loc->loc.strand == minus ? reverse(t->nucs[col].q_allele) : t->nucs[col].q_allele; fh << loc->loc.chr << "\t" << loc->sort_bp(col) + 1 << "\t" << loc->id << "\t" << p_allele << "\t" // REFerence allele << q_allele << "\t" // ALTernate allele << "." 
<< "\t" // QUAL << "PASS" << "\t" // FILTER << "NS=" << num_indv << ";" // INFO << "AF=" << p_str << "," << q_str << "\t" // INFO << "GT:DP:AD:GL"; // FORMAT snp_index = loc->snp_index(col); if (snp_index < 0) { cerr << "Warning, unable to locate SNP call in column " << col << " for catalog locus " << loc->id << "\n"; fh << "\n"; continue; } d = pmap->locus(loc->id); for (int j = 0; j < pmap->sample_cnt(); j++) { fh << "\t"; if (d[j] == NULL) { // // Data does not exist. // fh << "./.:0:.,.:.,.,."; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "./.:" << d[j]->tot_depth << ":.,.:.,.,."; } else { // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) { // More than two potential alleles. fh << "./.:" << d[j]->tot_depth << ":.,.:.,.,."; } else { find_datum_allele_depths(d[j], snp_index, t->nucs[col].p_allele, t->nucs[col].q_allele, p_allele+q_allele, dp_1, dp_2); if (p_allele == 0) { gt_1 = q_allele == t->nucs[col].p_allele ? 0 : 1; fh << gt_1 << "/" << gt_1 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } else if (q_allele == 0) { gt_1 = p_allele == t->nucs[col].p_allele ? 0 : 1; fh << gt_1 << "/" << gt_1 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } else { gt_1 = p_allele == t->nucs[col].p_allele ? 0 : 1; gt_2 = q_allele == t->nucs[col].p_allele ? 0 : 1; fh << gt_1 << "/" << gt_2 << ":" << d[j]->tot_depth << ":" << dp_1 << "," << dp_2; } // // Output the likelihood measure for this model call. 
// if (snp_index >= 0) { fh << ":.," << d[j]->snps[snp_index]->lratio << ",."; } else { cerr << "Warning, unable to locate SNP call in column " << col << " for catalog locus " << loc->id << ", tag ID " << d[j]->id << "\n"; fh << ":.,.,."; } } } } fh << "\n"; } } fh.close(); return 0; } int populate_snp_calls(map &catalog, PopMap *pmap, map &samples, vector &sample_ids, map > &merge_map) { map::iterator cit; map::iterator sit; CSLocus *loc; Datum *datum; SNPRes *snpr; SNP *snp; for (uint i = 0; i < sample_ids.size(); i++) { map snpres; load_snp_calls(in_path + samples[sample_ids[i]], snpres); for (cit = catalog.begin(); cit != catalog.end(); cit++) { loc = cit->second; datum = pmap->datum(loc->id, sample_ids[i]); if (datum != NULL && snpres.count(datum->id)) { if (merge_sites && merge_map.count(loc->id)) { datum_adjust_snp_positions(merge_map, loc, datum, snpres); } else { // // Deep copy the SNP objects. // snpr = snpres[datum->id]; for (uint j = 0; j < snpr->snps.size(); j++) { snp = new SNP; snp->col = snpr->snps[j]->col; snp->lratio = snpr->snps[j]->lratio; snp->rank_1 = snpr->snps[j]->rank_1; snp->rank_2 = snpr->snps[j]->rank_2; snp->rank_3 = snpr->snps[j]->rank_3; snp->rank_4 = snpr->snps[j]->rank_4; datum->snps.push_back(snp); } } } } for (sit = snpres.begin(); sit != snpres.end(); sit++) delete sit->second; } return 0; } int find_datum_allele_depths(Datum *d, int snp_index, char p_allele, char q_allele, int allele_cnt, int &dp_1, int &dp_2) { dp_1 = 0; dp_2 = 0; if (allele_cnt == 1) { // // There is a single observed haplotype for this locus, e.g. GA. // if (d->obshap.size() == 1) { if (d->obshap[0][snp_index] == p_allele) { dp_1 = d->depth[0]; dp_2 = 0; } else { dp_1 = 0; dp_2 = d->depth[0]; } } else { // // This SNP position is homozygous, but the locus is heterozygous, so there is more // than one observed haplotype, e.g. GA / TA. 
// if (d->obshap[0][snp_index] == p_allele) { dp_1 = d->tot_depth; dp_2 = 0; } else { dp_1 = 0; dp_2 = d->tot_depth; } } } else { // // This SNP position is heterozygous. // for (uint i = 0; i < d->obshap.size(); i++) { if (d->obshap[i][snp_index] == p_allele) dp_1 = d->depth[i]; else if (d->obshap[i][snp_index] == q_allele) dp_2 = d->depth[i]; } } if (dp_1 == 0 && dp_2 == 0) cerr << "Warning: Unable to find allele depths for datum " << d->id << "\n"; return 0; } int write_vcf_haplotypes(map &catalog, PopMap *pmap, PopSum *psum, map &samples, vector &sample_ids) { // // Write a VCF file as defined here: http://samtools.github.io/hts-specs/ // stringstream pop_name; pop_name << "batch_" << batch_id << ".haplotypes.vcf"; string file = in_path + pop_name.str(); cerr << "Writing population data haplotypes to VCF file '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening VCF file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%Y%m%d", timeinfo); // // Output the header. 
// fh << "##fileformat=VCFv4.2\n" << "##fileDate=" << date << "\n" << "##source=\"Stacks v" << VERSION << "\"\n" << "##INFO=\n" << "##INFO=\n" << "##FORMAT=\n" << "##FORMAT=\n" << "#CHROM" << "\t" << "POS" << "\t" << "ID" << "\t" << "REF" << "\t" << "ALT" << "\t" << "QUAL" << "\t" << "FILTER" << "\t" << "INFO" << "\t" << "FORMAT"; for (int i = 0; i < pmap->sample_cnt(); i++) fh << "\t" << samples[pmap->rev_sample_index(i)]; fh << "\n"; map >::iterator it; map::iterator hit; map hap_freq; map hap_index; vector > ordered_hap; CSLocus *loc; Datum **d; double num_indv, num_hap; char allele[id_len]; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; d = pmap->locus(loc->id); hap_freq.clear(); hap_index.clear(); ordered_hap.clear(); num_hap = count_haplotypes_at_locus(0, pmap->sample_cnt() - 1, d, hap_freq); if (num_hap == 0 || hap_freq.size() == 1) continue; num_indv = num_hap / 2.0; // // Order the haplotypes according to most frequent. Record the ordered position or each // haplotype and convert them from counts to frequencies. // for (hit = hap_freq.begin(); hit != hap_freq.end(); hit++) { ordered_hap.push_back(make_pair(hit->first, hit->second)); hit->second = hit->second / num_hap; } sort(ordered_hap.begin(), ordered_hap.end(), compare_pair_haplotype); for (uint i = 0; i < ordered_hap.size(); i++) hap_index[ordered_hap[i].first] = i; string alt_str, freq_str; for (uint i = 1; i < ordered_hap.size(); i++) { alt_str += ordered_hap[i].first; sprintf(allele, "%0.3f", hap_freq[ordered_hap[i].first]); freq_str += allele; if (i < ordered_hap.size() - 1) { alt_str += ","; freq_str += ","; } } fh << loc->loc.chr << "\t" << loc->sort_bp() + 1 << "\t" << loc->id << "\t" << ordered_hap[0].first << "\t" // REFerence haplotypes << alt_str << "\t" // ALTernate haplotypes << "." 
<< "\t" // QUAL << "PASS" << "\t" // FILTER << "NS=" << num_indv << ";" // INFO << "AF=" << freq_str << "\t" // INFO << "GT:DP"; // FORMAT for (int j = 0; j < pmap->sample_cnt(); j++) { fh << "\t"; if (d[j] == NULL) { // // Data does not exist. // fh << "./.:0"; } else if (d[j]->obshap.size() > 2) { fh << "./.:" << d[j]->tot_depth; } else if (d[j]->obshap.size() == 1) { if(uncalled_haplotype(d[j]->obshap[0])) fh << "./.:" << d[j]->tot_depth; else fh << hap_index[d[j]->obshap[0]] << "/" << hap_index[d[j]->obshap[0]] << ":" << d[j]->tot_depth; } else { if(!uncalled_haplotype(d[j]->obshap[0]) && !uncalled_haplotype(d[j]->obshap[1])) fh << hap_index[d[j]->obshap[0]] << "/" << hap_index[d[j]->obshap[1]] << ":" << d[j]->tot_depth; else if (!uncalled_haplotype(d[j]->obshap[0])) fh << hap_index[d[j]->obshap[0]] << "/" << "." << ":" << d[j]->tot_depth; else if (!uncalled_haplotype(d[j]->obshap[1])) fh << "." << "/" << hap_index[d[j]->obshap[1]] << ":" << d[j]->tot_depth; } } fh << "\n"; } } fh.close(); return 0; } int write_genepop(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a GenePop file as defined here: http://kimura.univ-montp2.fr/~rousset/Genepop.htm // stringstream pop_name; pop_name << "batch_" << batch_id << ".genepop"; string file = in_path + pop_name.str(); cerr << "Writing population data to GenePop file '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening GenePop file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); // // Output the header line. 
// fh << "Stacks version " << VERSION << "; Genepop version 4.1.3; " << date << "\n"; map >::iterator pit; map::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; int start_index, end_index, col, pop_id; char p_allele, q_allele; // // Determine how many loci will be output, then output all the loci on the second line, comma-separated. // uint cnt = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; for (uint j = 0; j < loc->snps.size(); j++) { col = loc->snps[j]->col; t = psum->locus_tally(loc->id); if (t->nucs[col].allele_cnt != 2) continue; cnt++; } } uint i = 0; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; for (uint j = 0; j < loc->snps.size(); j++) { col = loc->snps[j]->col; t = psum->locus_tally(loc->id); // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; i++; fh << loc->id << "_" << col; if (i < cnt) fh << ","; } } fh << "\n"; map nuc_map; nuc_map['A'] = "01"; nuc_map['C'] = "02"; nuc_map['G'] = "03"; nuc_map['T'] = "04"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; fh << "pop\n"; for (int j = start_index; j <= end_index; j++) { fh << samples[pmap->rev_sample_index(j)] << ","; for (it = catalog.begin(); it != catalog.end(); it++) { loc = it->second; d = pmap->locus(loc->id); s = psum->locus(loc->id); t = psum->locus_tally(loc->id); for (i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt != 2) continue; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t0000"; } else if (d[j] == NULL) { // // Data does not exist. 
// fh << "\t0000"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "\t0000"; } else { // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, i, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) { // More than two potential alleles. fh << "\t0000"; } else if (p_allele == 0) { fh << "\t" << nuc_map[q_allele] << nuc_map[q_allele]; } else if (q_allele == 0) { fh << "\t" << nuc_map[p_allele] << nuc_map[p_allele]; } else { fh << "\t" << nuc_map[p_allele] << nuc_map[q_allele]; } } } } fh << "\n"; } } fh.close(); return 0; } int write_genepop_ordered(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples, ofstream &log_fh) { // // Write a GenePop file as defined here: http://kimura.univ-montp2.fr/~rousset/Genepop.htm // stringstream pop_name; pop_name << "batch_" << batch_id << ".genepop"; string file = in_path + pop_name.str(); cerr << "Writing population data to GenePop file '" << file << "'\n"; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening GenePop file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); // // Output the header line. // fh << "Stacks version " << VERSION << "; Genepop version 4.1.3; " << date << "\n"; map > genome_sites; map >::iterator pit; map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; int start_index, end_index, pop_id; uint col, snp_index; char p_allele, q_allele; // // We need to order the SNPs to take into account overlapping loci. // OLocTally *ord = new OLocTally(psum, log_fh); // // Output all the loci on the second line, comma-separated. 
// int chrs = pmap->ordered_loci.size(); int cnt = 0; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &sites = genome_sites[it->first]; ord->order(sites, it->second); cnt++; for (uint pos = 0; pos < sites.size(); pos++) { fh << sites[pos]->loc_id << "_" << sites[pos]->col; if (cnt < chrs || pos < sites.size() - 1) fh << ","; } } fh << "\n"; map nuc_map; nuc_map['A'] = "01"; nuc_map['C'] = "02"; nuc_map['G'] = "03"; nuc_map['T'] = "04"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; fh << "pop\n"; for (int j = start_index; j <= end_index; j++) { fh << samples[pmap->rev_sample_index(j)] << ","; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &sites = genome_sites[it->first]; for (uint pos = 0; pos < sites.size(); pos++) { loc = catalog[sites[pos]->loc_id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); col = sites[pos]->col; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t0000"; } else if (d[j] == NULL) { // // Data does not exist. // fh << "\t0000"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "\t0000"; } else { snp_index = loc->snp_index(col); // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) { // More than two potential alleles. 
fh << "\t0000"; } else if (p_allele == 0) { fh << "\t" << nuc_map[q_allele] << nuc_map[q_allele]; } else if (q_allele == 0) { fh << "\t" << nuc_map[p_allele] << nuc_map[p_allele]; } else { fh << "\t" << nuc_map[p_allele] << nuc_map[q_allele]; } } } } fh << "\n"; } } fh.close(); return 0; } int write_structure(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a Structure file as defined here: http://pritch.bsd.uchicago.edu/structure.html // // To avoid linked SNPs (which Structure can't handle), we will only output the first // SNP from each variable locus. // stringstream pop_name; pop_name << "batch_" << batch_id << ".structure.tsv"; string file = in_path + pop_name.str(); cerr << "Writing population data to Structure file '" << file << "'..."; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Structure file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); // // Output the header. 
// fh << "# Stacks v" << VERSION << "; " << " Structure v2.3; " << date << "\n"; map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) fh << "\t" << loc->id << "_" << col; } } } fh << "\n"; map nuc_map; nuc_map['A'] = "1"; nuc_map['C'] = "2"; nuc_map['G'] = "3"; nuc_map['T'] = "4"; map >::iterator pit; int start_index, end_index, pop_id, p; char p_allele, q_allele; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { p = psum->pop_index(pit->first); pop_id = pit->first; start_index = pit->second.first; end_index = pit->second.second; for (int j = start_index; j <= end_index; j++) { // // Output all the loci for this sample, printing only the p allele // fh << samples[pmap->rev_sample_index(j)] << "\t" << pop_id; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; if (s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t" << "0"; } else if (d[j] == NULL) { // // Data does not exist. // fh << "\t" << "0"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "\t" << "0"; } else { // // Tally up the nucleotide calls. 
// tally_observed_haplotypes(d[j]->obshap, i, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "0"; else if (p_allele == 0) fh << "\t" << nuc_map[q_allele]; else fh << "\t" << nuc_map[p_allele]; } } } } fh << "\n"; // // Output all the loci for this sample again, now for the q allele // fh << samples[pmap->rev_sample_index(j)] << "\t" << pop_id; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt != 2) continue; if (s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { fh << "\t" << "0"; } else if (d[j] == NULL) { fh << "\t" << "0"; } else if (d[j]->model[col] == 'U') { fh << "\t" << "0"; } else { tally_observed_haplotypes(d[j]->obshap, i, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "0"; else if (q_allele == 0) fh << "\t" << nuc_map[p_allele]; else fh << "\t" << nuc_map[q_allele]; } } } } fh << "\n"; } } fh.close(); cerr << "done.\n"; return 0; } int write_structure_ordered(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples, ofstream &log_fh) { // // Write a Structure file as defined here: http://pritch.bsd.uchicago.edu/structure.html // // To avoid linked SNPs (which Structure can't handle), we will only output the first // SNP from each variable locus. // stringstream pop_name; pop_name << "batch_" << batch_id << ".structure.tsv"; string file = in_path + pop_name.str(); cerr << "Writing population data to Structure file '" << file << "'..."; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Structure file '" << file << "'\n"; exit(1); } // // Obtain the current date. 
// time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " Structure v2.3; " << date << "\n"; map > genome_sites; map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; // // We need to order the SNPs to take into account overlapping loci. // OLocTally *ord = new OLocTally(psum, log_fh); for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &sites = genome_sites[it->first]; ord->order(sites, it->second); for (uint pos = 0; pos < sites.size(); pos++) fh << "\t" << sites[pos]->loc_id << "_" << sites[pos]->col; } fh << "\n"; map nuc_map; nuc_map['A'] = "1"; nuc_map['C'] = "2"; nuc_map['G'] = "3"; nuc_map['T'] = "4"; map >::iterator pit; int start_index, end_index, pop_id, p; char p_allele, q_allele; uint col, snp_index; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { p = psum->pop_index(pit->first); pop_id = pit->first; start_index = pit->second.first; end_index = pit->second.second; for (int j = start_index; j <= end_index; j++) { // // Output all the loci for this sample, printing only the p allele // fh << samples[pmap->rev_sample_index(j)] << "\t" << pop_id; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &sites = genome_sites[it->first]; for (uint pos = 0; pos < sites.size(); pos++) { loc = catalog[sites[pos]->loc_id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); col = sites[pos]->col; if (s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t" << "0"; } else if (d[j] == NULL) { // // Data does not exist. // fh << "\t" << "0"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. 
// fh << "\t" << "0"; } else { snp_index = loc->snp_index(col); // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "0"; else if (p_allele == 0) fh << "\t" << nuc_map[q_allele]; else fh << "\t" << nuc_map[p_allele]; } } } fh << "\n"; // // Output all the loci for this sample again, now for the q allele // fh << samples[pmap->rev_sample_index(j)] << "\t" << pop_id; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { vector &sites = genome_sites[it->first]; for (uint pos = 0; pos < sites.size(); pos++) { loc = catalog[sites[pos]->loc_id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); col = sites[pos]->col; if (s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { fh << "\t" << "0"; } else if (d[j] == NULL) { fh << "\t" << "0"; } else if (d[j]->model[col] == 'U') { fh << "\t" << "0"; } else { snp_index = loc->snp_index(col); tally_observed_haplotypes(d[j]->obshap, snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "0"; else if (q_allele == 0) fh << "\t" << nuc_map[p_allele]; else fh << "\t" << nuc_map[q_allele]; } } } fh << "\n"; } } fh.close(); cerr << "done.\n"; return 0; } int write_hzar(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a Hybrid Zone Analysis using R (HZAR) file as defined here: // http://cran.r-project.org/web/packages/hzar/hzar.pdf // stringstream pop_name; pop_name << "batch_" << batch_id << ".hzar.csv"; string file = in_path + pop_name.str(); cerr << "Writing population data to HZAR file '" << file << "'..."; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening HZAR file '" << file << "'\n"; exit(1); } // // Obtain the current date. 
// time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " HZAR v0.2-5; " << date << "\n" << "Population" << "," << "Distance"; map >::iterator it; CSLocus *loc; LocSum **s; LocTally *t; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) { fh << "," << loc->id << "_" << col << ".A" << "," << loc->id << "_" << col << ".B" << "," << loc->id << "_" << col << ".N"; } } } } fh << "\n"; map >::iterator pit; int pop_id, p; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { p = psum->pop_index(pit->first); pop_id = pit->first; fh << pop_key[pop_id] << ","; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; // // If this site is fixed in all populations or has too many alleles don't output it. 
// if (t->nucs[col].allele_cnt != 2) continue; if (s[p]->nucs[col].num_indv == 0 || s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { fh << ",0,0,0"; continue; } if (t->nucs[col].p_allele == s[p]->nucs[col].p_nuc) fh << "," << s[p]->nucs[col].p << "," << 1 - s[p]->nucs[col].p << ","; else fh << "," << 1 - s[p]->nucs[col].p << "," << s[p]->nucs[col].p << ","; fh << s[p]->nucs[col].num_indv * 2; } } } fh << "\n"; } fh.close(); cerr << "done.\n"; return 0; } int write_treemix(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a TreeMix file (Pickrell and Pritchard, 2012 PLoS Genetics) // https://bitbucket.org/nygcresearch/treemix/wiki/Home // stringstream pop_name; pop_name << "batch_" << batch_id << ".treemix"; string file = in_path + pop_name.str(); cerr << "Writing population data to TreeMix file '" << file << "'; "; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening TreeMix file '" << file << "'\n"; exit(1); } pop_name << ".log"; file = in_path + pop_name.str(); cerr << "logging nucleotide positions to '" << file << "'..."; ofstream log_fh(file.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening Phylip Log file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); log_fh << "# Stacks v" << VERSION << "; " << " TreeMix v1.1; " << date << "\n" << "# Line\tLocus ID\tColumn\tChr\tBasepair\n"; // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " TreeMix v1.1; " << date << "\n"; map >::iterator it; map >::iterator pit; CSLocus *loc; LocSum **s; LocTally *t; int p; // // Output a space-separated list of the populations on the first line. 
// stringstream sstr; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) sstr << pop_key[pit->first] << " "; fh << sstr.str().substr(0, sstr.str().length() - 1) << "\n"; double p_freq, p_cnt, q_cnt, allele_cnt; long int line = 1; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; sstr.str(""); // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { p = psum->pop_index(pit->first); if (s[p]->nucs[col].num_indv == 0 || s[p]->nucs[col].incompatible_site || s[p]->nucs[col].filtered_site) { sstr << "0,0 "; continue; } p_freq = (t->nucs[col].p_allele == s[p]->nucs[col].p_nuc) ? s[p]->nucs[col].p : 1 - s[p]->nucs[col].p; allele_cnt = s[p]->nucs[col].num_indv * 2; p_cnt = round(allele_cnt * p_freq); q_cnt = allele_cnt - p_cnt; sstr << (int) p_cnt << "," << (int) q_cnt << " "; } if (sstr.str().length() == 0) continue; fh << sstr.str().substr(0, sstr.str().length() - 1) << "\n"; log_fh << line << "\t" << loc->id << "\t" << col << "\t" << loc->loc.chr << "\t" << loc->sort_bp(col) + 1 << "\n"; line++; } } } fh.close(); log_fh.close(); cerr << "done.\n"; return 0; } int write_fastphase(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a fastPHASE file as defined here: http://stephenslab.uchicago.edu/software.html // // Data will be written as independent, bi-allelic SNPs. We will write one file per chromosome. 
// cerr << "Writing population data to fastPHASE files..."; map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { stringstream pop_name; pop_name << "batch_" << batch_id << "." << it->first << ".fastphase.inp"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening fastPHASE file '" << file << "'\n"; exit(1); } // // Tally up the number of sites // int total_sites = 0; uint col; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) total_sites++; } } // // Output the total number of SNP sites and the number of individuals. // fh << samples.size() << "\n" << total_sites << "\n"; // // We need to determine an ordering that can take into account overlapping RAD sites. // vector ordered_loci; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) ordered_loci.push_back(GenPos(loc->id, i, loc->sort_bp(col))); } } sort(ordered_loci.begin(), ordered_loci.end(), compare_genpos); // // Output the position of each site according to its basepair. // fh << "P"; for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; col = loc->snps[ordered_loci[pos].snp_index]->col; fh << " " << ordered_loci[pos].bp; } fh << "\n"; // // Output a line of 'S' characters, one per site, indicating that these are SNP markers. // string snp_markers, gtypes_str; snp_markers.assign(total_sites, 'S'); fh << snp_markers << '\n'; // // Now output each sample name followed by a new line, then all of the genotypes for that sample // on two lines. 
// map >::iterator pit; int start_index, end_index, pop_id; char p_allele, q_allele; stringstream gtypes; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; for (int j = start_index; j <= end_index; j++) { // // Output all the loci for this sample, printing only the p allele // fh << samples[pmap->rev_sample_index(j)] << "\n"; gtypes.str(""); for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; col = loc->snps[ordered_loci[pos].snp_index]->col; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // gtypes << "? "; } else if (d[j] == NULL) { // // Data does not exist. // gtypes << "? "; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // gtypes << "? "; } else { // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) gtypes << "? 
"; else if (p_allele == 0) gtypes << q_allele << " "; else gtypes << p_allele << " "; } } gtypes_str = gtypes.str(); fh << gtypes_str.substr(0, gtypes_str.length() - 1) << "\n"; // // Output all the loci for this sample again, now for the q allele // gtypes.str(""); for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; col = loc->snps[ordered_loci[pos].snp_index]->col; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); if (t->nucs[col].allele_cnt != 2) continue; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { gtypes << "? "; } else if (d[j] == NULL) { gtypes << "? "; } else if (d[j]->model[col] == 'U') { gtypes << "? "; } else { tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) gtypes << "? "; else if (q_allele == 0) gtypes << p_allele << " "; else gtypes << q_allele << " "; } } gtypes_str = gtypes.str(); fh << gtypes_str.substr(0, gtypes_str.length() - 1) << "\n"; } } fh.close(); } cerr << "done.\n"; return 0; } int write_phase(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a PHASE file as defined here: http://stephenslab.uchicago.edu/software.html // // Data will be written as mixture of multiple allele, linked RAD sites // (SNPs within a single RAD locus are already phased), and bi-allelic SNPs. We // will write one file per chromosome. // cerr << "Writing population data to PHASE files..."; map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { stringstream pop_name; pop_name << "batch_" << batch_id << "." 
<< it->first << ".phase.inp"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening PHASE file '" << file << "'\n"; exit(1); } // // We need to determine an ordering for all legitimate loci/SNPs. // uint col; vector ordered_loci; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); if (loc->snps.size() == 0) continue; // // Will we output this locus as a haplotype or as a SNP? // if (loc->snps.size() > 1) { // // Check that there aren't too many haplotypes (PHASE has a max of 50). // if (loc->alleles.size() > 40) continue; // // Iterate over the population to determine that this subset of the population // has data at this locus. // d = pmap->locus(loc->id); for (int j = 0; j < pmap->sample_cnt(); j++) { if (d[j] != NULL && d[j]->obshap.size() > 0 && d[j]->obshap.size() <= 2) { // // Data exists, and there are the correct number of haplotypes. // ordered_loci.push_back(GenPos(loc->id, 0, loc->sort_bp(), haplotype)); break; } } } else { col = loc->snps[0]->col; if (t->nucs[col].allele_cnt == 2) ordered_loci.push_back(GenPos(loc->id, 0, loc->sort_bp(col), snp)); } } sort(ordered_loci.begin(), ordered_loci.end(), compare_genpos); // // Output the total number of SNP sites and the number of individuals. // fh << samples.size() << "\n" << ordered_loci.size() << "\n"; // // Output the position of each site according to its basepair. // fh << "P"; for (uint pos = 0; pos < ordered_loci.size(); pos++) fh << " " << ordered_loci[pos].bp; fh << "\n"; // // Output a line of 'S' characters for SNP markers, 'M' characters for multiallelic haplotypes. // for (uint pos = 0; pos < ordered_loci.size(); pos++) { if (pos > 0) fh << " "; fh << (ordered_loci[pos].type == snp ? "S" : "M"); } fh << "\n"; // // Now output each sample name followed by a new line, then all of the genotypes for that sample // on two lines. 
// map >::iterator pit; string gtypes_str; bool found; int start_index, end_index, pop_id; char p_allele, q_allele; stringstream gtypes; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; for (int j = start_index; j <= end_index; j++) { // // Output all the loci for this sample, printing only the p allele // fh << samples[pmap->rev_sample_index(j)] << "\n"; gtypes.str(""); for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); // // Will we output this locus as a haplotype or as a SNP? // if (ordered_loci[pos].type == haplotype) { if (d[j] == NULL) { // // Data does not exist. // gtypes << "-1 "; } else { // // Data exists, output the first haplotype. We will assume the haplotypes are // numbered by their position in the loc->strings vector. // if (d[j]->obshap.size() > 2) { // cerr << "Warning: too many haplotypes, catalog locus: " << loc->id << "\n"; gtypes << "-1 "; } else { found = false; for (uint k = 0; k < loc->strings.size(); k++) if (d[j]->obshap[0] == loc->strings[k].first) { found = true; gtypes << k + 1 << " "; } if (found == false) cerr << "Unable to find haplotype " << d[j]->obshap[0] << " from individual " << samples[pmap->rev_sample_index(j)] << "; catalog locus: " << loc->id << "\n"; } } } else { col = loc->snps[ordered_loci[pos].snp_index]->col; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // gtypes << "? "; } else if (d[j] == NULL) { // // Data does not exist. // gtypes << "? "; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // gtypes << "? "; } else { // // Tally up the nucleotide calls. 
// tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) gtypes << "? "; else if (p_allele == 0) gtypes << q_allele << " "; else gtypes << p_allele << " "; } } } gtypes_str = gtypes.str(); fh << gtypes_str.substr(0, gtypes_str.length() - 1) << "\n"; // // Output all the loci for this sample again, now for the q allele // gtypes.str(""); for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); // // Will we output this locus as a haplotype or as a SNP? // if (ordered_loci[pos].type == haplotype) { if (d[j] == NULL) { // // Data does not exist. // gtypes << "-1 "; } else { // // Data exists, output the second haplotype. We will assume the haplotypes are // numbered by their position in the loc->strings vector. // if (d[j]->obshap.size() > 2) { // cerr << "Warning: too many haplotypes, catalog locus: " << loc->id << "\n"; gtypes << "-1 "; } else if (d[j]->obshap.size() > 1) { found = false; for (uint k = 0; k < loc->strings.size(); k++) if (d[j]->obshap[1] == loc->strings[k].first) { found = true; gtypes << k + 1 << " "; } if (found == false) cerr << "Unable to find haplotype " << d[j]->obshap[1] << " from individual " << samples[pmap->rev_sample_index(j)] << "; catalog locus: " << loc->id << "\n"; } else { found = false; for (uint k = 0; k < loc->strings.size(); k++) if (d[j]->obshap[0] == loc->strings[k].first) { found = true; gtypes << k + 1 << " "; } if (found == false) cerr << "Unable to find haplotype " << d[j]->obshap[0] << " from individual " << samples[pmap->rev_sample_index(j)] << "; catalog locus: " << loc->id << "\n"; } } } else { col = loc->snps[ordered_loci[pos].snp_index]->col; if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { gtypes << "? "; } else if (d[j] == NULL) { gtypes << "? 
"; } else if (d[j]->model[col] == 'U') { gtypes << "? "; } else { tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) gtypes << "? "; else if (q_allele == 0) gtypes << p_allele << " "; else gtypes << q_allele << " "; } } } gtypes_str = gtypes.str(); fh << gtypes_str.substr(0, gtypes_str.length() - 1) << "\n"; } } fh.close(); } cerr << "done.\n"; return 0; } int write_plink(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a PLINK file as defined here: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml // // We will write one file per chromosome. // cerr << "Writing population data to PLINK files..."; // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; string chr; // // First, write a markers file containing each marker, the chromosome it falls on, // an empty centiMorgan field, and finally its genomic position in basepairs. // stringstream pop_name; pop_name << "batch_" << batch_id << ".plink.map"; string file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening PLINK markers file '" << file << "'\n"; exit(1); } // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " PLINK v1.07; " << date << "\n"; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { chr = it->first; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) fh << chr << "\t" << loc->id << "_" << col << "\t" << "0\t" << loc->sort_bp(col) << "\n"; } } } fh.close(); // // Now output the genotypes in a separate file. 
// pop_name.str(""); pop_name << "batch_" << batch_id << ".plink.ped"; file = in_path + pop_name.str(); fh.open(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening PLINK markers file '" << file << "'\n"; exit(1); } fh << "# Stacks v" << VERSION << "; " << " PLINK v1.07; " << date << "\n"; map >::iterator pit; int start_index, end_index, pop_id; char p_allele, q_allele; // // marker, output the genotypes for each sample in two successive columns. // for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; for (int j = start_index; j <= end_index; j++) { fh << pit->first << "\t" << samples[pmap->rev_sample_index(j)] << "\t" << "0\t" // Paternal ID << "0\t" // Maternal ID << "0\t" // Sex << "0"; // Phenotype for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; // // Output the p and q alleles // if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t" << "0" << "\t" << "0"; } else if (d[j] == NULL) { // // Data does not exist. // fh << "\t" << "0" << "\t" << "0"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "\t" << "0" << "\t" << "0"; } else { // // Tally up the nucleotide calls. 
// tally_observed_haplotypes(d[j]->obshap, i, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "0" << "\t" << "0"; else if (p_allele == 0) fh << "\t" << q_allele << "\t" << q_allele; else if (q_allele == 0) fh << "\t" << p_allele << "\t" << p_allele; else fh << "\t" << p_allele << "\t" << q_allele; } } } } fh << "\n"; } } fh.close(); cerr << "done.\n"; return 0; } int write_beagle(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a Beagle file as defined here: http://faculty.washington.edu/browning/beagle/beagle.html // // We will write one file per chromosome, per population. // cerr << "Writing population data to unphased Beagle files..."; // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); map >::iterator it; CSLocus *loc; Datum **d; LocSum **s; LocTally *t; uint col; stringstream pop_name; string file; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { // // We need to determine an ordering that can take into account overlapping RAD sites. // vector ordered_loci; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { col = loc->snps[i]->col; if (t->nucs[col].allele_cnt == 2) ordered_loci.push_back(GenPos(loc->id, i, loc->sort_bp(col))); } } sort(ordered_loci.begin(), ordered_loci.end(), compare_genpos); // // Now output the genotypes in a separate file for each population. // map >::iterator pit; int start_index, end_index, pop_id; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; // // Open a markers file containing each marker, its genomic position in basepairs // and the two alternative alleles at this position. 
// pop_name.str(""); pop_name << "batch_" << batch_id << "." << pop_key[pit->first] << "-" << it->first << ".unphased.bgl.markers"; file = in_path + pop_name.str(); ofstream mfh(file.c_str(), ofstream::out); if (mfh.fail()) { cerr << "Error opening Beagle markers file '" << file << "'\n"; exit(1); } mfh << "# Stacks v" << VERSION << "; " << " Beagle v3.3; " << date << "\n"; // // Open the genotypes file. // pop_name.str(""); pop_name << "batch_" << batch_id << "." << pop_key[pit->first] << "-" << it->first << ".unphased.bgl"; file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Beagle genotypes file '" << file << "'\n"; exit(1); } fh << "# Stacks v" << VERSION << "; " << " Beagle v3.3; " << date << "\n"; char p_allele, q_allele; // // Output a list of all the samples in this population. // fh << "I\tid"; for (int j = start_index; j <= end_index; j++) fh << "\t" << samples[pmap->rev_sample_index(j)] << "\t" << samples[pmap->rev_sample_index(j)]; fh << "\n"; // // Output population IDs for each sample. // fh << "S\tid"; for (int j = start_index; j <= end_index; j++) fh << "\t" << pit->first << "\t" << pit->first; fh << "\n"; // // For each marker, output the genotypes for each sample in two successive columns. // for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; s = psum->locus(loc->id); d = pmap->locus(loc->id); t = psum->locus_tally(loc->id); col = loc->snps[ordered_loci[pos].snp_index]->col; // // If this site is fixed in all populations or has too many alleles don't output it. // if (t->nucs[col].allele_cnt != 2) continue; // // If this site is monomorphic in this population don't output it. // if (s[pop_id]->nucs[col].pi == 0.0) continue; // // Output this locus to the markers file. 
// mfh << loc->id << "_" << col << "\t" << loc->sort_bp(col) << "\t" << t->nucs[col].p_allele << "\t" << t->nucs[col].q_allele << "\n"; fh << "M" << "\t" << loc->id << "_" << col; for (int j = start_index; j <= end_index; j++) { // // Output the p allele // if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { // // This site contains more than two alleles in this population or was filtered // due to a minor allele frequency that is too low. // fh << "\t" << "?"; } else if (d[j] == NULL) { // // Data does not exist. // fh << "\t" << "?"; } else if (d[j]->model[col] == 'U') { // // Data exists, but the model call was uncertain. // fh << "\t" << "?"; } else { // // Tally up the nucleotide calls. // tally_observed_haplotypes(d[j]->obshap, ordered_loci[pos].snp_index, p_allele, q_allele); if (p_allele == 0 && q_allele == 0) fh << "\t" << "?"; else if (p_allele == 0) fh << "\t" << q_allele; else fh << "\t" << p_allele; } // // Now output the q allele // if (s[pop_id]->nucs[col].incompatible_site || s[pop_id]->nucs[col].filtered_site) { fh << "\t" << "?"; } else if (d[j] == NULL) { fh << "\t" << "?"; } else if (d[j]->model[col] == 'U') { fh << "\t" << "?"; } else { if (p_allele == 0 && q_allele == 0) fh << "\t" << "?"; else if (q_allele == 0) fh << "\t" << p_allele; else fh << "\t" << q_allele; } } fh << "\n"; } fh.close(); mfh.close(); } } cerr << "done.\n"; return 0; } int write_beagle_phased(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // Write a Beagle file as a set of haplotpyes as defined here: // http://faculty.washington.edu/browning/beagle/beagle.html // // We will write one file per chromosome. // cerr << "Writing population data to phased Beagle files..."; // // Obtain the current date. 
// time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); map >::iterator it; CSLocus *loc; Datum **d; stringstream pop_name; string file; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { // // We need to determine an ordering for all legitimate loci/SNPs. // vector ordered_loci; for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; if (loc->snps.size() == 0) continue; // // Check that there aren't too many haplotypes (PHASE has a max of 50). // if (loc->alleles.size() > 40) continue; // // Iterate over the population to determine that this subset of the population // has data at this locus. // d = pmap->locus(loc->id); for (int j = 0; j < pmap->sample_cnt(); j++) { if (d[j] != NULL && d[j]->obshap.size() > 0 && d[j]->obshap.size() <= 2) { // // Data exists, and their are the corrent number of haplotypes. // ordered_loci.push_back(GenPos(loc->id, 0, loc->sort_bp(), haplotype)); break; } } } sort(ordered_loci.begin(), ordered_loci.end(), compare_genpos); // // Now output the genotypes in a separate file for each population. // map >::iterator pit; int start_index, end_index, pop_id; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = psum->pop_index(pit->first); start_index = pit->second.first; end_index = pit->second.second; // // Open a file for writing the markers: their genomic position in basepairs // and the two alternative alleles at this position. // pop_name.str(""); pop_name << "batch_" << batch_id << "." << pop_key[pit->first] << "-" << it->first << ".phased.bgl.markers"; file = in_path + pop_name.str(); ofstream mfh(file.c_str(), ofstream::out); if (mfh.fail()) { cerr << "Error opening Beagle markers file '" << file << "'\n"; exit(1); } mfh << "# Stacks v" << VERSION << "; " << " Beagle v3.3; " << date << "\n"; // // Now output the haplotypes in a separate file. 
// pop_name.str(""); pop_name << "batch_" << batch_id << "." << pop_key[pit->first] << "-" << it->first << ".phased.bgl"; file = in_path + pop_name.str(); ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Beagle markers file '" << file << "'\n"; exit(1); } fh << "# Stacks v" << VERSION << "; " << " Beagle v3.3; " << date << "\n"; // // Output a list of all the samples in the data set. // fh << "I\tid"; for (int j = start_index; j <= end_index; j++) fh << "\t" << samples[pmap->rev_sample_index(j)] << "\t" << samples[pmap->rev_sample_index(j)]; fh << "\n"; // // Output population IDs for each sample. // fh << "S\tid"; for (int j = start_index; j <= end_index; j++) fh << "\t" << pop_id << "\t" << pop_id; fh << "\n"; for (uint pos = 0; pos < ordered_loci.size(); pos++) { loc = catalog[ordered_loci[pos].id]; d = pmap->locus(loc->id); // // If this locus is monomorphic in this population don't output it. // set haplotypes; for (int j = start_index; j <= end_index; j++) { if (d[j] == NULL) continue; if (d[j]->obshap.size() == 2) { haplotypes.insert(d[j]->obshap[0]); haplotypes.insert(d[j]->obshap[1]); } else { haplotypes.insert(d[j]->obshap[0]); } } if (haplotypes.size() == 1) continue; // // Output this locus to the markers file. // mfh << loc->id << "\t" << loc->sort_bp(); for (uint j = 0; j < loc->strings.size(); j++) mfh << "\t" << loc->strings[j].first; mfh << "\n"; // // For each marker, output the genotypes for each sample in two successive columns. // fh << "M" << "\t" << loc->id; for (int j = start_index; j <= end_index; j++) { // // Output the p and the q haplotype // if (d[j] == NULL) { // // Data does not exist. // fh << "\t" << "?" << "\t" << "?"; } else { // // Data exists, output the first haplotype. We will assume the haplotypes are // numbered by their position in the loc->strings vector. // if (d[j]->obshap.size() > 2) fh << "\t" << "?" 
<< "\t" << "?"; else if (d[j]->obshap.size() == 2) fh << "\t" << d[j]->obshap[0] << "\t" << d[j]->obshap[1]; else fh << "\t" << d[j]->obshap[0] << "\t" << d[j]->obshap[0]; } } fh << "\n"; } fh.close(); mfh.close(); } } cerr << "done.\n"; return 0; } int write_phylip(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // We want to find loci where each locus is fixed within a population but variable between populations. // // We will write those loci to a Phylip file as defined here: // http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles // stringstream pop_name; pop_name << "batch_" << batch_id << ".phylip"; string file = in_path + pop_name.str(); cerr << "Writing population data to Phylip file '" << file << "'; "; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Phylip file '" << file << "'\n"; exit(1); } pop_name << ".log"; file = in_path + pop_name.str(); cerr << "logging nucleotide positions to '" << file << "'..."; ofstream log_fh(file.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening Phylip Log file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); log_fh << "# Stacks v" << VERSION << "; " << " Phylip sequential; " << date << "\n" << "# Seq Pos\tLocus ID\tColumn\tPopulation\n"; map >::iterator it; CSLocus *loc; LocSum **s; LocTally *t; map >::iterator pit; int pop_cnt = psum->pop_cnt(); int pop_id; char nuc; // // A map storing, for each population, the concatenated list of interspecific nucleotides. 
// map interspecific_nucs; int index = 0; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (phylip_var == false) { // // We are looking for loci that are fixed within each population, but are // variable between one or more populations. // if (t->nucs[col].fixed == true || t->nucs[col].allele_cnt != 2 || t->nucs[col].pop_cnt < 2) continue; bool fixed_within = true; for (int j = 0; j < pop_cnt; j++) { if (s[j]->nucs[col].num_indv == 0) continue; if (s[j]->nucs[col].fixed == false) { fixed_within = false; break; } } if (fixed_within == false) continue; log_fh << index << "\t" << loc->id << "\t" << col << "\t"; for (int j = 0; j < pop_cnt; j++) { pop_id = psum->rev_pop_index(j); if (s[j]->nucs[col].num_indv > 0) { interspecific_nucs[pop_id] += s[j]->nucs[col].p_nuc; log_fh << pop_key[pop_id] << ":" << s[j]->nucs[col].p_nuc << ","; } else { interspecific_nucs[pop_id] += 'N'; log_fh << pop_key[pop_id] << ":N" << ","; } } log_fh << "\n"; index++; } else { // // Encode SNPs that are variable within a population as well, using IUPAC notation: // http://en.wikipedia.org/wiki/Nucleic_acid_notation#IUPAC_notation // if (t->nucs[col].allele_cnt != 2) continue; log_fh << index << "\t" << loc->id << "\t" << col << "\t"; for (int j = 0; j < pop_cnt; j++) { pop_id = psum->rev_pop_index(j); switch(s[j]->nucs[col].p_nuc) { case 0: nuc = 'N'; break; case 'A': switch(s[j]->nucs[col].q_nuc) { case 'C': nuc = 'M'; break; case 'G': nuc = 'R'; break; case 'T': nuc = 'W'; break; case 0: nuc = 'A'; break; } break; case 'C': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'M'; break; case 'G': nuc = 'S'; break; case 'T': nuc = 'Y'; break; case 0: nuc = 'C'; break; } break; case 'G': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'R'; break; case 'C': 
nuc = 'S'; break; case 'T': nuc = 'K'; break; case 0: nuc = 'G'; break; } break; case 'T': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'W'; break; case 'C': nuc = 'Y'; break; case 'G': nuc = 'K'; break; case 0: nuc = 'T'; break; } break; } interspecific_nucs[pop_id] += nuc; log_fh << pop_key[pop_id] << ":" << nuc << ","; } log_fh << "\n"; index++; } } } } if (interspecific_nucs.size() == 0) { cerr << " No data is available to write to the Phylip file.\n"; return 0; } char id_str[id_len]; uint len; fh << pop_indexes.size() << " " << interspecific_nucs.begin()->second.length() << "\n"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = pit->first; sprintf(id_str, "%s", pop_key[pop_id].c_str()); len = strlen(id_str); for (uint j = len; j < 10; j++) id_str[j] = ' '; id_str[9] = '\0'; fh << id_str << " " << interspecific_nucs[pop_id] << "\n"; } // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " Phylip sequential; " << date << "\n"; fh.close(); log_fh.close(); cerr << "done.\n"; return 0; } int write_fullseq_phylip(map &catalog, PopMap *pmap, PopSum *psum, map > &pop_indexes, map &samples) { // // We want to write all variable loci in Phylip interleaved format. Polymorphic positions // will be encoded using IUPAC notation. // // We will write those loci to a Phylip file as defined here: // http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles // stringstream pop_name; pop_name << "batch_" << batch_id << ".fullseq.phylip"; string file = in_path + pop_name.str(); cerr << "Writing full sequence population data to Phylip file '" << file << "'; "; ofstream fh(file.c_str(), ofstream::out); if (fh.fail()) { cerr << "Error opening Phylip file '" << file << "'\n"; exit(1); } // // We will also write a file that allows us to specify each RAD locus as a separate partition // for use in phylogenetics programs. 
// pop_name.str(""); pop_name << "batch_" << batch_id << ".fullseq.partitions.phylip"; file = in_path + pop_name.str(); ofstream par_fh(file.c_str(), ofstream::out); if (par_fh.fail()) { cerr << "Error opening Phylip partitions file '" << file << "'\n"; exit(1); } pop_name.str(""); pop_name << "batch_" << batch_id << "fullseq.phylip.log"; file = in_path + pop_name.str(); cerr << "logging nucleotide positions to '" << file << "'..."; ofstream log_fh(file.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening Phylip Log file '" << file << "'\n"; exit(1); } // // Obtain the current date. // time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%B %d, %Y", timeinfo); log_fh << "# Stacks v" << VERSION << "; " << " Phylip interleaved; " << date << "\n" << "# Locus ID\tLine Number"; if (loci_ordered) log_fh << "\tChr\tBasepair"; log_fh << "\n"; map >::iterator it; CSLocus *loc; LocSum **s; LocTally *t; map >::iterator pit; int pop_cnt = psum->pop_cnt(); int pop_id; char nuc; bool include; char id_str[id_len]; uint len = 0; // // Determine the length of sequence we will output. 
// for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; t = psum->locus_tally(loc->id); include = true; for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt != 2) include = false; } if (include) len += strlen(loc->con); } } map outstrs; fh << pop_indexes.size() << " " << len << "\n"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = pit->first; outstrs[pop_id] = ""; sprintf(id_str, "%s", pop_key[pop_id].c_str()); len = strlen(id_str); for (uint j = len; j < 10; j++) id_str[j] = ' '; id_str[9] = '\0'; outstrs[pop_id] += string(id_str) + " "; } char *seq; int line = 1; int index = 1; int cnt = 1; for (it = pmap->ordered_loci.begin(); it != pmap->ordered_loci.end(); it++) { for (uint pos = 0; pos < it->second.size(); pos++) { loc = it->second[pos]; s = psum->locus(loc->id); t = psum->locus_tally(loc->id); include = true; for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; if (t->nucs[col].allele_cnt != 2) include = false; } if (!include) continue; seq = new char[loc->len + 1]; strcpy(seq, loc->con); for (int j = 0; j < pop_cnt; j++) { pop_id = psum->rev_pop_index(j); for (uint i = 0; i < loc->snps.size(); i++) { uint col = loc->snps[i]->col; // // Encode SNPs that are variable within a population using IUPAC notation: // http://en.wikipedia.org/wiki/Nucleic_acid_notation#IUPAC_notation // switch(s[j]->nucs[col].p_nuc) { case 0: nuc = 'N'; break; case 'A': switch(s[j]->nucs[col].q_nuc) { case 'C': nuc = 'M'; break; case 'G': nuc = 'R'; break; case 'T': nuc = 'W'; break; case 0: nuc = 'A'; break; } break; case 'C': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'M'; break; case 'G': nuc = 'S'; break; case 'T': nuc = 'Y'; break; case 0: nuc = 'C'; break; } break; case 'G': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'R'; break; case 'C': nuc = 'S'; break; case 'T': 
nuc = 'K'; break; case 0: nuc = 'G'; break; } break; case 'T': switch(s[j]->nucs[col].q_nuc) { case 'A': nuc = 'W'; break; case 'C': nuc = 'Y'; break; case 'G': nuc = 'K'; break; case 0: nuc = 'T'; break; } break; } seq[col] = nuc; } outstrs[pop_id] += string(seq); } delete [] seq; log_fh << line << "\t" << loc->id; if (loci_ordered) log_fh << "\t" << loc->loc.chr << "\t" << loc->sort_bp() + 1; log_fh << "\n"; for (pit = pop_indexes.begin(); pit != pop_indexes.end(); pit++) { pop_id = pit->first; fh << outstrs[pop_id] << "\n"; outstrs[pop_id] = ""; line++; } fh << "\n"; line++; par_fh << "DNA, p" << cnt << "=" << index << "-" << index + loc->len - 1 << "\n"; index += loc->len; cnt++; } } // // Output the header. // fh << "# Stacks v" << VERSION << "; " << " Phylip interleaved; " << date << "\n"; fh.close(); par_fh.close(); log_fh.close(); cerr << "done.\n"; return 0; } int tally_ref_alleles(LocSum **s, int pop_cnt, int snp_index, char &p_allele, char &q_allele) { int nucs[4] = {0}; char nuc[2]; for (int j = 0; j < pop_cnt; j++) { nuc[0] = 0; nuc[1] = 0; nuc[0] = s[j]->nucs[snp_index].p_nuc; nuc[1] = s[j]->nucs[snp_index].q_nuc; for (uint k = 0; k < 2; k++) switch(nuc[k]) { case 'A': case 'a': nucs[0]++; break; case 'C': case 'c': nucs[1]++; break; case 'G': case 'g': nucs[2]++; break; case 'T': case 't': nucs[3]++; break; } } // // Determine how many alleles are present at this position in this population. // We cannot deal with more than two alternative alleles, if there are more than two // in a single population, print a warning and exclude this nucleotide position. // int i; int allele_cnt = 0; for (i = 0; i < 4; i++) if (nucs[i] > 0) allele_cnt++; if (allele_cnt > 2) { p_allele = 0; q_allele = 0; return 0; } // // Record which nucleotide is the P allele and which is the Q allele. 
// p_allele = 0; q_allele = 0; i = 0; while (p_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 0: p_allele = 'A'; break; case 1: p_allele = 'C'; break; case 2: p_allele = 'G'; break; case 3: p_allele = 'T'; break; } } i++; } while (q_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 1: q_allele = 'C'; break; case 2: q_allele = 'G'; break; case 3: q_allele = 'T'; break; } } i++; } return 1; } int tally_observed_haplotypes(vector &obshap, int snp_index, char &p_allele, char &q_allele) { int nucs[4] = {0}; char nuc; // // Pull each allele for this SNP from the observed haplotype. // for (uint j = 0; j < obshap.size(); j++) { nuc = obshap[j][snp_index]; switch(nuc) { case 'A': case 'a': nucs[0]++; break; case 'C': case 'c': nucs[1]++; break; case 'G': case 'g': nucs[2]++; break; case 'T': case 't': nucs[3]++; break; } } // // Determine how many alleles are present at this position in this population. // We cannot deal with more than two alternative alleles, if there are more than two // in a single population, print a warning and exclude this nucleotide position. // int i; int allele_cnt = 0; for (i = 0; i < 4; i++) if (nucs[i] > 0) allele_cnt++; if (allele_cnt > 2) { p_allele = 0; q_allele = 0; return -1; } // // Record which nucleotide is the P allele and which is the Q allele. 
// p_allele = 0; q_allele = 0; i = 0; while (p_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 0: p_allele = 'A'; break; case 1: p_allele = 'C'; break; case 2: p_allele = 'G'; break; case 3: p_allele = 'T'; break; } } i++; } while (q_allele == 0 && i < 4) { if (nucs[i] > 0) { switch(i) { case 1: q_allele = 'C'; break; case 2: q_allele = 'G'; break; case 3: q_allele = 'T'; break; } } i++; } return 0; } int load_marker_list(string path, set &list) { char line[id_len]; ifstream fh(path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening white/black list file '" << path << "'\n"; exit(1); } int marker; char *p, *e; while (fh.good()) { fh.getline(line, id_len); if (strlen(line) == 0) continue; // // Skip commented lines. // for (p = line; isspace(*p) && *p != '\0'; p++); if (*p == '#') continue; marker = (int) strtol(line, &e, 10); if (*e == '\0') list.insert(marker); } fh.close(); if (list.size() == 0) { cerr << "Unable to load any markers from '" << path << "'\n"; exit(1); } return 0; } int load_marker_column_list(string path, map > &list) { char line[id_len]; ifstream fh(path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening white/black list file '" << path << "'\n"; exit(1); } vector parts; uint marker, col; char *p, *e; uint line_num = 1; while (fh.good()) { fh.getline(line, id_len); if (strlen(line) == 0) continue; // // Skip commented lines. 
// for (p = line; isspace(*p) && *p != '\0'; p++); if (*p == '#') continue; // // Parse the whitelist, we expect: // [] // parse_tsv(line, parts); if (parts.size() > 2) { cerr << "Too many columns in whitelist " << path << "' at line " << line_num << "\n"; exit(1); } else if (parts.size() == 2) { marker = (int) strtol(parts[0].c_str(), &e, 10); if (*e != '\0') { cerr << "Unable to parse whitelist, '" << path << "' at line " << line_num << "\n"; exit(1); } col = (int) strtol(parts[1].c_str(), &e, 10); if (*e != '\0') { cerr << "Unable to parse whitelist, '" << path << "' at line " << line_num << "\n"; exit(1); } list[marker].insert(col); } else { marker = (int) strtol(parts[0].c_str(), &e, 10); if (*e != '\0') { cerr << "Unable to parse whitelist, '" << path << "' at line " << line << "\n"; exit(1); } list.insert(make_pair(marker, std::set())); } line_num++; } fh.close(); if (list.size() == 0) { cerr << "Unable to load any markers from '" << path << "'\n"; help(); } return 0; } int build_file_list(vector > &files, map > &pop_indexes, map > &grp_members) { char line[max_len]; vector parts; map pop_key_rev, grp_key_rev; set pop_names, grp_names; string f; uint len; if (pmap_path.length() > 0) { cerr << "Parsing population map.\n"; ifstream fh(pmap_path.c_str(), ifstream::in); if (fh.fail()) { cerr << "Error opening population map '" << pmap_path << "'\n"; return 0; } uint pop_id = 0; uint grp_id = 0; while (fh.good()) { fh.getline(line, max_len); len = strlen(line); if (len == 0) continue; // // Check that there is no carraige return in the buffer. // if (line[len - 1] == '\r') line[len - 1] = '\0'; // // Ignore comments // if (line[0] == '#') continue; // // Parse the population map, we expect: // [] // parse_tsv(line, parts); if (parts.size() < 2 || parts.size() > 3) { cerr << "Population map is not formated correctly: expecting two or three, tab separated columns, found " << parts.size() << ".\n"; return 0; } // // Have we seen this population or group before? 
// if (pop_names.count(parts[1]) == 0) { pop_names.insert(parts[1]); pop_id++; pop_key[pop_id] = parts[1]; pop_key_rev[parts[1]] = pop_id; // // If this is the first time we have seen this population, but not the // first time we have seen this group, add the population to the group list. // if (parts.size() == 3 && grp_key_rev.count(parts[2]) > 0) grp_members[grp_key_rev[parts[2]]].push_back(pop_id); } if (parts.size() == 3 && grp_names.count(parts[2]) == 0) { grp_names.insert(parts[2]); grp_id++; grp_key[grp_id] = parts[2]; grp_key_rev[parts[2]] = grp_id; // // Associate the current population with the group. // grp_members[grp_id].push_back(pop_id); } // // Test that file exists before adding to list. // ifstream test_fh; gzFile gz_test_fh; f = in_path.c_str() + parts[0] + ".matches.tsv"; test_fh.open(f.c_str()); if (test_fh.fail()) { // // Test for a gzipped file. // f = in_path.c_str() + parts[0] + ".matches.tsv.gz"; gz_test_fh = gzopen(f.c_str(), "rb"); if (!gz_test_fh) { cerr << " Unable to find " << f.c_str() << ", excluding it from the analysis.\n"; } else { gzclose(gz_test_fh); files.push_back(make_pair(pop_key_rev[parts[1]], parts[0])); } } else { test_fh.close(); files.push_back(make_pair(pop_key_rev[parts[1]], parts[0])); } } fh.close(); } else { cerr << "No population map specified, building file list.\n"; // // If no population map is specified, read all the files from the Stacks directory. // uint pos; string file; struct dirent *direntry; DIR *dir = opendir(in_path.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path << "' for reading.\n"; exit(1); } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file == "." 
|| file == "..") continue; if (file.substr(0, 6) == "batch_") continue; pos = file.rfind(".tags.tsv"); if (pos < file.length()) { files.push_back(make_pair(1, file.substr(0, pos))); } else { pos = file.rfind(".tags.tsv.gz"); if (pos < file.length()) files.push_back(make_pair(1, file.substr(0, pos))); } } pop_key[1] = "1"; closedir(dir); } if (files.size() == 0) { cerr << "Unable to locate any input files to process within '" << in_path << "'\n"; return 0; } // // Sort the files according to population ID. // sort(files.begin(), files.end(), compare_pop_map); cerr << "Found " << files.size() << " input file(s).\n"; // // Determine the start/end index for each population in the files array. // int start = 0; int end = 0; int pop_id = files[0].first; do { end++; if (pop_id != files[end].first) { pop_indexes[pop_id] = make_pair(start, end - 1); start = end; pop_id = files[end].first; } } while (end < (int) files.size()); pop_indexes.size() == 1 ? cerr << " " << pop_indexes.size() << " population found\n" : cerr << " " << pop_indexes.size() << " populations found\n"; if (population_limit > (int) pop_indexes.size()) { cerr << "Population limit (" << population_limit << ") larger than number of popualtions present, adjusting parameter to " << pop_indexes.size() << "\n"; population_limit = pop_indexes.size(); } map >::iterator it; for (it = pop_indexes.begin(); it != pop_indexes.end(); it++) { start = it->second.first; end = it->second.second; cerr << " " << pop_key[it->first] << ": "; for (int i = start; i <= end; i++) { cerr << files[i].second; if (i < end) cerr << ", "; } cerr << "\n"; } // // If no group membership is specified in the population map, create a default // group with each population ID as a member. // if (grp_members.size() == 0) { for (it = pop_indexes.begin(); it != pop_indexes.end(); it++) grp_members[1].push_back(it->first); grp_key[1] = "1"; } grp_members.size() == 1 ? 
cerr << " " << grp_members.size() << " group of populations found\n" : cerr << " " << grp_members.size() << " groups of populations found\n"; map >::iterator git; for (git = grp_members.begin(); git != grp_members.end(); git++) { cerr << " " << grp_key[git->first] << ": "; for (uint i = 0; i < git->second.size(); i++) { cerr << pop_key[git->second[i]]; if (i < git->second.size() - 1) cerr << ", "; } cerr << "\n"; } return 1; } bool compare_pop_map(pair a, pair b) { if (a.first == b.first) return (a.second < b.second); return (a.first < b.first); } bool hap_compare(pair a, pair b) { return (a.second > b.second); } bool compare_genpos(GenPos a, GenPos b) { return (a.bp < b.bp); } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"verbose", no_argument, NULL, 'd'}, {"sql", no_argument, NULL, 's'}, {"vcf", no_argument, NULL, 'V'}, {"vcf_haplotypes", no_argument, NULL, 'n'}, {"fasta", no_argument, NULL, 'F'}, {"fasta_strict", no_argument, NULL, 'J'}, {"structure", no_argument, NULL, 'S'}, {"fastphase", no_argument, NULL, 'A'}, {"phase", no_argument, NULL, 'C'}, {"beagle", no_argument, NULL, 'E'}, {"beagle_phased", no_argument, NULL, 'H'}, {"plink", no_argument, NULL, 'K'}, {"genomic", no_argument, NULL, 'g'}, {"genepop", no_argument, NULL, 'G'}, {"phylip", no_argument, NULL, 'Y'}, {"phylip_var", no_argument, NULL, 'L'}, {"phylip_var_all", no_argument, NULL, 'T'}, {"hzar", no_argument, NULL, 'Z'}, {"treemix", no_argument, NULL, 'U'}, {"merge_sites", no_argument, NULL, 'D'}, {"window_size", required_argument, NULL, 'w'}, {"num_threads", required_argument, NULL, 't'}, {"batch_id", required_argument, NULL, 'b'}, {"in_path", required_argument, NULL, 'P'}, {"progeny", required_argument, NULL, 'r'}, {"min_depth", required_argument, NULL, 'm'}, {"renz", required_argument, NULL, 'e'}, {"pop_map", required_argument, NULL, 'M'}, {"whitelist", 
required_argument, NULL, 'W'}, {"blacklist", required_argument, NULL, 'B'}, {"write_single_snp", no_argument, NULL, 'I'}, {"write_random_snp", no_argument, NULL, 'j'}, {"ordered_export", no_argument, NULL, 'N'}, {"kernel_smoothed", no_argument, NULL, 'k'}, {"fstats", no_argument, NULL, '6'}, {"log_fst_comp", no_argument, NULL, 'l'}, {"bootstrap_type", required_argument, NULL, 'O'}, {"bootstrap_reps", required_argument, NULL, 'R'}, {"bootstrap_wl", required_argument, NULL, 'Q'}, {"bootstrap", no_argument, NULL, '1'}, {"bootstrap_fst", no_argument, NULL, '2'}, {"bootstrap_phist", no_argument, NULL, '3'}, {"bootstrap_div", no_argument, NULL, '4'}, {"bootstrap_pifis", no_argument, NULL, '5'}, {"min_populations", required_argument, NULL, 'p'}, {"min_maf", required_argument, NULL, 'a'}, {"max_obs_het", required_argument, NULL, 'q'}, {"lnl_lim", required_argument, NULL, 'c'}, {"merge_prune_lim", required_argument, NULL, 'i'}, {"fst_correction", required_argument, NULL, 'f'}, {"p_value_cutoff", required_argument, NULL, 'u'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "ACDEFGHJKLNSTUVYZ123456dghjklnsva:b:c:e:f:i:m:o:p:q:r:t:u:w:B:I:M:O:P:R:Q:W:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'd': verbose = true; break; case 't': num_threads = atoi(optarg); break; case 'P': in_path = optarg; break; case 'M': pmap_path = optarg; break; case 'D': merge_sites = true; break; case 'i': merge_prune_lim = is_double(optarg); break; case 'q': max_obs_het = is_double(optarg); break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 
1, 2, 3\n"; help(); } break; case 'r': sample_limit = atof(optarg); break; case 'p': population_limit = atoi(optarg); break; case 'k': kernel_smoothed = true; calc_fstats = true; break; case '6': calc_fstats = true; break; case 'l': log_fst_comp = true; break; case '1': bootstrap = true; bootstrap_fst = true; bootstrap_phist = true; bootstrap_pifis = true; bootstrap_div = true; break; case '2': bootstrap_fst = true; break; case '3': bootstrap_phist = true; break; case '4': bootstrap_div = true; break; case '5': bootstrap_pifis = true; break; case 'O': if (strcasecmp(optarg, "exact") == 0) bootstrap_type = bs_exact; else if (strcasecmp(optarg, "approx") == 0) bootstrap_type = bs_approx; else { cerr << "Unknown bootstrap type specified '" << optarg << "'\n"; help(); } break; case 'R': bootstrap_reps = atoi(optarg); break; case 'Q': bs_wl_file = optarg; bootstrap_wl = true; break; case 'c': lnl_limit = is_double(optarg); filter_lnl = true; break; case 'I': write_single_snp = true; break; case 'j': write_random_snp = true; break; case 'N': ordered_export = true; break; case 's': sql_out = true; break; case 'V': vcf_out = true; break; case 'n': vcf_haplo_out = true; break; case 'F': fasta_out = true; break; case 'J': fasta_strict_out = true; break; case 'G': genepop_out = true; break; case 'S': structure_out = true; break; case 'A': fastphase_out = true; break; case 'C': phase_out = true; break; case 'E': beagle_out = true; break; case 'H': beagle_phased_out = true; break; case 'K': plink_out = true; break; case 'Z': hzar_out = true; break; case 'Y': phylip_out = true; break; case 'L': phylip_var = true; break; case 'T': phylip_var_all = true; break; case 'U': treemix_out = true; break; case 'g': genomic_out = true; break; case 'W': wl_file = optarg; break; case 'B': bl_file = optarg; break; case 'm': min_stack_depth = atoi(optarg); break; case 'a': minor_allele_freq = atof(optarg); break; case 'f': if (strcasecmp(optarg, "p_value") == 0) fst_correction = p_value; else 
if (strcasecmp(optarg, "bonferroni_win") == 0) fst_correction = bonferroni_win; else if (strcasecmp(optarg, "bonferroni_gen") == 0) fst_correction = bonferroni_gen; else { cerr << "Unknown Fst correction specified '" << optarg << "'\n"; help(); } break; case 'u': p_value_cutoff = atof(optarg); break; case 'e': enz = optarg; break; case 'w': sigma = atof(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: cerr << "Unknown command line option: '" << (char) c << "'\n"; help(); abort(); } } if (in_path.length() == 0) { cerr << "You must specify a path to the directory containing Stacks output files.\n"; help(); } if (in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (pmap_path.length() == 0) { cerr << "A population map was not specified, all samples will be read from '" << in_path << "' as a single popultaion.\n"; } if (batch_id < 0) { cerr << "You must specify a batch ID.\n"; help(); } if (enz.length() > 0 && renz.count(enz) == 0) { cerr << "Unrecognized restriction enzyme specified: '" << enz.c_str() << "'.\n"; help(); } if (merge_prune_lim != 1.0) { if (merge_prune_lim > 1.0) merge_prune_lim = merge_prune_lim / 100; if (merge_prune_lim < 0 || merge_prune_lim > 1.0) { cerr << "Unable to parse the merge sites pruning limit.\n"; help(); } } if (minor_allele_freq > 0) { if (minor_allele_freq > 1) minor_allele_freq = minor_allele_freq / 100; if (minor_allele_freq > 0.5) { cerr << "Unable to parse the minor allele frequency.\n"; help(); } } if (max_obs_het != 1.0) { if (max_obs_het > 1) max_obs_het = max_obs_het / 100; if (max_obs_het < 0 || max_obs_het > 1.0) { cerr << "Unable to parse the maximum observed heterozygosity.\n"; help(); } } if (sample_limit > 0) { if (sample_limit > 1) sample_limit = sample_limit / 100; if (sample_limit > 1.0) { cerr << "Unable to parse the sample limit frequency\n"; help(); } } if (write_single_snp && write_random_snp) { cerr << "Please specify either 
'--write_single_snp' or '--write_random_snp', not both.\n"; help(); } if (merge_sites == true && enz.length() == 0) { cerr << "You must specify the restriction enzyme associated with this data set to merge overlaping cutsites.\n"; help(); } return 0; } void version() { std::cerr << "populations " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "populations " << VERSION << "\n" << "populations -b batch_id -P path -M path [-r min] [-m min] [-B blacklist] [-W whitelist] [-s] [-e renz] [-t threads] [-v] [-h]" << "\n" << " b: Batch ID to examine when exporting from the catalog.\n" << " P: path to the Stacks output files.\n" << " M: path to the population map, a tab separated file describing which individuals belong in which population.\n" << " s: output a file to import results into an SQL database.\n" << " B: specify a file containing Blacklisted markers to be excluded from the export.\n" << " W: specify a file containing Whitelisted markers to include in the export.\n" << " e: restriction enzyme, required if generating 'genomic' output.\n" << " t: number of threads to run in parallel sections of code.\n" << " v: print program version." << "\n" << " h: display this help messsage." 
<< "\n\n" << " Merging and Phasing:\n" << " --merge_sites: merge loci that were produced from the same restriction enzyme cutsite (requires reference-aligned data).\n" << " --merge_prune_lim: when merging adjacent loci, if at least X% samples posses both loci prune the remaining samples out of the analysis.\n" << " Data Filtering:\n" << " r: minimum percentage of individuals in a population required to process a locus for that population.\n" << " p: minimum number of populations a locus must be present in to process a locus.\n" << " m: specify a minimum stack depth required for individuals at a locus.\n" << " f: specify a correction to be applied to Fst values: 'p_value', 'bonferroni_win', or 'bonferroni_gen'.\n" << " --min_maf: specify a minimum minor allele frequency required to process a nucleotide site at a locus (0 < min_maf < 0.5).\n" << " --max_obs_het: specify a maximum observed heterozygosity required to process a nucleotide site at a locus.\n" << " --p_value_cutoff [num]: required p-value to keep an Fst measurement (0.05 by default). 
Also used as base for Bonferroni correction.\n" << " --lnl_lim [num]: filter loci with log likelihood values below this threshold.\n" << " --write_single_snp: restrict data analysis to only the first SNP per locus.\n" << " --write_random_snp: restrict data analysis to one random SNP per locus.\n\n" << " Fstats:\n" << " --fstats: enable SNP and haplotype-based F statistics.\n\n" << " Kernel-smoothing algorithm:\n" << " k: enable kernel-smoothed Pi, Fis, Fst, Fst', and Phi_st calculations.\n" << " --window_size [num]: distance over which to average values (sigma, default 150,000bp; window is 3sigma in length).\n\n" << " Bootstrap Resampling:\n" << " --bootstrap: turn on boostrap resampling for all smoothed statistics.\n" << " --bootstrap_pifis: turn on boostrap resampling for smoothed SNP-based Pi and Fis calculations.\n" << " --bootstrap_fst: turn on boostrap resampling for smoothed Fst calculations based on pairwise population comparison of SNPs.\n" << " --bootstrap_div: turn on boostrap resampling for smoothed haplotype diveristy and gene diversity calculations based on haplotypes.\n" << " --bootstrap_phist: turn on boostrap resampling for smoothed Phi_st calculations based on haplotypes.\n" << " --bootstrap_reps [num]: number of bootstrap resamplings to calculate (default 100).\n" << " --bootstrap_wl [path]: only bootstrap loci contained in this whitelist.\n\n" << " File ouput options:\n" << " --ordered_export: if data is reference aligned, exports will be ordered; only a single representative of each overlapping site.\n" << " --genomic: output each nucleotide position (fixed or polymorphic) in all population members to a file.\n" << " --fasta: output full sequence for each unique haplotype, from each sample locus in FASTA format, regardless of plausibility.\n" << " --fasta_strict: output full sequence for each haplotype, from each sample locus in FASTA format, only for biologically plausible loci.\n" << " --vcf: output SNPs in Variant Call Format (VCF).\n" << " 
--vcf_haplotypes: output haplotypes in Variant Call Format (VCF).\n" << " --genepop: output results in GenePop format.\n" << " --structure: output results in Structure format.\n" << " --phase: output genotypes in PHASE format.\n" << " --fastphase: output genotypes in fastPHASE format.\n" << " --beagle: output genotypes in Beagle format.\n" << " --beagle_phased: output haplotypes in Beagle format.\n" << " --plink: output genotypes in PLINK format.\n" << " --hzar: output genotypes in Hybrid Zone Analysis using R (HZAR) format.\n" << " --phylip: output nucleotides that are fixed-within, and variant among populations in Phylip format for phylogenetic tree construction.\n" << " --phylip_var: include variable sites in the phylip output encoded using IUPAC notation.\n" << " --phylip_var_all: include all sequence as well as variable sites in the phylip output encoded using IUPAC notation.\n" << " --treemix: output SNPs in a format useable for the TreeMix program (Pickrell and Pritchard).\n\n" << " Debugging:\n" << " --verbose: turn on additional logging.\n" << " --log_fst_comp: log components of Fst/Phi_st calculations to a file.\n"; // << " --bootstrap_type [exact|approx]: enable bootstrap resampling for population statistics (reference genome required).\n" exit(0); } stacks-1.35/src/populations.h000644 000765 000024 00000023372 12571641525 017036 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2012-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __POPULATIONS_H__ #define __POPULATIONS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::setw; using std::setprecision; using std::fixed; #include using std::stringstream; #include using std::vector; #include using std::map; #include using std::set; #include "constants.h" #include "stacks.h" #include "locus.h" #include "renz.h" #include "PopMap.h" #include "PopSum.h" #include "utils.h" #include "log_utils.h" #include "catalog_utils.h" #include "sql_utilities.h" #include "genotype_dictionaries.h" #include "ordered.h" #include "smoothing.h" #include "bootstrap.h" enum corr_type {p_value, bonferroni_win, bonferroni_gen, no_correction}; enum bs_type {bs_exact, bs_approx, bs_none}; enum merget {merge_sink, merge_src}; enum phaset {merge_failure, simple_merge, complex_phase, nomapping_fail, multimapping_fail, multiple_fails}; const int max_snp_dist = 500; class GenPos { public: uint id; uint bp; uint snp_index; loc_type type; GenPos(int id, int snp_index, int bp) { this->id = id; this->snp_index = snp_index; this->bp = bp; this->type = snp; } GenPos(int id, int snp_index, int bp, loc_type type) { this->id = id; this->snp_index = snp_index; this->bp = bp; this->type = type; } }; void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(vector > &, map > &, map > &); int load_marker_list(string, set &); int load_marker_column_list(string, map > &); int apply_locus_constraints(map &, PopMap *, map > &, ofstream &); int 
prune_polymorphic_sites(map &, PopMap *, PopSum *, map > &, map > &, set &, ofstream &); int log_haplotype_cnts(map &, ofstream &); bool order_unordered_loci(map &); int merge_shared_cutsite_loci(map &, PopMap *, PopSum *, map > &, ofstream &); phaset merge_and_phase_loci(PopMap *, CSLocus *, CSLocus *, set &, ofstream &); int merge_datums(int, int, Datum **, Datum **, set &, int); int merge_csloci(CSLocus *, CSLocus *, set &); int datum_adjust_snp_positions(map > &, CSLocus *, Datum *, map &); int tabulate_haplotypes(map &, PopMap *); int create_genotype_map(CSLocus *, PopMap *); int call_population_genotypes(CSLocus *, PopMap *); int tally_haplotype_freq(CSLocus *, PopMap *, int &, double &, string &); int translate_genotypes(map &, map > &, map &, PopMap *, map &, set &); int correct_fst_bonferroni_win(vector &); int bootstrap_fst_approximate_dist(vector &, vector &, double *, int *, map > &); int kernel_smoothed_popstats(map &, PopMap *, PopSum *, int, ofstream &); int bootstrap_popstats_approximate_dist(vector &, vector &, vector &, double *, int *, int, map > &, map > &); double bootstrap_approximate_pval(int, double, map > &); int calculate_summary_stats(vector > &, map > &, map &, PopMap *, PopSum *); int calculate_haplotype_stats(vector > &, map > &, map &, PopMap *, PopSum *); int kernel_smoothed_hapstats(vector &, PopSum *, int, double *); int calculate_haplotype_divergence(vector > &, map > &, map > &, map &, PopMap *, PopSum *); int calculate_haplotype_divergence_pairwise(vector > &, map > &, map > &, map &, PopMap *, PopSum *); double count_haplotypes_at_locus(int, int, Datum **, map &); bool fixed_locus(map > &, Datum **, vector &); bool uncalled_haplotype(const char *); int nuc_substitution_dist(map &, double **); int nuc_substitution_identity(map &, double **); int nuc_substitution_identity_max(map &, double **); HapStat *haplotype_amova(map &, map > &, Datum **, LocSum **, vector &); double amova_ssd_total(vector &, map &, double **); double 
amova_ssd_wp(vector &, map > &, map &, map > &, double **); double amova_ssd_ap_wg(vector &, map > &, map &, map > &, double **, double **); double amova_ssd_ag(vector &, map > &, map &, map > &, double **, double); double haplotype_d_est(map > &, Datum **, LocSum **, vector &); LocStat *haplotype_diversity(int, int, Datum **); int write_sql(map &, PopMap *); int write_fst_stats(vector > &, map > &, map &, PopMap *, PopSum *, ofstream &); int write_generic(map &, PopMap *, map &, bool); int write_genomic(map &, PopMap *); int write_fasta(map &, PopMap *, map &, vector &); int write_strict_fasta(map &, PopMap *, map &, vector &); int write_vcf(map &, PopMap *, PopSum *, map &, vector &, map > &); int write_vcf_ordered(map &, PopMap *, PopSum *, map &, vector &, map > &, ofstream &); int write_vcf_haplotypes(map &, PopMap *, PopSum *, map &, vector &); int populate_snp_calls(map &, PopMap *, map &, vector &, map > &); int find_datum_allele_depths(Datum *, int, char, char, int, int &, int &); int write_genepop(map &, PopMap *, PopSum *, map > &, map &); int write_genepop_ordered(map &, PopMap *, PopSum *, map > &, map &, ofstream &); int write_structure(map &, PopMap *, PopSum *, map > &, map &); int write_structure_ordered(map &, PopMap *, PopSum *, map > &, map &, ofstream &); int write_phase(map &, PopMap *, PopSum *, map > &, map &); int write_fastphase(map &, PopMap *, PopSum *, map > &, map &); int write_beagle(map &, PopMap *, PopSum *, map > &, map &); int write_beagle_phased(map &, PopMap *, PopSum *, map > &, map &); int write_plink(map &, PopMap *, PopSum *, map > &, map &); int write_hzar(map &, PopMap *, PopSum *, map > &, map &); int write_treemix(map &, PopMap *, PopSum *, map > &, map &); int write_phylip(map &, PopMap *, PopSum *, map > &, map &); int write_fullseq_phylip(map &, PopMap *, PopSum *, map > &, map &); int tally_observed_haplotypes(vector &, int, char &, char &); int tally_ref_alleles(LocSum **, int, int, char &, char &); int 
load_snp_calls(string, PopMap *); bool compare_pop_map(pair, pair); bool hap_compare(pair, pair); bool compare_genpos(GenPos, GenPos); #endif // __POPULATIONS_H__ stacks-1.35/src/process_radtags.cc000644 000765 000024 00000120043 12574066143 017773 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // process_radtags -- clean raw reads using a sliding window approach; // split reads by barcode, check RAD cutsite is intact, correct barcodes/cutsites // within one basepair, truncate reads on request. // #include "process_radtags.h" // // Global variables to hold command-line options. 
// FileT in_file_type = FileT::unknown; FileT out_file_type = FileT::unknown; string in_file; string in_file_p1; string in_file_p2; string in_path_1; string in_path_2; string out_path; string barcode_file; string renz_1; string renz_2; char *adapter_1; char *adapter_2; barcodet barcode_type = null_null; bool retain_header = false; bool filter_adapter = false; bool paired = false; bool clean = false; bool quality = false; bool recover = false; bool interleaved = false; bool merge = false; bool discards = false; bool overhang = false; bool filter_illumina = false; bool check_radtag = true; uint truncate_seq = 0; int barcode_dist_1 = 1; int barcode_dist_2 = -1; double win_size = 0.15; uint score_limit = 10; uint len_limit = 0; uint num_threads = 1; // // How to shift FASTQ-encoded quality scores from ASCII down to raw scores // score = encoded letter - 64; Illumina version 1.3 - 1.5 // score = encoded letter - 33; Sanger / Illumina version 1.6+ int qual_offset = 33; // // Handle variable-size barcodes. // uint min_bc_size_1 = 0; uint max_bc_size_1 = 0; uint min_bc_size_2 = 0; uint max_bc_size_2 = 0; // // Kmer data for adapter filtering. // int kmer_size = 5; int distance = 1; int adp_1_len = 0; int adp_2_len = 0; AdapterHash adp_1_kmers, adp_2_kmers; map renz; map renz_cnt; map renz_len; int main (int argc, char* argv[]) { initialize_renz(renz, renz_cnt, renz_len); parse_command_line(argc, argv); // // If input files are gzipped, output gziped files, unless the user chooses an output type. 
// if (out_file_type == FileT::unknown) { if (in_file_type == FileT::gzfastq || in_file_type == FileT::bam) out_file_type = FileT::gzfastq; else out_file_type = FileT::fastq; } if (paired) cerr << "Processing paired-end data.\n"; else cerr << "Processing single-end data.\n"; cerr << "Using Phred+" << qual_offset << " encoding for quality scores.\n"; if (truncate_seq > 0) cerr << "Reads will be truncated to " << truncate_seq << "bp\n"; if (filter_illumina) cerr << "Discarding reads marked as 'failed' by Illumina's chastity/purity filters.\n"; if (filter_adapter) { cerr << "Filtering reads for adapter sequence:\n"; if (adapter_1 != NULL) { cerr << " " << adapter_1 << "\n"; init_adapter_seq(kmer_size, adapter_1, adp_1_len, adp_1_kmers); } if (adapter_2 != NULL) { cerr << " " << adapter_2 << "\n"; init_adapter_seq(kmer_size, adapter_2, adp_2_len, adp_2_kmers); } cerr << " " << distance << " mismatches allowed to adapter sequence.\n"; } vector > files; vector barcodes; set se_bc, pe_bc; map pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs; map pair_1_gzfhs, pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs; map > counters; map > barcode_log; build_file_list(files); load_barcodes(barcode_file, barcodes, se_bc, pe_bc, min_bc_size_1, max_bc_size_1, min_bc_size_2, max_bc_size_2); if (recover && barcode_type != null_null) { if (barcode_type == index_null || barcode_type == inline_null) cerr << "Will attempt to recover barcodes with at most " << barcode_dist_1 << " mismatches.\n"; else cerr << "Will attempt to recover barcodes with at most " << barcode_dist_1 << " / " << barcode_dist_2 << " mismatches.\n"; } if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) open_files(files, barcodes, pair_1_gzfhs, pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs, counters); else open_files(files, barcodes, pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs, counters); int result = 1; for (uint i = 0; i < files.size(); i++) { cerr << "Processing file " << i+1 << " of " << files.size() << " [" << 
files[i].first.c_str() << "]\n"; counters[files[i].first]["total"] = 0; counters[files[i].first]["ill_filtered"] = 0; counters[files[i].first]["adapter"] = 0; counters[files[i].first]["low_quality"] = 0; counters[files[i].first]["noradtag"] = 0; counters[files[i].first]["ambiguous"] = 0; counters[files[i].first]["retained"] = 0; counters[files[i].first]["recovered"] = 0; if (paired) { if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) result = process_paired_reads(files[i].first, files[i].second, se_bc, pe_bc, pair_1_gzfhs, pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs, counters[files[i].first], barcode_log); else result = process_paired_reads(files[i].first, files[i].second, se_bc, pe_bc, pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs, counters[files[i].first], barcode_log); } else { if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) result = process_reads(files[i].first, se_bc, pe_bc, pair_1_gzfhs, counters[files[i].first], barcode_log); else result = process_reads(files[i].first, se_bc, pe_bc, pair_1_fhs, counters[files[i].first], barcode_log); } cerr << " " << counters[files[i].first]["total"] << " total reads; "; if (filter_illumina) cerr << "-" << counters[files[i].first]["ill_filtered"] << " failed Illumina reads; "; cerr << "-" << counters[files[i].first]["ambiguous"] << " ambiguous barcodes; " << "-" << counters[files[i].first]["noradtag"] << " ambiguous RAD-Tags; " << "+" << counters[files[i].first]["recovered"] << " recovered; " << "-" << counters[files[i].first]["low_quality"] << " low quality reads; " << counters[files[i].first]["retained"] << " retained reads.\n"; if (filter_adapter) cerr << " " << counters[files[i].first]["adapter"] << " reads with adapter sequence.\n"; if (!result) { cerr << "Error processing reads.\n"; break; } } cerr << "Closing files, flushing buffers...\n"; if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) { close_file_handles(pair_1_gzfhs); if (paired) { 
close_file_handles(rem_1_gzfhs); close_file_handles(rem_2_gzfhs); close_file_handles(pair_2_gzfhs); } } else { close_file_handles(pair_1_fhs); if (paired) { close_file_handles(rem_1_fhs); close_file_handles(rem_2_fhs); close_file_handles(pair_2_fhs); } } print_results(argc, argv, barcodes, counters, barcode_log); return 0; } template int process_paired_reads(string prefix_1, string prefix_2, set &se_bc, set &pe_bc, map &pair_1_fhs, map &pair_2_fhs, map &rem_1_fhs, map &rem_2_fhs, map &counter, map > &barcode_log) { Input *fh_1, *fh_2; Read *r_1, *r_2; ofstream *discard_fh_1, *discard_fh_2; int return_val = 1; string path_1 = in_path_1 + prefix_1; string path_2 = in_path_2 + prefix_2; if (interleaved) cerr << " Reading data from:\n " << path_1 << "\n"; else cerr << " Reading data from:\n " << path_1 << " and\n " << path_2 << "\n"; if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1.c_str()); fh_2 = interleaved ? fh_1 : new Fastq(path_2.c_str()); } else if (in_file_type == FileT::gzfastq) { fh_1 = new GzFastq(path_1.c_str()); fh_2 = interleaved ? fh_1 : new GzFastq(path_2.c_str()); } else if (in_file_type == FileT::bam) { fh_1 = new BamUnAln(path_1.c_str()); fh_2 = fh_1; } else if (in_file_type == FileT::bustard) { fh_1 = new Bustard(path_1.c_str()); fh_2 = interleaved ? fh_1 : new Bustard(path_2.c_str()); } // // Open a file for recording discarded reads // if (discards) { path_1 = out_path + prefix_1 + ".discards"; discard_fh_1 = new ofstream(path_1.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path_1 << "'\n"; exit(1); } path_2 = out_path + prefix_2 + ".discards"; discard_fh_2 = new ofstream(path_2.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path_2 << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. 
// Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Attempting to read first pair of input records, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } r_1 = new Read(strlen(s_1->seq), 1, min_bc_size_1, win_size); r_2 = new Read(strlen(s_2->seq), 2, min_bc_size_2, win_size); // // Set len_limit so that if we encounter reads already shorter than truncate_seq limit // they will be discarded. // if (truncate_seq > 0) len_limit = truncate_seq; BarcodePair bc; // // If no barcodes were specified, set the barcode object to be the input file names. // if (max_bc_size_1 == 0) bc.set(prefix_1, prefix_2); long i = 1; do { if (i % 10000 == 0) cerr << " Processing RAD-Tag " << i << " \r"; parse_input_record(s_1, r_1); parse_input_record(s_2, r_2); counter["total"] += 2; if (barcode_type != null_null && barcode_type != inline_null && barcode_type != index_null) bc.set(r_1->se_bc, r_2->pe_bc); else if (barcode_type != null_null) bc.set(r_1->se_bc); process_barcode(r_1, r_2, bc, pair_1_fhs, se_bc, pe_bc, barcode_log, counter); // // Adjust the size of the read to accommodate truncating the sequence and variable // barcode lengths. With standard Illumina data we want to output constant length // reads even as the barcode size may change. Other technologies, like IonTorrent // need to be truncated uniformly. 
// if (truncate_seq > 0) { if (truncate_seq + r_1->inline_bc_len <= r_1->len) r_1->set_len(truncate_seq + r_1->inline_bc_len); if (truncate_seq + r_2->inline_bc_len <= r_2->len) r_2->set_len(truncate_seq + r_2->inline_bc_len); } else { if (barcode_type == inline_null || barcode_type == inline_inline || barcode_type == inline_index) r_1->set_len(r_1->len - (max_bc_size_1 - r_1->inline_bc_len)); if (barcode_type == inline_index || barcode_type == index_index) r_2->set_len(r_2->len - (max_bc_size_2 - r_2->inline_bc_len)); } if (r_1->retain) process_singlet(r_1, renz_1, false, barcode_log[bc], counter); if (r_2->retain) process_singlet(r_2, renz_2, true, barcode_log[bc], counter); int result_1 = 1; int result_2 = 1; if (r_1->retain && r_2->retain) { if (retain_header) { result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], s_1, r_1) : write_fasta(pair_1_fhs[bc], s_1, r_1); result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_2_fhs[bc], s_2, r_2) : write_fasta(pair_2_fhs[bc], s_2, r_2); } else { result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], r_1, overhang) : write_fasta(pair_1_fhs[bc], r_1, overhang); result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_2_fhs[bc], r_2, overhang) : write_fasta(pair_2_fhs[bc], r_2, overhang); } } else if (r_1->retain && !r_2->retain) { // // Write to the remainder file. // if (retain_header) result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_1_fhs[bc], s_1, r_1) : write_fasta(rem_1_fhs[bc], s_1, r_1); else result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_1_fhs[bc], r_1, overhang) : write_fasta(rem_1_fhs[bc], r_1, overhang); } else if (!r_1->retain && r_2->retain) { // // Write to the remainder file. 
// if (retain_header) result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_2_fhs[bc], s_2, r_2) : write_fasta(rem_2_fhs[bc], s_2, r_2); else result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_2_fhs[bc], r_2, overhang) : write_fasta(rem_2_fhs[bc], r_2, overhang); } if (!result_1 || !result_2) { cerr << "Error writing to output file for '" << bc.str() << "'\n"; return_val = -1; break; } if (discards && !r_1->retain) result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(discard_fh_1, s_1) : write_fasta(discard_fh_1, s_1); if (discards && !r_2->retain) result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(discard_fh_2, s_2) : write_fasta(discard_fh_2, s_2); if (!result_1 || !result_2) { cerr << "Error writing to discard file for '" << bc.str() << "'\n"; return_val = -1; break; } delete s_1; delete s_2; i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); if (discards) { delete discard_fh_1; delete discard_fh_2; } delete fh_1; if (interleaved == false) delete fh_2; delete r_1; delete r_2; return return_val; } template int process_reads(string prefix, set &se_bc, set &pe_bc, map &pair_1_fhs, map &counter, map > &barcode_log) { Input *fh; Read *r; ofstream *discard_fh; int return_val = 1; string path = in_path_1 + prefix; if (in_file_type == FileT::fastq) fh = new Fastq(path.c_str()); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(path.c_str()); else if (in_file_type == FileT::bam) fh = new BamUnAln(path.c_str()); else if (in_file_type == FileT::bustard) fh = new Bustard(path.c_str()); // // Open a file for recording discarded reads // if (discards) { path = out_path + prefix + ".discards"; discard_fh = new ofstream(path.c_str(), ifstream::out); if (discard_fh->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } } // // Read in the 
first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s = fh->next_seq(); if (s == NULL) { cerr << "Attempting to read first input record, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } // // Set len_limit so that if we encounter reads already shorter than truncate_seq limit // they will be discarded. // if (truncate_seq > 0) len_limit = truncate_seq; r = new Read(strlen(s->seq), 1, min_bc_size_1, win_size); BarcodePair bc; // // If no barcodes were specified, set the barcode object to be the input file name so // that reads are written to an output file of the same name as the input file. // if (max_bc_size_1 == 0) bc.set(prefix); //cerr << "Length: " << r->len << "; Window length: " << r->win_len << "; Stop position: " << r->stop_pos << "\n"; long i = 1; do { if (i % 10000 == 0) cerr << " Processing RAD-Tag " << i << " \r"; counter["total"]++; parse_input_record(s, r); if (barcode_type == inline_null || barcode_type == index_null) bc.set(r->se_bc); else if (barcode_type == index_inline || barcode_type == inline_index) bc.set(r->se_bc, r->pe_bc); process_barcode(r, NULL, bc, pair_1_fhs, se_bc, pe_bc, barcode_log, counter); // // Adjust the size of the read to accommodate truncating the sequence and variable // barcode lengths. With standard Illumina data we want to output constant length // reads even as the barcode size may change. Other technologies, like IonTorrent // need to be truncated uniformly. 
// if (truncate_seq > 0) { if (truncate_seq + r->inline_bc_len <= r->len) r->set_len(truncate_seq + r->inline_bc_len); } else { if (barcode_type == inline_null || barcode_type == inline_inline || barcode_type == inline_index) r->set_len(r->len - (max_bc_size_1 - r->inline_bc_len)); } if (r->retain) process_singlet(r, renz_1, false, barcode_log[bc], counter); int result = 1; if (r->retain) { if (retain_header) result = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], s, r) : write_fasta(pair_1_fhs[bc], s, r); else result = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], r, overhang) : write_fasta(pair_1_fhs[bc], r, overhang); } if (!result) { cerr << "Error writing to output file for '" << bc.str() << "'\n"; return_val = -1; break; } if (discards && !r->retain) result = out_file_type == FileT::fastq ? write_fastq(discard_fh, s) : write_fasta(discard_fh, s); if (!result) { cerr << "Error writing to discard file for '" << bc.str() << "'\n"; return_val = -1; break; } delete s; i++; } while ((s = fh->next_seq()) != NULL); if (discards) delete discard_fh; // // Close the file and delete the Input object. // delete fh; delete r; return return_val; } inline int process_singlet(Read *href, string res_enz, bool paired_end, map &bc_log, map &counter) { char *p; if (filter_illumina && href->filter) { counter["ill_filtered"]++; href->retain = 0; return 0; } // // If this read is already shorter than our length limit, discard it. // if (len_limit > 0 && (href->len - href->inline_bc_len) < len_limit) { counter["low_quality"]++; if (barcode_type != null_null) bc_log["low_qual"]++; href->retain = 0; return 0; } // // Is the RADTAG intact? 
// if (check_radtag && res_enz.length() > 0) { bool rad_cor = false; for (int i = 0; i < renz_cnt[res_enz]; i++) { p = href->seq + href->inline_bc_len; if (strncmp(p, renz[res_enz][i], renz_len[res_enz]) == 0) rad_cor = true; } if (rad_cor == false) { // // Try to correct the RAD-Tag. // if (!correct_radtag(href, res_enz, counter)) { if (barcode_type != null_null) bc_log["noradtag"]++; counter["noradtag"]++; href->retain = 0; return 0; } } } // // Drop this sequence if it has any uncalled nucleotides. // if (clean) { for (char *p = href->seq + href->inline_bc_len; *p != '\0'; p++) if (*p == '.' || *p == 'N') { counter["low_quality"]++; href->retain = 0; if (barcode_type != null_null) bc_log["low_qual"]++; return 0; } } // // Drop this sequence if it has low quality scores. // if (quality && check_quality_scores(href, qual_offset, score_limit, len_limit, href->inline_bc_len) <= 0) { counter["low_quality"]++; if (barcode_type != null_null) bc_log["low_qual"]++; href->retain = 0; return 0; } // // Drop this sequence if it contains adapter sequence. // if (filter_adapter) { int res = 1; if (paired_end == true && adp_2_len > 0) res = filter_adapter_seq(href, adapter_2, adp_2_len, adp_2_kmers, kmer_size, distance, len_limit); if (paired_end == false && adp_1_len > 0) res = filter_adapter_seq(href, adapter_1, adp_1_len, adp_1_kmers, kmer_size, distance, len_limit); if (res <= 0) { // cerr << "Sequence " << href->seq << " contains adapter.\n"; counter["adapter"]++; href->retain = 0; return 0; } } if (barcode_type != null_null) bc_log["retained"]++; counter["retained"]++; return 0; } int correct_radtag(Read *href, string res_enz, map &counter) { if (recover == false) return 0; // // If the RAD-Tag sequence is off by no more than a single nucleotide, correct it. // int d = 0; for (int i = 0; i < renz_cnt[res_enz]; i++) { d = dist(renz[res_enz][i], href->seq + href->inline_bc_len); if (d <= 1) { // // Correct the read. 
// strncpy(href->seq + href->inline_bc_len, renz[res_enz][i], renz_len[res_enz]); counter["recovered"]++; return 1; } } return 0; } int dist(const char *res_enz, char *seq) { const char *p; char *q; int dist = 0; for (p = res_enz, q = seq; *p != '\0'; p++, q++) if (*p != *q) dist++; return dist; } int print_results(int argc, char **argv, vector &barcodes, map > &counters, map > &barcode_log) { map >::iterator it; string log_path = out_path + "process_radtags.log"; ofstream log(log_path.c_str()); if (log.fail()) { cerr << "Unable to open log file '" << log_path << "'\n"; return 0; } cerr << "Outputing details to log: '" << log_path << "'\n\n"; init_log(log, argc, argv); log << "File\t" << "Retained Reads\t"; if (filter_illumina) log << "Illumina Filtered\t"; if (filter_adapter) log << "Adapter Seq" << "\t"; log << "Low Quality\t" << "Ambiguous Barcodes\t" << "Ambiguous RAD-Tag\t" << "Total\n"; for (it = counters.begin(); it != counters.end(); it++) { log << it->first << "\t" << it->second["retained"] << "\t"; if (filter_illumina) log << it->second["ill_filtered"] << "\t"; if (filter_adapter) log << it->second["adapter"] << "\t"; log << it->second["low_quality"] << "\t" << it->second["ambiguous"] << "\t" << it->second["noradtag"] << "\t" << it->second["total"] << "\n"; } map c; c["total"] = 0; c["low_quality"] = 0; c["adapter"] = 0; c["ill_filtered"] = 0; c["ambiguous"] = 0; c["noradtag"] = 0; // // Total up the individual counters // for (it = counters.begin(); it != counters.end(); it++) { c["total"] += it->second["total"]; c["ill_filtered"] += it->second["ill_filtered"]; c["low_quality"] += it->second["low_quality"]; c["adapter"] += it->second["adapter"]; c["ambiguous"] += it->second["ambiguous"]; c["noradtag"] += it->second["noradtag"]; c["retained"] += it->second["retained"]; } cerr << c["total"] << " total sequences;\n"; if (filter_illumina) cerr << " " << c["ill_filtered"] << " failed Illumina filtered reads;\n"; if (filter_adapter) cerr << " " << c["adapter"] 
<< " reads contained adapter sequence;\n"; cerr << " " << c["ambiguous"] << " ambiguous barcode drops;\n" << " " << c["low_quality"] << " low quality read drops;\n" << " " << c["noradtag"] << " ambiguous RAD-Tag drops;\n" << c["retained"] << " retained reads.\n"; log << "\n" << "Total Sequences\t" << c["total"] << "\n"; if (filter_illumina) log << "Failed Illumina filtered reads\t" << c["ill_filtered"] << "\n"; if (filter_adapter) log << "Reads containing adapter sequence\t" << c["adapter"] << "\n"; log << "Ambiguous Barcodes\t" << c["ambiguous"] << "\n" << "Low Quality\t" << c["low_quality"] << "\n" << "Ambiguous RAD-Tag\t" << c["noradtag"] << "\n" << "Retained Reads\t" << c["retained"] << "\n"; if (max_bc_size_1 == 0) return 0; // // Where barcode filenames specified? // bool bc_names = false; for (uint i = 0; i < barcodes.size(); i++) if (barcodes[i].name_exists()) { bc_names = true; break; } // // Print out barcode information. // log << "\n" << "Barcode\t"; if (bc_names) log << "Filename\t"; log << "Total\t" << "No RadTag\t" << "Low Quality\t" << "Retained\n"; set barcode_list; for (uint i = 0; i < barcodes.size(); i++) { barcode_list.insert(barcodes[i]); log << barcodes[i] << "\t"; if (bc_names) log << barcodes[i].name << "\t"; if (barcode_log.count(barcodes[i]) == 0) log << "0\t" << "0\t" << "0\t" << "0\n"; else log << barcode_log[barcodes[i]]["total"] << "\t" << barcode_log[barcodes[i]]["noradtag"] << "\t" << barcode_log[barcodes[i]]["low_qual"] << "\t" << barcode_log[barcodes[i]]["retained"] << "\n"; } log << "\n" << "Sequences not recorded\n" << "Barcode\t" << "Total\n"; // // Sort unused barcodes by number of occurances. 
// map >::iterator bit; vector > bcs; for (bit = barcode_log.begin(); bit != barcode_log.end(); bit++) bcs.push_back(make_pair(bit->first, bit->second["total"])); sort(bcs.begin(), bcs.end(), compare_barcodes); for (uint i = 0; i < bcs.size(); i++) { if (barcode_list.count(bcs[i].first)) continue; if (bcs[i].second == 0) continue; log << bcs[i].first << "\t" << bcs[i].second << "\n"; } log.close(); return 0; } int compare_barcodes(pair a, pair b) { return a.second > b.second; } int parse_command_line(int argc, char* argv[]) { FileT ftype; int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"quality", no_argument, NULL, 'q'}, {"clean", no_argument, NULL, 'c'}, {"recover", no_argument, NULL, 'r'}, {"discards", no_argument, NULL, 'D'}, {"paired", no_argument, NULL, 'P'}, {"interleaved", no_argument, NULL, 'I'}, {"merge", no_argument, NULL, 'm'}, {"disable_rad_check", no_argument, NULL, 'R'}, {"filter_illumina", no_argument, NULL, 'F'}, {"retain_header", no_argument, NULL, 'H'}, {"null_index", no_argument, NULL, 'U'}, {"index_null", no_argument, NULL, 'u'}, {"inline_null", no_argument, NULL, 'V'}, {"index_index", no_argument, NULL, 'W'}, {"inline_inline", no_argument, NULL, 'x'}, {"index_inline", no_argument, NULL, 'Y'}, {"inline_index", no_argument, NULL, 'Z'}, {"barcode_dist_1", required_argument, NULL, 'B'}, {"barcode_dist_2", required_argument, NULL, 'C'}, {"infile_type", required_argument, NULL, 'i'}, {"outfile_type", required_argument, NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"file_p1", required_argument, NULL, '1'}, {"file_p2", required_argument, NULL, '2'}, {"path", required_argument, NULL, 'p'}, {"outpath", required_argument, NULL, 'o'}, {"truncate", required_argument, NULL, 't'}, {"renz_1", required_argument, NULL, 'e'}, {"renz_2", required_argument, NULL, 'z'}, {"barcodes", required_argument, NULL, 'b'}, {"window_size", required_argument, NULL, 'w'}, {"score_limit", 
required_argument, NULL, 's'}, {"encoding", required_argument, NULL, 'E'}, {"len_limit", required_argument, NULL, 'L'}, {"adapter_1", required_argument, NULL, 'A'}, {"adapter_2", required_argument, NULL, 'G'}, {"adapter_mm", required_argument, NULL, 'T'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "HuUVWxYZhvRFIcqrDPmB:C:i:y:f:o:t:e:z:b:1:2:p:s:w:E:L:A:G:T:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'i': if (strcasecmp(optarg, "bustard") == 0) in_file_type = FileT::bustard; else if (strcasecmp(optarg, "bam") == 0) in_file_type = FileT::bam; else if (strcasecmp(optarg, "gzfastq") == 0) in_file_type = FileT::gzfastq; else in_file_type = FileT::fastq; break; case 'y': if (strcasecmp(optarg, "fastq") == 0) out_file_type = FileT::fastq; else if (strcasecmp(optarg, "gzfastq") == 0) out_file_type = FileT::gzfastq; else if (strcasecmp(optarg, "fasta") == 0) out_file_type = FileT::fasta; else if (strcasecmp(optarg, "gzfasta") == 0) out_file_type = FileT::gzfasta; break; case 'E': if (strcasecmp(optarg, "phred64") == 0) qual_offset = 64; else if (strcasecmp(optarg, "phred33") == 0) qual_offset = 33; else { cerr << "Unknown quality score encoding, '" << optarg << "'\n"; help(); } break; case 'f': in_file = optarg; ftype = FileT::fastq; break; case 'p': in_path_1 = optarg; in_path_2 = in_path_1; ftype = FileT::fastq; break; case '1': paired = true; in_file_p1 = optarg; ftype = FileT::fastq; break; case '2': paired = true; in_file_p2 = optarg; ftype = FileT::fastq; break; case 'P': paired = true; break; case 'I': interleaved = true; break; case 'B': barcode_dist_1 = is_integer(optarg); break; case 'C': barcode_dist_2 = is_integer(optarg); break; case 'o': out_path = optarg; break; case 'q': quality = true; break; case 'c': clean = true; break; case 'r': recover = true; break; case 't': truncate_seq = is_integer(optarg); break; 
case 'e': renz_1 = optarg; break; case 'z': renz_2 = optarg; break; case 'b': barcode_file = optarg; if (barcode_type == null_null) barcode_type = inline_null; break; case 'm': merge = true; break; case 'D': discards = true; break; case 'R': check_radtag = false; break; case 'F': filter_illumina = true; break; case 'U': barcode_type = null_index; break; case 'u': barcode_type = index_null; break; case 'V': barcode_type = inline_null; break; case 'W': barcode_type = index_index; break; case 'x': barcode_type = inline_inline; break; case 'Y': barcode_type = index_inline; break; case 'Z': barcode_type = inline_index; break; case 'A': adapter_1 = new char[strlen(optarg) + 1]; strcpy(adapter_1, optarg); filter_adapter = true; break; case 'G': adapter_2 = new char[strlen(optarg) + 1]; strcpy(adapter_2, optarg); filter_adapter = true; break; case 'T': distance = is_integer(optarg); break; case 'H': retain_header = true; break; case 'L': len_limit = is_integer(optarg); break; case 'w': win_size = is_double(optarg); break; case 's': score_limit = is_integer(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. 
help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_file.length() == 0 && in_path_1.length() == 0 && in_file_p1.length() == 0) { cerr << "You must specify an input file of a directory path to a set of input files.\n"; help(); } if (in_file.length() > 0 && in_path_1.length() > 0) { cerr << "You must specify either a single input file (-f) or a directory path (-p), not both.\n"; help(); } if (in_file.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a single input file (-f) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a file path (-p) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && in_path_1.at(in_path_1.length() - 1) != '/') in_path_1 += "/"; if (in_path_2.length() > 0 && in_path_2.at(in_path_2.length() - 1) != '/') in_path_2 += "/"; if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (in_file_type == FileT::unknown) in_file_type = ftype; if (in_file_type == FileT::bam && paired == true && interleaved == false) { cerr << "You may only specify a BAM input file for paired-end data if the read pairs are interleaved.\n"; help(); } if (in_file_type == FileT::bam && (barcode_type != inline_null && barcode_type != inline_inline && barcode_type != null_null)) { cerr << "For BAM input files only inline or unbarcoded data can be processed.\n"; help(); } if (barcode_file.length() == 0 && barcode_type != null_null) { cerr << "You specified a barcode type without providing a file containing barcodes.\n"; help(); } if (barcode_file.length() == 0) cerr << "No barcodes specified, files will not be demultiplexed.\n"; if (barcode_file.length() > 0 && merge) { cerr << "You may specify a set of barcodes, or that all files should be merged, 
not both.\n"; help(); } if (check_radtag && renz_1.length() == 0) { cerr << "You must specify the restriction enzyme used.\n"; help(); } if (check_radtag && renz.count(renz_1) == 0) { cerr << "Unrecognized restriction enzyme specified: '" << renz_1.c_str() << "'.\n"; help(); } if (check_radtag && renz_2.length() > 0 && renz.count(renz_2) == 0) { cerr << "Unrecognized restriction enzyme specified: '" << renz_2.c_str() << "'.\n"; help(); } if (score_limit > 40) { cerr << "Score limit must be between 0 and 40.\n"; help(); } if (win_size < 0 || win_size >= 1) { cerr << "Window size is a fraction between 0 and 1.\n"; help(); } if (recover && barcode_type != null_null) { if (barcode_type != index_null && barcode_type != inline_null && barcode_dist_2 < 0) barcode_dist_2 = barcode_dist_1; } return 0; } void version() { std::cerr << "process_radtags " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "process_radtags " << VERSION << "\n" << "process_radtags [-f in_file | -p in_dir [-P] [-I] | -1 pair_1 -2 pair_2] -b barcode_file -o out_dir -e enz [-c] [-q] [-r] [-t len] [-D] [-w size] [-s lim] [-h]\n" << " f: path to the input file if processing single-end sequences.\n" << " i: input file type, either 'bustard' for the Illumina BUSTARD format, 'bam', 'fastq' (default), or 'gzfastq' for gzipped FASTQ.\n" << " y: output type, either 'fastq', 'gzfastq', 'fasta', or 'gzfasta' (default is to match the input file type).\n" << " p: path to a directory of files.\n" << " P: files contained within directory specified by '-p' are paired.\n" << " I: specify that the paired-end reads are interleaved in single files.\n" << " 1: first input file in a set of paired-end sequences.\n" << " 2: second input file in a set of paired-end sequences.\n" << " o: path to output the processed files.\n" << " b: path to a file containing barcodes for this run.\n" << " c: clean data, remove any read with an uncalled base.\n" << " q: discard reads with low quality scores.\n" << " r: rescue 
barcodes and RAD-Tags.\n" << " t: truncate final read length to this value.\n" << " E: specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger, default) or 'phred64' (Illumina 1.3 - 1.5).\n" << " D: capture discarded reads to a file.\n" << " w: set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).\n" << " s: set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).\n" << " h: display this help messsage." << "\n\n" << " Barcode options:\n" << " --inline_null: barcode is inline with sequence, occurs only on single-end read (default).\n" << " --index_null: barcode is provded in FASTQ header (Illumina i5 or i7 read).\n" << " --null_index: barcode is provded in FASTQ header (Illumina i7 read if both i5 and i7 read are provided).\n" << " --inline_inline: barcode is inline with sequence, occurs on single and paired-end read.\n" << " --index_index: barcode is provded in FASTQ header (Illumina i5 and i7 reads).\n" << " --inline_index: barcode is inline with sequence on single-end read and occurs in FASTQ header (from either i5 or i7 read).\n" << " --index_inline: barcode occurs in FASTQ header (Illumina i5 or i7 read) and is inline with single-end sequence (for single-end data) on paired-end read (for paired-end data).\n\n" << " Restriction enzyme options:\n" << " -e , --renz_1 : provide the restriction enzyme used (cut site occurs on single-end read)\n" << " --renz_2 : if a double digest was used, provide the second restriction enzyme used (cut site occurs on the paired-end read).\n" << " Currently supported enzymes include:\n" << " "; map::iterator it; uint cnt = renz_cnt.size(); it = renz_cnt.begin(); for (uint i = 1; i <= cnt; i++) { std::cerr << "'" << it->first << "'"; if (i < cnt - 1) std::cerr << ", "; else if (i == cnt - 1) std::cerr << ", or "; if (i % 8 == 0) std::cerr << "\n "; it++; } std::cerr << "\n" << " Adapter options:\n" << 
" --adapter_1 : provide adaptor sequence that may occur on the single-end read for filtering.\n" << " --adapter_2 : provide adaptor sequence that may occur on the paired-read for filtering.\n" << " --adapter_mm : number of mismatches allowed in the adapter sequence.\n\n" << " Output options:\n" << " --retain_header: retain unmodified FASTQ headers in the output.\n" << " --merge: if no barcodes are specified, merge all input files into a single output file.\n\n" << " Advanced options:\n" << " --filter_illumina: discard reads that have been marked by Illumina's chastity/purity filter as failing.\n" << " --disable_rad_check: disable checking if the RAD site is intact.\n" << " --len_limit : specify a minimum sequence length (useful if your data has already been trimmed).\n" << " --barcode_dist_1: the number of allowed mismatches when rescuing single-end barcodes (default 1).\n" << " --barcode_dist_2: the number of allowed mismatches when rescuing paired-end barcodes (defaults to --barcode_dist_1).\n"; exit(0); } stacks-1.35/src/process_radtags.h000644 000765 000024 00000005445 12533677757 017663 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __PROCESS_RADTAGS_H__ #define __PROCESS_RADTAGS_H__ #include #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include #include using std::stringstream; using std::istream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include "constants.h" #include "renz.h" #include "clean.h" #include "file_io.h" #include "write.h" #include "utils.h" #include "log_utils.h" #include "BustardI.h" // Reading input files in Tab-separated Bustard format #include "FastqI.h" // Reading input files in FASTQ format #include "gzFastq.h" // Reading gzipped input files in FASTQ format #include "BamUnalignedI.h" // Reading data from unaligned BAM files void help( void ); void version( void ); int parse_command_line(int, char **); template int process_reads(string, set &, set &, map &, map &, map > &); template int process_paired_reads(string, string, set &, set &, map &, map &, map &, map &, map &, map > &); int process_singlet(Read *, string, bool, map &, map &); int correct_radtag(Read *, string, map &); int check_quality_scores(Read *, bool); int dist(const char *, char *); int print_results(int, char **, vector &, map > &, map > &); int compare_barcodes(pair, pair); #endif // __PROCESS_RADTAGS_H__ stacks-1.35/src/process_shortreads.cc000644 000765 000024 00000112271 12574066143 020530 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // process_shortreads -- clean raw reads using a sliding window approach; // split reads by barcode if barcodes provided, correct barcodes // within one basepair, truncate reads on request. // #include "process_shortreads.h" // // Global variables to hold command-line options. // FileT in_file_type = FileT::unknown; FileT out_file_type = FileT::unknown; string in_file; string in_file_p1; string in_file_p2; string in_path_1; string in_path_2; string out_path; string barcode_file; char *adapter_1; char *adapter_2; barcodet barcode_type = null_null; bool retain_header = false; bool filter_adapter = false; bool paired = false; bool clean = false; bool quality = false; bool recover = false; bool interleaved = false; bool merge = false; bool discards = false; bool overhang = true; bool matepair = false; bool filter_illumina = false; bool trim_reads = true; uint truncate_seq = 0; int barcode_dist_1 = 1; int barcode_dist_2 = -1; double win_size = 0.15; int score_limit = 10; int len_limit = 31; int num_threads = 1; // // How to shift FASTQ-encoded quality scores from ASCII down to raw scores // score = encoded letter - 64; Illumina version 1.3 - 1.5 // score = encoded letter - 33; Sanger / Illumina version 1.6+ int qual_offset = 33; // // Handle variable-size barcodes. // uint min_bc_size_1 = 0; uint max_bc_size_1 = 0; uint min_bc_size_2 = 0; uint max_bc_size_2 = 0; // // Kmer data for adapter filtering. 
// int kmer_size = 5; int distance = 1; int adp_1_len = 0; int adp_2_len = 0; AdapterHash adp_1_kmers, adp_2_kmers; int main (int argc, char* argv[]) { parse_command_line(argc, argv); // // If input files are gzipped, output gziped files, unless the user chooses an output type. // if (out_file_type == FileT::unknown) { if (in_file_type == FileT::gzfastq || in_file_type == FileT::bam) out_file_type = FileT::gzfastq; else out_file_type = FileT::fastq; } cerr << "Using Phred+" << qual_offset << " encoding for quality scores.\n" << "Reads trimmed shorter than " << len_limit << " nucleotides will be discarded.\n"; if (truncate_seq > 0) cerr << "Reads will be truncated to " << truncate_seq << "bp\n"; if (filter_illumina) cerr << "Discarding reads marked as 'failed' by Illumina's chastity/purity filters.\n"; if (filter_adapter) { cerr << "Filtering reads for adapter sequence:\n"; if (adapter_1 != NULL) { cerr << " " << adapter_1 << "\n"; init_adapter_seq(kmer_size, adapter_1, adp_1_len, adp_1_kmers); } if (adapter_2 != NULL) { cerr << " " << adapter_2 << "\n"; init_adapter_seq(kmer_size, adapter_2, adp_2_len, adp_2_kmers); } cerr << " " << distance << " mismatches allowed to adapter sequence.\n"; } vector > files; vector barcodes; set se_bc, pe_bc; map pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs; map pair_1_gzfhs, pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs; map > counters; map > barcode_log; build_file_list(files); load_barcodes(barcode_file, barcodes, se_bc, pe_bc, min_bc_size_1, max_bc_size_1, min_bc_size_2, max_bc_size_2); if (recover && barcode_type != null_null) { if (barcode_type == index_null || barcode_type == inline_null) cerr << "Will attempt to recover barcodes with at most " << barcode_dist_1 << " mismatches.\n"; else cerr << "Will attempt to recover barcodes with at most " << barcode_dist_1 << " / " << barcode_dist_2 << " mismatches.\n"; } if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) open_files(files, barcodes, pair_1_gzfhs, 
pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs, counters); else open_files(files, barcodes, pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs, counters); int result = 1; for (uint i = 0; i < files.size(); i++) { cerr << "Processing file " << i+1 << " of " << files.size() << " [" << files[i].first.c_str() << "]\n"; counters[files[i].first]["total"] = 0; counters[files[i].first]["ill_filtered"] = 0; counters[files[i].first]["low_quality"] = 0; counters[files[i].first]["trimmed"] = 0; counters[files[i].first]["adapter"] = 0; counters[files[i].first]["ambiguous"] = 0; counters[files[i].first]["retained"] = 0; counters[files[i].first]["orphaned"] = 0; counters[files[i].first]["recovered"] = 0; if (paired) { if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) result = process_paired_reads(files[i].first, files[i].second, se_bc, pe_bc, pair_1_gzfhs, pair_2_gzfhs, rem_1_gzfhs, rem_2_gzfhs, counters[files[i].first], barcode_log); else result = process_paired_reads(files[i].first, files[i].second, se_bc, pe_bc, pair_1_fhs, pair_2_fhs, rem_1_fhs, rem_2_fhs, counters[files[i].first], barcode_log); } else { if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) result = process_reads(files[i].first, se_bc, pe_bc, pair_1_gzfhs, counters[files[i].first], barcode_log); else result = process_reads(files[i].first, se_bc, pe_bc, pair_1_fhs, counters[files[i].first], barcode_log); } cerr << " " << counters[files[i].first]["total"] << " total reads; "; if (filter_illumina) cerr << "-" << counters[files[i].first]["ill_filtered"] << " failed Illumina reads; "; cerr << "-" << counters[files[i].first]["ambiguous"] << " ambiguous barcodes; " << "+" << counters[files[i].first]["recovered"] << " recovered; " << "-" << counters[files[i].first]["low_quality"] << " low quality reads; " << counters[files[i].first]["retained"] << " retained reads.\n" << " "; if (filter_adapter) cerr << counters[files[i].first]["adapter"] << " reads with adapter sequence; "; cerr << 
counters[files[i].first]["trimmed"] << " trimmed reads; " << counters[files[i].first]["orphaned"] << " orphaned paired-ends.\n"; if (!result) { cerr << "Error processing reads.\n"; break; } } cerr << "Closing files, flushing buffers...\n"; if (out_file_type == FileT::gzfastq || out_file_type == FileT::gzfasta) { close_file_handles(pair_1_gzfhs); if (paired) { close_file_handles(rem_1_gzfhs); close_file_handles(rem_2_gzfhs); close_file_handles(pair_2_gzfhs); } } else { close_file_handles(pair_1_fhs); if (paired) { close_file_handles(rem_1_fhs); close_file_handles(rem_2_fhs); close_file_handles(pair_2_fhs); } } print_results(argc, argv, barcodes, counters, barcode_log); return 0; } template int process_paired_reads(string prefix_1, string prefix_2, set &se_bc, set &pe_bc, map &pair_1_fhs, map &pair_2_fhs, map &rem_1_fhs, map &rem_2_fhs, map &counter, map > &barcode_log) { Input *fh_1, *fh_2; Read *r_1, *r_2; ofstream *discard_fh_1, *discard_fh_2; int return_val = 1; string path_1 = in_path_1 + prefix_1; string path_2 = in_path_2 + prefix_2; if (interleaved) cerr << " Reading data from:\n " << path_1 << "\n"; else cerr << " Reading data from:\n " << path_1 << " and\n " << path_2 << "\n"; if (in_file_type == FileT::fastq) { fh_1 = new Fastq(path_1.c_str()); fh_2 = interleaved ? fh_1 : new Fastq(path_2.c_str()); } else if (in_file_type == FileT::gzfastq) { fh_1 = new GzFastq(path_1.c_str()); fh_2 = interleaved ? fh_1 : new GzFastq(path_2.c_str()); } else if (in_file_type == FileT::bam) { fh_1 = new BamUnAln(path_1.c_str()); fh_2 = fh_1; } else if (in_file_type == FileT::bustard) { fh_1 = new Bustard(path_1.c_str()); fh_2 = interleaved ? 
fh_1 : new Bustard(path_2.c_str()); } // // Open a file for recording discarded reads // if (discards) { path_1 = out_path + prefix_1 + ".discards"; discard_fh_1 = new ofstream(path_1.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path_1 << "'\n"; exit(1); } path_2 = out_path + prefix_2 + ".discards"; discard_fh_2 = new ofstream(path_2.c_str(), ifstream::out); if (discard_fh_1->fail()) { cerr << "Error opening discard output file '" << path_2 << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s_1 = fh_1->next_seq(); Seq *s_2 = fh_2->next_seq(); if (s_1 == NULL || s_2 == NULL) { cerr << "Attempting to read first pair of input records, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } r_1 = new Read(strlen(s_1->seq), 1, min_bc_size_1, win_size); r_2 = new Read(strlen(s_2->seq), 2, min_bc_size_2, win_size); BarcodePair bc; // // If no barcodes were specified, set the barcode object to be the input file names. // if (max_bc_size_1 == 0) bc.set(prefix_1, prefix_2); long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read " << i << " \r"; parse_input_record(s_1, r_1); parse_input_record(s_2, r_2); counter["total"] += 2; if (barcode_type != null_null && barcode_type != inline_null && barcode_type != index_null) bc.set(r_1->se_bc, r_2->pe_bc); else if (barcode_type != null_null) bc.set(r_1->se_bc); process_barcode(r_1, r_2, bc, pair_1_fhs, se_bc, pe_bc, barcode_log, counter); // // Adjust the size of the read to accommodate truncating the sequence and variable // barcode lengths. With standard Illumina data we want to output constant length // reads even as the barcode size may change. Other technologies, like IonTorrent // need to be truncated uniformly. 
// if (truncate_seq > 0) { if (truncate_seq + r_1->inline_bc_len <= r_1->len) r_1->set_len(truncate_seq + r_1->inline_bc_len); if (truncate_seq + r_2->inline_bc_len <= r_2->len) r_2->set_len(truncate_seq + r_2->inline_bc_len); } else { if (barcode_type == inline_null || barcode_type == inline_inline || barcode_type == inline_index) r_1->set_len(r_1->len - (max_bc_size_1 - r_1->inline_bc_len)); if (barcode_type == inline_index || barcode_type == index_index) r_2->set_len(r_2->len - (max_bc_size_2 - r_2->inline_bc_len)); } if (r_1->retain) process_singlet(r_1, false, barcode_log[bc], counter); if (r_2->retain) process_singlet(r_2, true, barcode_log[bc], counter); if (matepair) { rev_complement(r_1->seq, r_1->inline_bc_len, overhang); reverse_qual(r_1->phred, r_1->inline_bc_len, overhang); } int result_1 = 1; int result_2 = 1; if (r_1->retain && r_2->retain) { if (retain_header) { result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], s_1, r_1) : write_fasta(pair_1_fhs[bc], s_1, r_1); result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_2_fhs[bc], s_2, r_2) : write_fasta(pair_2_fhs[bc], s_2, r_2); } else { result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], r_1, overhang) : write_fasta(pair_1_fhs[bc], r_1, overhang); result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_2_fhs[bc], r_2, overhang) : write_fasta(pair_2_fhs[bc], r_2, overhang); } } else if (r_1->retain && !r_2->retain) { // // Write to a remainder file. // if (retain_header) result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_1_fhs[bc], s_1, r_1) : write_fasta(rem_1_fhs[bc], s_1, r_1); else result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? 
write_fastq(rem_1_fhs[bc], r_1, overhang) : write_fasta(rem_1_fhs[bc], r_1, overhang); } else if (!r_1->retain && r_2->retain) { // Write to a remainder file. if (retain_header) result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_2_fhs[bc], s_2, r_2) : write_fasta(rem_2_fhs[bc], s_2, r_2); else result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(rem_2_fhs[bc], r_2, overhang) : write_fasta(rem_2_fhs[bc], r_2, overhang); } if (!result_1 || !result_2) { cerr << "Error writing to output file for '" << bc.str() << "'\n"; return_val = -1; break; } if (discards && !r_1->retain) result_1 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(discard_fh_1, s_1) : write_fasta(discard_fh_1, s_1); if (discards && !r_2->retain) result_2 = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(discard_fh_2, s_2) : write_fasta(discard_fh_2, s_2); delete s_1; delete s_2; if (!result_1 || !result_2) { cerr << "Error writing to discard file for '" << bc.str() << "'\n"; return_val = -1; break; } i++; } while ((s_1 = fh_1->next_seq()) != NULL && (s_2 = fh_2->next_seq()) != NULL); if (discards) { delete discard_fh_1; delete discard_fh_2; } delete fh_1; if (interleaved == false) delete fh_2; return return_val; } template int process_reads(string prefix, set &se_bc, set &pe_bc, map &pair_1_fhs, map &counter, map > &barcode_log) { Input *fh; Read *r; ofstream *discard_fh; int return_val = 1; string path = in_path_1 + prefix; if (in_file_type == FileT::fastq) fh = new Fastq(path.c_str()); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(path.c_str()); else if (in_file_type == FileT::bam) fh = new BamUnAln(path.c_str()); else if (in_file_type == FileT::bustard) fh = new Bustard(path.c_str()); // // Open a file for recording discarded reads // if (discards) { path = path + ".discards"; discard_fh = new ofstream(path.c_str(), ifstream::out); if 
(discard_fh->fail()) { cerr << "Error opening discard output file '" << path << "'\n"; exit(1); } } // // Read in the first record, initializing the Seq object s. Then // initialize the Read object r, then loop, using the same objects. // Seq *s = fh->next_seq(); if (s == NULL) { cerr << "Attempting to read first input record, unable to allocate " << "Seq object (Was the correct input type specified?).\n"; exit(1); } r = new Read(strlen(s->seq), 1, min_bc_size_1, win_size); BarcodePair bc; // // If no barcodes were specified, set the barcode object to be the input file name so // that reads are written to an output file of the same name as the input file. // if (max_bc_size_1 == 0) bc.set(prefix); //cerr << "Length: " << r->len << "; Window length: " << r->win_len << "; Stop position: " << r->stop_pos << "\n"; long i = 1; do { if (i % 10000 == 0) cerr << " Processing short read " << i << " \r"; counter["total"]++; parse_input_record(s, r); if (barcode_type == inline_null || barcode_type == index_null) bc.set(r->se_bc); else if (barcode_type == index_inline || barcode_type == inline_index) bc.set(r->se_bc, r->pe_bc); process_barcode(r, NULL, bc, pair_1_fhs, se_bc, pe_bc, barcode_log, counter); // // Adjust the size of the read to accommodate truncating the sequence and variable // barcode lengths. With standard Illumina data we want to output constant length // reads even as the barcode size may change. Other technologies, like IonTorrent // need to be truncated uniformly. // if (truncate_seq > 0) { if (truncate_seq + r->inline_bc_len <= r->len) r->set_len(truncate_seq + r->inline_bc_len); } else { if (barcode_type == inline_null || barcode_type == inline_inline || barcode_type == inline_index) r->set_len(r->len - (max_bc_size_1 - r->inline_bc_len)); } if (r->retain) process_singlet(r, false, barcode_log[bc], counter); int result = 1; if (r->retain) { if (retain_header) result = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? 
write_fastq(pair_1_fhs[bc], s, r) : write_fasta(pair_1_fhs[bc], s, r); else result = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(pair_1_fhs[bc], r, overhang) : write_fasta(pair_1_fhs[bc], r, overhang); } if (!result) { cerr << "Error writing to output file for '" << bc.str() << "'\n"; return_val = -1; break; } if (discards && !r->retain) result = (out_file_type == FileT::fastq || out_file_type == FileT::gzfastq) ? write_fastq(discard_fh, s) : write_fasta(discard_fh, s); if (!result) { cerr << "Error writing to discard file for '" << bc.str() << "'\n"; return_val = -1; break; } delete s; i++; } while ((s = fh->next_seq()) != NULL); if (discards) delete discard_fh; // // Close the file and delete the Input object. // delete fh; return return_val; } inline int process_singlet(Read *href, bool paired_end, map &bc_log, map &counter) { if (filter_illumina && href->filter) { counter["ill_filtered"]++; href->retain = 0; return 0; } // // Drop this sequence if it has any uncalled nucleotides // if (clean) { for (char *p = href->seq + href->inline_bc_len; *p != '\0'; p++) if (*p == '.' || *p == 'N') { counter["low_quality"]++; href->retain = 0; return 0; } } bool adapter_trim = false; bool quality_trim = false; // // Drop or trim this sequence if it has low quality scores // if (quality) { int res = check_quality_scores(href, qual_offset, score_limit, len_limit, href->inline_bc_len); if (trim_reads) { if (res == 0) { counter["low_quality"]++; href->retain = 0; return 0; } else if (res < 0) { quality_trim = true; } } else { if (res <= 0) { counter["low_quality"]++; href->retain = 0; return 0; } } } // // Drop or trim this sequence if it contains adapter sequence. 
// if (filter_adapter) { int res = 1; if (paired_end == true && adp_2_len > 0) res = filter_adapter_seq(href, adapter_2, adp_2_len, adp_2_kmers, kmer_size, distance, len_limit); if (paired_end == false && adp_1_len > 0) res = filter_adapter_seq(href, adapter_1, adp_1_len, adp_1_kmers, kmer_size, distance, len_limit); if (res == 0) { counter["adapter"]++; href->retain = 0; return 0; } else if (res < 0) { counter["adapter"]++; adapter_trim = true; } } if (adapter_trim || quality_trim) counter["trimmed"]++; if (barcode_type != null_null) bc_log["retained"]++; counter["retained"]++; return 0; } int dist(const char *res_enz, char *seq) { const char *p; char *q; int dist = 0; for (p = res_enz, q = seq; *p != '\0'; p++, q++) if (*p != *q) dist++; return dist; } int print_results(int argc, char **argv, vector &barcodes, map > &counters, map > &barcode_log) { map >::iterator it; string log_path = out_path + "process_shortreads.log"; ofstream log(log_path.c_str()); if (log.fail()) { cerr << "Unable to open log file '" << log_path << "'\n"; return 0; } cerr << "Outputing details to log: '" << log_path << "'\n\n"; init_log(log, argc, argv); log << "File\t" << "Retained Reads\t"; if (filter_illumina) log << "Illumina Filtered\t"; if (filter_adapter) log << "Adapter Seq" << "\t"; log << "Low Quality\t" << "Ambiguous Barcodes\t" << "Trimmed Reads\t" << "Orphaned paired-end reads\t" << "Total\n"; for (it = counters.begin(); it != counters.end(); it++) { log << it->first << "\t" << it->second["retained"] << "\t"; if (filter_illumina) log << it->second["ill_filtered"] << "\t"; if (filter_adapter) log << it->second["adapter"] << "\t"; log << it->second["low_quality"] << "\t" << it->second["ambiguous"] << "\t" << it->second["trimmed"] << "\t" << it->second["orphaned"] << "\t" << it->second["total"] << "\n"; } map c; c["total"] = 0; c["low_quality"] = 0; c["adapter"] = 0; c["ill_filtered"] = 0; c["ambiguous"] = 0; c["trimmed"] = 0; c["orphaned"] = 0; // // Total up the individual 
counters // for (it = counters.begin(); it != counters.end(); it++) { c["total"] += it->second["total"]; c["ill_filtered"] += it->second["ill_filtered"]; c["adapter"] += it->second["adapter"]; c["low_quality"] += it->second["low_quality"]; c["ambiguous"] += it->second["ambiguous"]; c["trimmed"] += it->second["trimmed"]; c["orphaned"] += it->second["orphaned"]; c["retained"] += it->second["retained"]; } cerr << c["total"] << " total sequences;\n"; if (filter_illumina) cerr << " " << c["ill_filtered"] << " failed Illumina filtered reads;\n"; if (filter_adapter) cerr << " " << c["adapter"] << " reads contained adapter sequence;\n"; cerr << " " << c["ambiguous"] << " ambiguous barcode drops;\n" << " " << c["low_quality"] << " low quality read drops;\n" << " " << c["trimmed"] << " trimmed reads;\n" << " " << c["orphaned"] << " orphaned paired-end reads;\n" << c["retained"] << " retained reads.\n"; log << "\n" << "Total Sequences\t" << c["total"] << "\n"; if (filter_illumina) log << "Failed Illumina filtered reads\t" << c["ill_filtered"] << "\n"; if (filter_adapter) log << "Reads containing adapter sequence\t" << c["adapter"] << "\n"; log << "Ambiguous Barcodes\t" << c["ambiguous"] << "\n" << "Low Quality\t" << c["low_quality"] << "\n" << "Trimmed Reads\t" << c["trimmed"] << "\n" << "Orphaned Paired-ends\t" << c["orphaned"] << "\n" << "Retained Reads\t" << c["retained"] << "\n"; if (max_bc_size_1 == 0) return 0; // // Where barcode filenames specified? // bool bc_names = false; for (uint i = 0; i < barcodes.size(); i++) if (barcodes[i].name_exists()) { bc_names = true; break; } // // Print out barcode information. 
// log << "\n" << "Barcode\t"; if (bc_names) log << "Filename\t"; log << "Total\t" << "Retained\n"; set barcode_list; for (uint i = 0; i < barcodes.size(); i++) { barcode_list.insert(barcodes[i]); log << barcodes[i] << "\t"; if (bc_names) log << barcodes[i].name << "\t"; if (barcode_log.count(barcodes[i]) == 0) log << "0\t" << "0\t" << "0\n"; else log << barcode_log[barcodes[i]]["total"] << "\t" << barcode_log[barcodes[i]]["retained"] << "\n"; } log << "\n" << "Sequences not recorded\n" << "Barcode\t" << "Total\n"; // // Sort unused barcodes by number of occurances. // map >::iterator bit; vector > bcs; for (bit = barcode_log.begin(); bit != barcode_log.end(); bit++) bcs.push_back(make_pair(bit->first, bit->second["total"])); sort(bcs.begin(), bcs.end(), compare_barcodes); for (uint i = 0; i < bcs.size(); i++) { if (barcode_list.count(bcs[i].first)) continue; if (bcs[i].second == 0) continue; log << bcs[i].first << "\t" << bcs[i].second << "\n"; } log.close(); return 0; } int compare_barcodes(pair a, pair b) { return a.second > b.second; } int parse_command_line(int argc, char* argv[]) { FileT ftype; int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"quality", no_argument, NULL, 'q'}, {"clean", no_argument, NULL, 'c'}, {"recover", no_argument, NULL, 'r'}, {"discards", no_argument, NULL, 'D'}, {"paired", no_argument, NULL, 'P'}, {"interleaved", no_argument, NULL, 'I'}, {"merge", no_argument, NULL, 'm'}, {"mate-pair", no_argument, NULL, 'M'}, {"no_overhang", no_argument, NULL, 'O'}, {"filter_illumina", no_argument, NULL, 'F'}, {"retain_header", no_argument, NULL, 'H'}, {"no_read_trimming", no_argument, NULL, 'N'}, {"null_index", no_argument, NULL, 'U'}, {"index_null", no_argument, NULL, 'u'}, {"inline_null", no_argument, NULL, 'V'}, {"index_index", no_argument, NULL, 'W'}, {"inline_inline", no_argument, NULL, 'x'}, {"index_inline", no_argument, NULL, 'Y'}, {"inline_index", no_argument, 
NULL, 'Z'}, {"barcode_dist_1", required_argument, NULL, 'B'}, {"barcode_dist_2", required_argument, NULL, 'C'}, {"infile_type", required_argument, NULL, 'i'}, {"outfile_type", required_argument, NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"file_p1", required_argument, NULL, '1'}, {"file_p2", required_argument, NULL, '2'}, {"path", required_argument, NULL, 'p'}, {"outpath", required_argument, NULL, 'o'}, {"truncate", required_argument, NULL, 't'}, {"barcodes", required_argument, NULL, 'b'}, {"window_size", required_argument, NULL, 'w'}, {"score_limit", required_argument, NULL, 's'}, {"encoding", required_argument, NULL, 'E'}, {"len_limit", required_argument, NULL, 'L'}, {"adapter_1", required_argument, NULL, 'A'}, {"adapter_2", required_argument, NULL, 'G'}, {"adapter_mm", required_argument, NULL, 'T'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hHvcqrINFuVWxYZOPmDi:y:f:o:t:B:C:b:1:2:p:s:w:E:L:A:G:T:", long_options, &option_index); // Detect the end of the options. 
if (c == -1) break; switch (c) { case 'h': help(); break; case 'i': if (strcasecmp(optarg, "bustard") == 0) in_file_type = FileT::bustard; else if (strcasecmp(optarg, "bam") == 0) in_file_type = FileT::bam; else if (strcasecmp(optarg, "gzfastq") == 0) in_file_type = FileT::gzfastq; else in_file_type = FileT::fastq; break; case 'y': if (strcasecmp(optarg, "fastq") == 0) out_file_type = FileT::fastq; else if (strcasecmp(optarg, "gzfastq") == 0) out_file_type = FileT::gzfastq; else if (strcasecmp(optarg, "fasta") == 0) out_file_type = FileT::fasta; else if (strcasecmp(optarg, "gzfasta") == 0) out_file_type = FileT::gzfasta; break; case 'E': if (strcasecmp(optarg, "phred64") == 0) qual_offset = 64; else if (strcasecmp(optarg, "phred33") == 0) qual_offset = 33; break; case 'f': in_file = optarg; ftype = FileT::fastq; break; case 'p': in_path_1 = optarg; in_path_2 = in_path_1; ftype = FileT::fastq; break; case '1': paired = true; in_file_p1 = optarg; ftype = FileT::fastq; break; case '2': paired = true; in_file_p2 = optarg; ftype = FileT::fastq; break; case 'P': paired = true; break; case 'I': interleaved = true; break; case 'B': barcode_dist_1 = is_integer(optarg); break; case 'C': barcode_dist_2 = is_integer(optarg); break; case 'o': out_path = optarg; break; case 'm': merge = true; break; case 'M': matepair = true; break; case 'D': discards = true; break; case 'q': quality = true; break; case 'c': clean = true; break; case 'r': recover = true; break; case 'O': overhang = false; break; case 'F': filter_illumina = true; break; case 'H': retain_header = true; break; case 'N': trim_reads = false; break; case 't': truncate_seq = is_integer(optarg); break; case 'b': barcode_file = optarg; if (barcode_type == null_null) barcode_type = inline_null; break; case 'U': barcode_type = null_index; break; case 'u': barcode_type = index_null; break; case 'V': barcode_type = inline_null; break; case 'W': barcode_type = index_index; break; case 'x': barcode_type = inline_inline; break; 
case 'Y': barcode_type = index_inline; break; case 'Z': barcode_type = inline_index; break; case 'A': adapter_1 = new char[strlen(optarg) + 1]; strcpy(adapter_1, optarg); filter_adapter = true; break; case 'G': adapter_2 = new char[strlen(optarg) + 1]; strcpy(adapter_2, optarg); filter_adapter = true; break; case 'T': distance = is_integer(optarg); break; case 'L': len_limit = is_integer(optarg); break; case 'w': win_size = is_double(optarg); break; case 's': score_limit = is_integer(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (in_file.length() == 0 && in_path_1.length() == 0 && in_file_p1.length() == 0) { cerr << "You must specify an input file of a directory path to a set of input files.\n"; help(); } if (in_file.length() > 0 && in_path_1.length() > 0) { cerr << "You must specify either a single input file (-f) or a directory path (-p), not both.\n"; help(); } if (in_file.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a single input file (-f) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && (in_file_p1.length() > 0 || in_file_p2.length() > 0)) { cerr << "You must specify either a file path (-p) or a set of paired files (-1, -2), not both.\n"; help(); } if (in_path_1.length() > 0 && in_path_1.at(in_path_1.length() - 1) != '/') in_path_1 += "/"; if (in_path_2.length() > 0 && in_path_2.at(in_path_2.length() - 1) != '/') in_path_2 += "/"; if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (barcode_file.length() == 0) { overhang = false; cerr << "No barcodes specified, files will not be demultiplexed.\n"; } if (barcode_file.length() > 0 && merge) { cerr << "You may specify a set of barcodes, or that all files should be merged, not both.\n"; 
help(); } if (in_file_type == FileT::unknown) in_file_type = ftype; if (in_file_type == FileT::bam && paired == true && interleaved == false) { cerr << "You may only specify a BAM input file for paired-end data if the read pairs are interleaved.\n"; help(); } if (in_file_type == FileT::bam && (barcode_type != inline_null && barcode_type != inline_inline && barcode_type != null_null)) { cerr << "For BAM input files only inline or unbarcoded data can be processed.\n"; help(); } if (score_limit < 0 || score_limit > 40) { cerr << "Score limit must be between 0 and 40.\n"; help(); } if (win_size < 0 || win_size >= 1) { cerr << "Window size is a fraction between 0 and 1.\n"; help(); } if (recover && barcode_type != null_null) { if (barcode_type != index_null && barcode_type != inline_null && barcode_dist_2 < 0) barcode_dist_2 = barcode_dist_1; } return 0; } void version() { std::cerr << "process_shortreads " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "process_shortreads " << VERSION << "\n" << "process_shortreads [-f in_file | -p in_dir [-P] [-I] | -1 pair_1 -2 pair_2] -b barcode_file -o out_dir [-i type] [-y type] [-c] [-q] [-r] [-E encoding] [-t len] [-D] [-w size] [-s lim] [-h]\n" << " f: path to the input file if processing single-end seqeunces.\n" << " i: input file type, either 'bustard' for the Illumina BUSTARD format, 'bam', 'fastq' (default), or 'gzfastq' for gzipped FASTQ.\n" << " p: path to a directory of single-end Illumina files.\n" << " 1: first input file in a set of paired-end sequences.\n" << " 2: second input file in a set of paired-end sequences.\n" << " P: specify that input is paired (for use with '-p').\n" << " I: specify that the paired-end reads are interleaved in single files.\n" << " o: path to output the processed files.\n" << " y: output type, either 'fastq' or 'fasta' (default fastq).\n" << " b: a list of barcodes for this run.\n" << " c: clean data, remove any read with an uncalled base.\n" << " q: discard reads with low 
quality scores.\n" << " r: rescue barcodes.\n" << " t: truncate final read length to this value.\n" << " E: specify how quality scores are encoded, 'phred33' (Illumina 1.8+, Sanger) or 'phred64' (Illumina 1.3 - 1.5, default).\n" << " D: capture discarded reads to a file.\n" << " w: set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).\n" << " s: set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).\n" << " h: display this help messsage.\n\n" << " Barcode options:\n" << " --inline_null: barcode is inline with sequence, occurs only on single-end read (default).\n" << " --index_null: barcode is provded in FASTQ header (Illumina i5 or i7 read).\n" << " --null_index: barcode is provded in FASTQ header (Illumina i7 read if both i5 and i7 read are provided).\n" << " --inline_inline: barcode is inline with sequence, occurs on single and paired-end read.\n" << " --index_index: barcode is provded in FASTQ header (Illumina i5 and i7 reads).\n" << " --inline_index: barcode is inline with sequence on single-end read and occurs in FASTQ header (from either i5 or i7 read).\n" << " --index_inline: barcode occurs in FASTQ header (Illumina i5 or i7 read) and is inline with single-end sequence (for single-end data) on paired-end read (for paired-end data).\n\n" << " Adapter options:\n" << " --adapter_1 : provide adaptor sequence that may occur on the first read for filtering.\n" << " --adapter_2 : provide adaptor sequence that may occur on the paired-read for filtering.\n" << " --adapter_mm : number of mismatches allowed in the adapter sequence.\n\n" << " Output options:\n" << " --retain_header: retain unmodified FASTQ headers in the output.\n" << " --merge: if no barcodes are specified, merge all input files into a single output file (or single pair of files).\n\n" << " Advanced options:\n" << " --no_read_trimming: do not trim low quality reads, just discard 
them.\n" << " --len_limit : when trimming sequences, specify the minimum length a sequence must be to keep it (default 31bp).\n" << " --filter_illumina: discard reads that have been marked by Illumina's chastity/purity filter as failing.\n" << " --barcode_dist: provide the distace between barcodes to allow for barcode rescue (default 2)\n" << " --mate-pair: raw reads are circularized mate-pair data, first read will be reverse complemented.\n" << " --no_overhang: data does not contain an overhang nucleotide between barcode and seqeunce.\n"; exit(0); } stacks-1.35/src/process_shortreads.h000644 000765 000024 00000005265 12533677757 020414 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __PROCESS_SHORTREADS_H__ #define __PROCESS_SHORTREADS_H__ #include #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include #include using std::stringstream; using std::istream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include "constants.h" #include "clean.h" #include "file_io.h" #include "utils.h" #include "log_utils.h" #include "write.h" #include "BustardI.h" // Reading input files in Tab-separated Bustard format #include "FastqI.h" // Reading input files in FASTQ format #include "gzFastq.h" // Reading gzipped input files in FASTQ format #include "BamUnalignedI.h" // Reading data from unaligned BAM files void help( void ); void version( void ); int parse_command_line(int, char **); template int process_reads(string, set &, set &, map &, map &, map > &); template int process_paired_reads(string, string, set &, set &, map &, map &, map &, map &, map &, map > &); int process_singlet(Read *, bool, map &, map &); int dist(const char *, char *); int print_results(int, char **, vector &, map > &, map > &); int compare_barcodes(pair, pair); #endif // __PROCESS_SHORTREADS_H__ stacks-1.35/src/pstacks.cc000644 000765 000024 00000064747 12533677757 016320 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // pstacks -- search an existing set of stacks for polymorphisms // #include "pstacks.h" // // Global variables to hold command-line options. // FileT in_file_type; string in_file; FileT out_file_type; string out_path; int sql_id = 0; int min_stack_cov = 1; int num_threads = 1; // // For use with the multinomial model to call fixed nucleotides. // modelt model_type = snp; double alpha = 0.05; double bound_low = 0.0; double bound_high = 1.0; double p_freq = 0.5; double barcode_err_freq = 0.0; double heterozygote_limit = -3.84; double homozygote_limit = 3.84; int main (int argc, char* argv[]) { parse_command_line(argc, argv); cerr << "Min depth of coverage to report a stack: " << min_stack_cov << "\n" << "Model type: "; switch (model_type) { case snp: cerr << "SNP\n"; break; case fixed: cerr << "Fixed\n"; break; case bounded: cerr << "Bounded; lower epsilon bound: " << bound_low << "; upper bound: " << bound_high << "\n"; break; } cerr << "Alpha significance level for model: " << alpha << "\n"; // // Set limits to call het or homozygote according to chi-square distribution with one // degree of freedom: // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value // if (alpha == 0.1) { heterozygote_limit = -2.71; homozygote_limit = 2.71; } else if (alpha == 0.05) { heterozygote_limit = -3.84; homozygote_limit = 3.84; } else if (alpha == 0.01) { heterozygote_limit = -6.64; homozygote_limit = 6.64; } else if (alpha == 0.001) { heterozygote_limit = -10.83; homozygote_limit = 10.83; } // // Set the number of OpenMP parallel threads to execute. 
// #ifdef _OPENMP omp_set_num_threads(num_threads); #endif HashMap radtags; set merge_map; map unique; load_radtags(in_file, radtags); reduce_radtags(radtags, unique); //dump_stacks(unique); map merged; populate_merged_tags(unique, merged); //dump_merged_stacks(merged); // Call the consensus sequence again, now that remainder tags have been merged. cerr << "Identifying polymorphic sites and calling consensus sequences..."; call_consensus(merged, unique, true); cerr << "done.\n"; count_raw_reads(unique, merged); calc_coverage_distribution(unique, merged); cerr << "Writing loci, SNPs, alleles to '" << out_path << "...'\n"; write_results(merged, unique); return 0; } int call_alleles(MergedStack *mtag, vector &reads) { int row; int height = reads.size(); string allele; char base; vector::iterator snp; DNANSeq *d; for (row = 0; row < height; row++) { allele.clear(); uint snp_cnt = 0; for (snp = mtag->snps.begin(); snp != mtag->snps.end(); snp++) { if ((*snp)->type != snp_type_het) continue; snp_cnt++; d = reads[row]; base = (*d)[(*snp)->col]; // // Check to make sure the nucleotide at the location of this SNP is // of one of the two possible states the multinomial model called. // if (base == (*snp)->rank_1 || base == (*snp)->rank_2) allele += base; else break; } if (snp_cnt > 0 && allele.length() == snp_cnt) mtag->alleles[allele]++; } return 0; } int call_consensus(map &merged, map &unique, bool invoke_model) { // // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. // map::iterator it; vector keys; for (it = merged.begin(); it != merged.end(); it++) keys.push_back(it->first); int i; #pragma omp parallel private(i) { #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { MergedStack *mtag; PStack *utag; mtag = merged[keys[i]]; // // Create a two-dimensional array, each row containing one read. 
For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. // vector::iterator j; vector reads; for (j = mtag->utags.begin(); j != mtag->utags.end(); j++) { utag = unique[*j]; for (uint k = 0; k < utag->count; k++) { reads.push_back(utag->seq); } } // // Iterate over each column of the array and call the consensus base. // int row, col; int length = reads[0]->size(); int height = reads.size(); string con; map nuc; map::iterator max, n; DNANSeq *d; for (col = 0; col < length; col++) { nuc['A'] = 0; nuc['C'] = 0; nuc['G'] = 0; nuc['T'] = 0; nuc['N'] = 0; for (row = 0; row < height; row++) { d = reads[row]; if (nuc.count((*d)[col])) nuc[(*d)[col]]++; } // // Find the base with a plurality of occurances and call it. // max = nuc.end(); for (n = nuc.begin(); n != nuc.end(); n++) { if (n->first == 'N') continue; if (max == nuc.end() || n->second > max->second) max = n; } con += max->second == 0 ? 'N' : max->first; // // Search this column for the presence of a SNP // if (invoke_model) switch(model_type) { case snp: call_multinomial_snp(mtag, col, nuc, true); break; case bounded: call_bounded_multinomial_snp(mtag, col, nuc, true); break; case fixed: call_multinomial_fixed(mtag, col, nuc); break; } } if (invoke_model) { call_alleles(mtag, reads); if (model_type == fixed) { // // Mask nucleotides that are not fixed. // vector::iterator s; for (s = mtag->snps.begin(); s != mtag->snps.end(); s++) { if ((*s)->type == snp_type_unk) con.replace((*s)->col, 1, "N"); } } } mtag->add_consensus(con.c_str()); // // If SNPs were called at this locus but no alleles could be determined, // blacklist this tag. This can occur if there are two many uncalled bases // in the locus (Ns), such that haplotypes can't be consistently read // due to the presence of the Ns in the reads. 
// if (mtag->alleles.empty()) for (uint j = 0; j < mtag->snps.size(); j++) if (mtag->snps[j]->type == snp_type_het) { mtag->blacklisted = 1; break; } } } return 0; } double calc_coverage_distribution(map &unique, map &merged) { map::iterator it; vector::iterator k; PStack *tag; double depth = 0.0; double total = 0.0; double sum = 0.0; double mean = 0.0; double max = 0.0; double stdev = 0.0; for (it = merged.begin(); it != merged.end(); it++) { depth = 0.0; for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; depth += tag->count; } if (depth < min_stack_cov) continue; if (depth > max) max = depth; sum += depth; total++; } mean = sum / total; // // Calculate the standard deviation // for (it = merged.begin(); it != merged.end(); it++) { depth = 0.0; for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; depth += tag->count; } if (depth < min_stack_cov) continue; sum += pow((depth - mean), 2); } stdev = sqrt(sum / (total - 1)); cerr << " Mean coverage depth is " << mean << "; Std Dev: " << stdev << "; Max: " << max << "\n"; return mean; } int count_raw_reads(map &unique, map &merged) { map::iterator it; vector::iterator k; PStack *tag; long int m = 0; for (it = merged.begin(); it != merged.end(); it++) { for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; m += tag->count; } m += it->second->remtags.size(); } cerr << " Number of utilized reads " << m << "\n"; return 0; } int write_results(map &m, map &u) { map::iterator i; vector::iterator j; vector::iterator k; vector::iterator s; map::iterator t; MergedStack *tag_1; PStack *tag_2; stringstream sstr; bool gzip = (in_file_type == FileT::bam) ? 
true : false; // // Parse the input file name to create the output files // size_t pos_1 = in_file.find_last_of("/"); size_t pos_2 = in_file.find_last_of("."); string tag_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".tags.tsv"; string snp_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".snps.tsv"; string all_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".alleles.tsv"; if (gzip) { tag_file += ".gz"; snp_file += ".gz"; all_file += ".gz"; } // // Open the output files for writing. // gzFile gz_tags, gz_snps, gz_alle; ofstream tags, snps, alle; if (gzip) { gz_tags = gzopen(tag_file.c_str(), "wb"); if (!gz_tags) { cerr << "Error: Unable to open gzipped tag file '" << tag_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_tags, libz_buffer_size); #endif gz_snps = gzopen(snp_file.c_str(), "wb"); if (!gz_snps) { cerr << "Error: Unable to open gzipped snps file '" << snp_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_snps, libz_buffer_size); #endif gz_alle = gzopen(all_file.c_str(), "wb"); if (!gz_alle) { cerr << "Error: Unable to open gzipped alleles file '" << all_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_alle, libz_buffer_size); #endif } else { tags.open(tag_file.c_str()); if (tags.fail()) { cerr << "Error: Unable to open tag file for writing.\n"; exit(1); } snps.open(snp_file.c_str()); if (snps.fail()) { cerr << "Error: Unable to open SNPs file for writing.\n"; exit(1); } alle.open(all_file.c_str()); if (alle.fail()) { cerr << "Error: Unable to open allele file for writing.\n"; exit(1); } } // // Record the version of Stacks used and the date generated as a comment in the catalog. // // Obtain the current date. 
// stringstream log; time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); log << "# pstacks version " << VERSION << "; generated on " << date << "\n"; if (gzip) { gzputs(gz_tags, log.str().c_str()); gzputs(gz_snps, log.str().c_str()); gzputs(gz_alle, log.str().c_str()); } else { tags << log.str(); snps << log.str(); alle << log.str(); } int id; char *buf; // = new char[m.begin()->second->len + 1]; int wrote = 0; int excluded = 0; int blacklisted = 0; for (i = m.begin(); i != m.end(); i++) { tag_1 = i->second; float total = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) total += u[*k]->count; if (total < min_stack_cov) { excluded++; continue; } // // Calculate the log likelihood of this merged stack. // tag_1->gen_matrix(u); tag_1->calc_likelihood_pstacks(); wrote++; if (tag_1->blacklisted) blacklisted++; // First write the consensus sequence sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << tag_1->loc.chr << "\t" << tag_1->loc.bp << "\t" << (tag_1->loc.strand == plus ? "+" : "-") << "\t" << "consensus\t" << "\t\t" << tag_1->con << "\t" << tag_1->deleveraged << "\t" << tag_1->blacklisted << "\t" << tag_1->lumberjackstack << "\t" << tag_1->lnl << "\n"; // // Write a sequence recording the output of the SNP model for each nucleotide. // sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << "\t" << "\t" << "\t" << "model\t" << "\t" << "\t"; for (s = tag_1->snps.begin(); s != tag_1->snps.end(); s++) { switch((*s)->type) { case snp_type_het: sstr << "E"; break; case snp_type_hom: sstr << "O"; break; default: sstr << "U"; break; } } sstr << "\t" << "\t" << "\t" << "\t" << "\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); // Now write out the components of each unique tag merged into this one. 
id = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { tag_2 = u[*k]; buf = tag_2->seq->seq(); for (j = tag_2->map.begin(); j != tag_2->map.end(); j++) { sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t\t\t\t" << "primary\t" << id << "\t" << *j << "\t" << buf << "\t\t\t\t\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); } id++; delete [] buf; } // // Write out the model calls for each nucleotide in this locus. // for (s = tag_1->snps.begin(); s != tag_1->snps.end(); s++) { sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << (*s)->col << "\t"; switch((*s)->type) { case snp_type_het: sstr << "E\t"; break; case snp_type_hom: sstr << "O\t"; break; default: sstr << "U\t"; break; } sstr << std::fixed << std::setprecision(2) << (*s)->lratio << "\t" << (*s)->rank_1 << "\t" << (*s)->rank_2 << "\t\t\n"; } if (gzip) gzputs(gz_snps, sstr.str().c_str()); else snps << sstr.str(); sstr.str(""); // Write the expressed alleles seen for the recorded SNPs and // the percentage of tags a particular allele occupies. 
// char pct[id_len]; for (t = tag_1->alleles.begin(); t != tag_1->alleles.end(); t++) { sprintf(pct, "%.2f", ((t->second/total) * 100)); sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << t->first << "\t" << pct << "\t" << t->second << "\n"; } if (gzip) gzputs(gz_alle, sstr.str().c_str()); else alle << sstr.str(); sstr.str(""); } if (gzip) { gzclose(gz_tags); gzclose(gz_snps); gzclose(gz_alle); } else { tags.close(); snps.close(); alle.close(); } cerr << " Wrote " << wrote << " loci, excluded " << excluded << " loci due to insuffient depth of coverage; blacklisted " << blacklisted << " loci.\n"; return 0; } int populate_merged_tags(map &unique, map &merged) { map::iterator i; map::iterator it_new, it_old; map > locations; map >::iterator k; set::iterator s; char id[id_len]; PStack *u; MergedStack *m; int global_id = 1; // // Create a map of each unique Stack that has been aligned to the same genomic location. // for (i = unique.begin(); i != unique.end(); i++) { snprintf(id, id_len - 1, "%s|%d|%s", i->second->loc.chr, i->second->loc.bp, i->second->loc.strand == plus ? "+" : "-"); locations[id].insert(i->second->id); } it_old = merged.begin(); for (k = locations.begin(); k != locations.end(); k++) { m = new MergedStack; m->id = global_id; // // Record the consensus and physical location for this stack. // s = k->second.begin(); m->add_consensus(unique[*s]->seq); m->loc.set(unique[*s]->loc.chr, unique[*s]->loc.bp, unique[*s]->loc.strand); // // Record the individual stacks that were aligned together. // for (; s != k->second.end(); s++) { u = unique[*s]; m->count += u->count; m->utags.push_back(u->id); } // // Insert the new MergedStack giving a hint as to which position // to insert it at. 
// it_new = merged.insert(it_old, pair(global_id, m)); it_old = it_new; global_id++; } cerr << " Merged " << unique.size() << " unique Stacks into " << merged.size() << " loci.\n"; return 0; } // // This function assumes that there may be identical reads, mapped to multiple // places in the genome. In this case, reads are broken down by read ID // and split into different Stack objects. // int reduce_radtags(HashMap &radtags, map &unique) { HashMap::iterator it; vector::iterator sit; PStack *u; int global_id = 1; for (it = radtags.begin(); it != radtags.end(); it++) { // // Make sure there aren't any reads of identical sequence that have been mapped to // different genomic locations. // map locations; map::iterator lit; for (sit = (*it).second.begin(); sit != (*it).second.end(); sit++) locations[(*sit)->loc_str]++; for (lit = locations.begin(); lit != locations.end(); lit++) { // // Populate a Stack object for this unique radtag. // u = new PStack; u->id = global_id; u->count = lit->second; u->add_seq(it->first); // // Record the physical location of this stack. // for (sit = (*it).second.begin(); sit != (*it).second.end(); sit++) { if (strcmp((*sit)->loc_str, lit->first.c_str()) == 0) { u->add_id((*sit)->id); u->loc.set((*sit)->loc.chr, (*sit)->loc.bp, (*sit)->loc.strand); } } unique[u->id] = u; global_id++; } } return 0; } // // We expect tags to have already been aligned to a reference genome. Therefore, the tags // are identified by their chromosome and basepair location. 
// int load_radtags(string in_file, HashMap &radtags) { Input *fh = NULL; Seq *c; if (in_file_type == FileT::bowtie) fh = new Bowtie(in_file.c_str()); else if (in_file_type == FileT::sam) fh = new Sam(in_file.c_str()); else if (in_file_type == FileT::bam) fh = new Bam(in_file.c_str()); else if (in_file_type == FileT::tsv) fh = new Tsv(in_file.c_str()); cerr << "Parsing " << in_file.c_str() << "\n"; int i = 1; while ((c = fh->next_seq()) != NULL) { if (i % 10000 == 0) cerr << "Loading aligned sequence " << i << " \r"; radtags[c->seq].push_back(c); i++; } if (i == 0) { cerr << "Error: Unable to load data from '" << in_file.c_str() << "'.\n"; exit(1); } cerr << " " << "Analyzed " << i - 1 << " sequence reads; " << "Identified " << radtags.size() << " unique stacks from those reads.\n"; // // Close the file and delete the Input object. // delete fh; return 0; } int dump_stacks(map &u) { map::iterator it; vector::iterator fit; vector >::iterator pit; vector::iterator mit; for (it = u.begin(); it != u.end(); it++) { cerr << "Stack ID: " << (*it).second->id << "\n" << " Seq: " << (*it).second->seq->seq() << "\n" << " IDs: "; for (fit = (*it).second->map.begin(); fit != (*it).second->map.end(); fit++) cerr << *fit << " "; cerr << "\n\n"; } return 0; } int dump_merged_stacks(map &m) { map::iterator it; vector >::iterator pit; vector::iterator fit; for (it = m.begin(); it != m.end(); it++) { cerr << "MergedStack ID: " << it->second->id << "\n" << " Consensus: "; if (it->second->con != NULL) cerr << it->second->con << "\n"; else cerr << "\n"; cerr << " IDs: "; for (fit = it->second->utags.begin(); fit != it->second->utags.end(); fit++) cerr << (*fit) << " "; cerr << "\n" << " Distances: "; for (pit = it->second->dist.begin(); pit != it->second->dist.end(); pit++) cerr << (*pit).first << ": " << (*pit).second << ", "; cerr << "\n\n"; } return 0; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, 
NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"infile_type", required_argument, NULL, 't'}, {"outfile_type", required_argument, NULL, 'y'}, {"file", required_argument, NULL, 'f'}, {"outpath", required_argument, NULL, 'o'}, {"id", required_argument, NULL, 'i'}, {"min_cov", required_argument, NULL, 'm'}, {"num_threads", required_argument, NULL, 'p'}, {"bc_err_freq", required_argument, NULL, 'e'}, {"model_type", required_argument, NULL, 'T'}, {"bound_low", required_argument, NULL, 'L'}, {"bound_high", required_argument, NULL, 'U'}, {"alpha", required_argument, NULL, 'A'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hvOT:A:L:U:f:o:i:e:p:m:s:f:t:y:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 't': if (strcmp(optarg, "bowtie") == 0) in_file_type = FileT::bowtie; else if (strcmp(optarg, "sam") == 0) in_file_type = FileT::sam; else if (strcmp(optarg, "bam") == 0) in_file_type = FileT::bam; else if (strcmp(optarg, "tsv") == 0) in_file_type = FileT::tsv; else in_file_type = FileT::unknown; break; case 'y': if (strcmp(optarg, "sam") == 0) out_file_type = FileT::sam; else out_file_type = FileT::sql; break; case 'f': in_file = optarg; break; case 'o': out_path = optarg; break; case 'i': sql_id = is_integer(optarg); if (sql_id < 0) { cerr << "SQL ID (-i) must be an integer, e.g. 
1, 2, 3\n"; help(); } break; case 'm': min_stack_cov = atoi(optarg); break; case 'e': barcode_err_freq = atof(optarg); break; case 'T': if (strcmp(optarg, "snp") == 0) { model_type = snp; } else if (strcmp(optarg, "fixed") == 0) { model_type = fixed; } else if (strcmp(optarg, "bounded") == 0) { model_type = bounded; } else { cerr << "Unknown model type specified '" << optarg << "'\n"; help(); } case 'L': bound_low = atof(optarg); break; case 'U': bound_high = atof(optarg); break; case 'A': alpha = atof(optarg); break; case 'p': num_threads = atoi(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (alpha != 0.1 && alpha != 0.05 && alpha != 0.01 && alpha != 0.001) { cerr << "SNP model alpha significance level must be either 0.1, 0.05, 0.01, or 0.001.\n"; help(); } if (bound_low != 0 && (bound_low < 0 || bound_low >= 1.0)) { cerr << "SNP model lower bound must be between 0.0 and 1.0.\n"; help(); } if (bound_high != 1 && (bound_high <= 0 || bound_high > 1.0)) { cerr << "SNP model upper bound must be between 0.0 and 1.0.\n"; help(); } if (bound_low > 0 || bound_high < 1.0) { model_type = bounded; } if (in_file.length() == 0 || in_file_type == FileT::unknown) { cerr << "You must specify an input file of a supported type.\n"; help(); } if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (model_type == fixed && barcode_err_freq == 0) { cerr << "You must specify the barcode error frequency.\n"; help(); } return 0; } void version() { std::cerr << "pstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "pstacks " << VERSION << "\n" << "pstacks -t file_type -f file_path [-o path] [-i id] [-m min_cov] [-p num_threads] [-h]" << "\n" << " t: input file Type. 
Supported types: bowtie, sam, or bam.\n" << " f: input file path.\n" << " o: output path to write results.\n" << " i: SQL ID to insert into the output to identify this sample.\n" << " m: minimum depth of coverage to report a stack (default 1).\n" << " p: enable parallel execution with num_threads threads.\n" << " h: display this help messsage.\n" << " Model options:\n" << " --model_type : either 'snp' (default), 'bounded', or 'fixed'\n" << " For the SNP or Bounded SNP model:\n" << " --alpha : chi square significance level required to call a heterozygote or homozygote, either 0.1, 0.05 (default), 0.01, or 0.001.\n" << " For the Bounded SNP model:\n" << " --bound_low : lower bound for epsilon, the error rate, between 0 and 1.0 (default 0).\n" << " --bound_high : upper bound for epsilon, the error rate, between 0 and 1.0 (default 1).\n" << " For the Fixed model:\n" << " --bc_err_freq : specify the barcode error frequency, between 0 and 1.0.\n"; exit(0); } stacks-1.35/src/pstacks.h000644 000765 000024 00000005575 12533677757 016154 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __PSTACKS_H__ #define __PSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include #include // Support for gzipped output files. 
#include // Process command-line options #include #include #include #include using std::ofstream; using std::stringstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include // std::setprecision #include using std::unordered_map; #include using std::vector; #include using std::map; #include using std::set; #include using std::pair; #include "constants.h" #include "stacks.h" // Major data structures for holding stacks #include "mstack.h" #include "kmers.h" #include "utils.h" #include "models.h" // Contains maximum likelihood statistical models. #include "Tsv.h" // Reading input files in Tab-separated values format #include "BowtieI.h" // Reading input files in Bowtie format #include "SamI.h" // Reading input files in SAM format #include "BamI.h" // Reading input files in BAM format #include "DNANSeq.h" const int barcode_size = 5; #ifdef HAVE_SPARSEHASH typedef sparse_hash_map, hash_charptr, eqstr> HashMap; #else typedef unordered_map, hash_charptr, eqstr> HashMap; #endif void help( void ); void version( void ); int parse_command_line(int, char**); int load_radtags(string, HashMap &); int reduce_radtags(HashMap &, map &); int populate_merged_tags(map &, map &); int call_consensus(map &, map &, bool); int call_alleles(MergedStack *, vector &); int count_raw_reads(map &, map &); double calc_coverage_distribution(map &, map &); int write_results(map &, map &); // // Debugging // int dump_stacks(map &); int dump_merged_stacks(map &); #endif // __PSTACKS_H__ stacks-1.35/src/renz.h000644 000765 000024 00000017663 12571641525 015445 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2011-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __RENZ_H__ #define __RENZ_H__ #include using std::map; #include using std::string; const char *ageI[] = {"CCGGT", // A/CCGGT, AgeI "ACCGG"}; const char *aluI[] = {"CT", // AG/CT, AluI "AG"}; const char *apeKI[] = {"CAGC", "CTGC", // G/CWGC, ApeKI; W=A or T "GTCG", "GACG"}; const char *apoI[] = {"AATTC", "AATTT", // R/AATTY, ApoI (also known as XapI) "GAATT", "AAATT"}; const char *bamHI[] = {"GATCC", // G/GATCC, BamHI "GGATC"}; const char *bgIII[] = {"GATCT", // A/GATCT, BgIII "AGATC"}; const char *bstYI[] = {"GATCC", "GATCT", // R/GATCY, BstYI (also known as PsuI) "GGATC", "AGATC"}; const char *claI[] = {"CGAT", // AT/CGAT, ClaI "ATCG"}; const char *ddeI[] = {"TAAG", "TCAG", "TGAG", "TTAG", // C/TNAG, DdeI "CTTA", "CTGA", "CTCA", "CTAA"}; const char *dpnII[] = {"GATC", // GATC, DpnII "GATC"}; const char *eaeI[] = {"GGCCA", "GGCCG", // Y/GGCCR, EaeI "TGGCC", "CGGCC"}; const char *ecoRI[] = {"AATTC", // G/AATTC, EcoRI "GAATT"}; const char *ecoRV[] = {"ATC", // GAT/ATC, EcoRV "GAT"}; const char *ecoT22I[] = {"TGCAT", // A/TGCAT, EcoT22I "ATGCA"}; const char *hindIII[] = {"AGCTT", // A/AGCTT, HindIII "TCGAA"}; const char *kpnI[] = {"GTACC", // C/CATGG, KpnI "GGTAC"}; const char *mluCI[] = {"AATT", // AATT, MluCI "AATT"}; const char *mseI[] = {"TAA", // T/TAA, MseI "TTA"}; const char *mspI[] = {"CGG", // C/CGG, MspI "CCG"}; const char *ndeI[] = {"TA", // CA/TATG, NdeI "TA"}; const char *nheI[] = {"CTAGC", // G/CTAGC, NheI "GCTAG"}; const char *nlaIII[] = {"CATG", // CATG, NlaIII "CATG"}; const char *notI[] = {"GGCCGC", // GC/GGCCGC, NotI "GCGGCC"}; const char *nsiI[] = {"TGCAT", // ATGCA/T, NsiI "ATGCA"}; const char 
*pstI[] = {"TGCAG", // CTGCA/G, PstI "CTGCA"}; const char *rsaI[] = {"AC", // GT/AC, RsaI "GT"}; const char *sacI[] = {"AGCTC", // GAGCT/C, SacI "GAGCT"}; const char *sau3AI[] = {"GATC", // GATC, Sau3AI "GATC"}; const char *sbfI[] = {"TGCAGG", // CCTGCA/GG, SbfI "CCTGCA"}; const char *sexAI[] = {"CCAGGT", "CCTGGT", // A/CCWGGT, SexAI; W=A or T "ACCTGG", "ACCAGG"}; const char *sgrAI[] = {"CCGGCG", "CCGGTG", // CR/CCGGYG, SgrAI; R=A or G; Y=C or T "CGCCGG", "CACCGG"}; const char *speI[] = {"CTAGT", // A/CTAGT, SpeI "ACTAG"}; const char *sphI[] = {"CATGC", // GCATG/C, SphI "GCATG"}; const char *taqI[] = {"CGA", // T/CGA, TaqI "TCG"}; const char *xbaI[] = {"CTAGA", // T/CTAGA, XbaI "TCTAG"}; const char *xhoI[] = {"TCGAG", // C/TCGAG, XhoI "CTCGA"}; void initialize_renz(map &renz, map &renz_cnt, map &renz_len) { renz["sbfI"] = sbfI; // CCTGCA/GG, SbfI renz["pstI"] = pstI; // CTGCA/G, PstI renz["notI"] = notI; // GC/GGCCGC, NotI renz["ecoRI"] = ecoRI; // G/AATTC, EcoRI renz["sgrAI"] = sgrAI; // CR/CCGGYG, SgrAI; R=A or G; Y=C or T renz["apeKI"] = apeKI; // G/CWGC, ApeKI; W=A or T renz["hindIII"] = hindIII; // A/AGCTT, HindIII renz["dpnII"] = dpnII; // GATC, DpnII renz["sphI"] = sphI; // GCATG/C, SphI renz["nlaIII"] = nlaIII; // CATG, NlaIII renz["mluCI"] = mluCI; // AATT, MluCI renz["ecoT22I"] = ecoT22I; // A/TGCAT, EcoT22I renz["ndeI"] = ndeI; // CA/TATG, NdeI renz["nsiI"] = nsiI; // ATGCA/T, NsiI renz["mseI"] = mseI; // T/TAA, MseI renz["mspI"] = mspI; // C/CGG, MspI renz["sexAI"] = sexAI; // A/CCWGGT, SexAI; W=A or T renz["sau3AI"] = sau3AI; // GATC, Sau3AI renz["bamHI"] = bamHI; // G/GATCC, BamHI renz["xbaI"] = xbaI; // T/CTAGA, XbaI renz["eaeI"] = eaeI; // Y/GGCCR, EaeI renz["taqI"] = taqI; // T/CGA, TaqI renz["claI"] = claI; // AT/CGAT, ClaI renz["nheI"] = nheI; // G/CTAGC, NheI renz["speI"] = speI; // A/CTAGT, SpeI renz["apoI"] = apoI; // R/AATTY, ApoI, XapI renz["bstYI"] = bstYI; // R/GATCY, BstYI, PsuI renz["xhoI"] = xhoI; // C/TCGAG, XhoI renz["sacI"] = sacI; 
// GAGCT/C, SacI renz["bgIII"] = bgIII; // A/GATCT, BgIII renz["ecoRV"] = ecoRV; // GAT/ATC, EcoRV renz["kpnI"] = kpnI; // C/CATGG, KpnI renz["ddeI"] = ddeI; // C/TNAG, DdeI renz["aluI"] = aluI; // AG/CT, AluI renz["ageI"] = ageI; // A/CCGGT, AgeI renz["rsaI"] = rsaI; // GT/AC, RsaI renz_cnt["sbfI"] = 1; renz_cnt["pstI"] = 1; renz_cnt["notI"] = 1; renz_cnt["ecoRI"] = 1; renz_cnt["sgrAI"] = 2; renz_cnt["apeKI"] = 2; renz_cnt["hindIII"] = 1; renz_cnt["dpnII"] = 1; renz_cnt["sphI"] = 1; renz_cnt["nlaIII"] = 1; renz_cnt["mluCI"] = 1; renz_cnt["ecoT22I"] = 1; renz_cnt["ndeI"] = 1; renz_cnt["nsiI"] = 1; renz_cnt["mseI"] = 1; renz_cnt["mspI"] = 1; renz_cnt["sexAI"] = 2; renz_cnt["sau3AI"] = 1; renz_cnt["bamHI"] = 1; renz_cnt["xbaI"] = 1; renz_cnt["eaeI"] = 2; renz_cnt["taqI"] = 1; renz_cnt["claI"] = 1; renz_cnt["nheI"] = 1; renz_cnt["speI"] = 1; renz_cnt["apoI"] = 2; renz_cnt["bstYI"] = 2; renz_cnt["xhoI"] = 1; renz_cnt["sacI"] = 1; renz_cnt["bgIII"] = 1; renz_cnt["ecoRV"] = 1; renz_cnt["kpnI"] = 1; renz_cnt["ddeI"] = 4; renz_cnt["aluI"] = 1; renz_cnt["ageI"] = 1; renz_cnt["rsaI"] = 1; renz_len["sbfI"] = 6; renz_len["pstI"] = 5; renz_len["notI"] = 6; renz_len["ecoRI"] = 5; renz_len["sgrAI"] = 6; renz_len["apeKI"] = 4; renz_len["hindIII"] = 5; renz_len["dpnII"] = 4; renz_len["sphI"] = 5; renz_len["nlaIII"] = 4; renz_len["mluCI"] = 4; renz_len["ecoT22I"] = 5; renz_len["ndeI"] = 2; renz_len["nsiI"] = 5; renz_len["mseI"] = 3; renz_len["mspI"] = 3; renz_len["sexAI"] = 6; renz_len["sau3AI"] = 4; renz_len["bamHI"] = 5; renz_len["xbaI"] = 5; renz_len["eaeI"] = 5; renz_len["taqI"] = 3; renz_len["claI"] = 4; renz_len["nheI"] = 5; renz_len["speI"] = 5; renz_len["apoI"] = 5; renz_len["bstYI"] = 5; renz_len["xhoI"] = 5; renz_len["sacI"] = 5; renz_len["bgIII"] = 5; renz_len["ecoRV"] = 3; renz_len["kpnI"] = 5; renz_len["ddeI"] = 4; renz_len["aluI"] = 2; renz_len["ageI"] = 5; renz_len["rsaI"] = 2; } void initialize_renz_olap(map &renz_olap) { renz_olap["sbfI"] = 4; } #endif // __RENZ_H__ 
stacks-1.35/src/rxstacks.cc000644 000765 000024 00000136442 12533677757 016502 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // rxstacks -- make model call corrections and haplotype corrections // across a population of samples. // #include "rxstacks.h" // Global variables to hold command-line options. int num_threads = 1; int batch_id = 0; string in_path; string out_path; FileT in_file_type = FileT::sql; double confounded_limit = 0.75; bool filter_confounded = false; bool prune_haplotypes = false; int max_haplotype_cnt = 0; bool lnl_dist = false; bool filter_lnl = false; double lnl_limit = 0.0; bool verbose = false; // // For use with the multinomial model to call fixed nucleotides. // modelt model_type = snp; double alpha = 0.1; double bound_low = 0.0; double bound_high = 1.0; double p_freq = 0.5; double barcode_err_freq = 0.0; double heterozygote_limit = -2.71; double homozygote_limit = 2.71; const int barcode_size = 5; int main (int argc, char* argv[]) { parse_command_line(argc, argv); cerr << "Log liklihood filtering: " << (filter_lnl == true ? "on" : "off") << "; threshold: " << lnl_limit << "\n" << "Prune haplotypes: " << (prune_haplotypes == true ? "yes" : "no") << "\n" << "Filter confounded loci: " << (filter_confounded == true ? 
"yes" : "no") << "\n"; // // Set limits to call het or homozygote according to chi-square distribution with one // degree of freedom: // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value // if (alpha == 0.1) { heterozygote_limit = -2.71; homozygote_limit = 2.71; } else if (alpha == 0.05) { heterozygote_limit = -3.84; homozygote_limit = 3.84; } else if (alpha == 0.01) { heterozygote_limit = -6.64; homozygote_limit = 6.64; } else if (alpha == 0.001) { heterozygote_limit = -10.83; homozygote_limit = 10.83; } // // Set the number of OpenMP parallel threads to execute. // if (verbose) num_threads = 1; #ifdef _OPENMP omp_set_num_threads(num_threads); #endif vector > files; if (!build_file_list(files)) exit(1); // // Open and initialize the log files. // ofstream log_fh, log_snp_fh, log_hap_fh; init_log(argc, argv, log_fh, log_snp_fh, log_hap_fh); // // Load the catalog // stringstream catalog_file; map catalog; bool compressed = false; int res; catalog_file << in_path << "batch_" << batch_id << ".catalog"; if ((res = load_loci(catalog_file.str(), catalog, false, false, compressed)) == 0) { cerr << "Unable to load the catalog '" << catalog_file.str() << "'\n"; return 0; } in_file_type = compressed == true ? FileT::gzsql : FileT::sql; // // Let's fill in the SNP model calls to include both hets and homozygotes to // make it easier to iterate over them later. 
// fill_catalog_snps(catalog); // // Load matches to the catalog // vector > catalog_matches; map samples; vector sample_ids; for (uint i = 0; i < files.size(); i++) { vector m; load_catalog_matches(in_path + files[i].second, m); if (m.size() == 0) { cerr << "Warning: unable to find any matches in file '" << files[i].second << "', excluding this sample from population analysis.\n"; continue; } catalog_matches.push_back(m); if (samples.count(m[0]->sample_id) == 0) { samples[m[0]->sample_id] = files[i].second; sample_ids.push_back(m[0]->sample_id); } else { cerr << "Fatal error: sample ID " << m[0]->sample_id << " occurs twice in this data set, likely the pipeline was run incorrectly.\n"; exit(0); } } // // Create the population map // cerr << "Populating observed haplotypes for " << sample_ids.size() << " samples, " << catalog.size() << " loci.\n"; PopMap *pmap = new PopMap(sample_ids.size(), catalog.size()); pmap->populate(sample_ids, catalog, catalog_matches); // // Sum haplotype counts across the population for each catalog locus. // sum_haplotype_counts(catalog, pmap); // // Calculate mean log likelihood across the population for each catalog locus. // calc_lnl_means(catalog, pmap); int catalog_id, sample_id, tag_id; string file; // // Process samples matched to the catalog, one by one. 
// for (uint i = 0; i < catalog_matches.size(); i++) { sample_id = catalog_matches[i][0]->sample_id; file = samples[sample_id]; cerr << "Loading stacks from sample " << file << " [" << i+1 << " of " << catalog_matches.size() << "]...\n"; ////// ////// ////// if (sample_id != 176) continue; ////// ////// map stacks; int res; if ((res = load_loci(in_path + file, stacks, true, true, compressed)) == 0) { cerr << "Unable to load sample file '" << file << "'\n"; continue; } cerr << "Making corrections to sample " << file << "..."; set > uniq_matches; set >::iterator it; vector > matches; // // There are multiple matches per stack, but we only need to process // each stack once to make corrections. // for (uint j = 0; j < catalog_matches[i].size(); j++) { catalog_id = catalog_matches[i][j]->cat_id; tag_id = catalog_matches[i][j]->tag_id; uniq_matches.insert(make_pair(catalog_id, tag_id)); } // // Put the catalog/tag ID pairs into a vector for parallel processing. // for (it = uniq_matches.begin(); it != uniq_matches.end(); it++) matches.push_back(*it); unsigned long int nuc_cnt = 0; unsigned long int unk_hom_cnt = 0; unsigned long int unk_het_cnt = 0; unsigned long int het_unk_cnt = 0; unsigned long int hom_unk_cnt = 0; unsigned long int het_hom_cnt = 0; unsigned long int hom_het_cnt = 0; unsigned long int conf_loci_cnt = 0; unsigned long int pruned_hap_cnt = 0; unsigned long int pruned_mst_hap_cnt = 0; unsigned long int blacklist_cnt = 0; unsigned long int lnl_cnt = 0; #pragma omp parallel private(catalog_id, tag_id) { Datum *d; Locus *loc; CSLocus *cloc; #pragma omp for schedule(dynamic, 1) reduction(+:nuc_cnt) reduction(+:unk_hom_cnt) reduction(+:unk_het_cnt) \ reduction(+:hom_unk_cnt) reduction(+:het_unk_cnt) reduction(+:hom_het_cnt) reduction(+:het_hom_cnt) \ reduction(+:conf_loci_cnt) reduction(+:pruned_hap_cnt) reduction(+:pruned_mst_hap_cnt) reduction(+:blacklist_cnt) reduction(+:lnl_cnt) for (uint j = 0; j < matches.size(); j++) { catalog_id = matches[j].first; 
tag_id = matches[j].second; //if (tag_id == 10970) { // cerr << "Hit the tag.\n"; //} //// if (catalog_id != 3080) continue; if (catalog.count(catalog_id) == 0) continue; cloc = catalog[catalog_id]; loc = stacks[tag_id]; if (filter_confounded && ((double) cloc->confounded_cnt / (double) cloc->cnt > confounded_limit)) { // cerr << "Catalog locus " << cloc->id << " is confounded; confounded cnt: " // << cloc->confounded_cnt << "; total: " << cloc->cnt // << "; freq: " << (double) cloc->confounded_cnt / (double)cloc->cnt << "\n"; loc->blacklisted = true; conf_loci_cnt++; continue; } d = pmap->datum(catalog_id, sample_id); if (d == NULL) continue; if (filter_lnl && cloc->lnl < lnl_limit) { loc->blacklisted = true; lnl_cnt++; continue; } prune_nucleotides(cloc, loc, d, log_snp_fh, nuc_cnt, unk_hom_cnt, unk_het_cnt, hom_unk_cnt, het_unk_cnt, hom_het_cnt, het_hom_cnt); // // Prune haplotypes from this locus. // if (prune_haplotypes) { prune_mst_haplotypes(cloc, d, loc, pruned_mst_hap_cnt, log_hap_fh); prune_locus_haplotypes(cloc, d, loc, pruned_hap_cnt, log_hap_fh); if (loc->blacklisted) blacklist_cnt++; } } } cerr << "done.\n"; unsigned long int total = unk_hom_cnt + unk_het_cnt + hom_unk_cnt + het_unk_cnt + hom_het_cnt + het_hom_cnt; cerr << "Total nucleotides processed: " << nuc_cnt << "\n" << " Total nucleotides converted: " << total << "\n" << " Converted from unknown to homozygous: " << unk_hom_cnt << " nucleotides.\n" << " Converted from unknown to heterozygous: " << unk_het_cnt << " nucleotides.\n" << " Converted from homozygous to unknown: " << hom_unk_cnt << " nucleotides.\n" << " Converted from heterozygous to unknown: " << het_unk_cnt << " nucleotides.\n" << " Converted from homozygous to heterozygous: " << hom_het_cnt << " nucleotides.\n" << " Converted from heterozygous to homozygous: " << het_hom_cnt << " nucleotides.\n" << "Pruned: " << pruned_mst_hap_cnt << " haplotypes using a tree method.\n" << "Pruned: " << pruned_hap_cnt << " haplotypes using a rare 
haplotype method.\n" << "Blacklisted: " << blacklist_cnt << " loci due to inability to call haplotypes.\n" << "Blacklisted: " << lnl_cnt << " loci due to log likelihoods below threshold.\n" << "Blacklisted: " << conf_loci_cnt << " confounded loci.\n"; log_fh << file << "\t" << nuc_cnt << "\t" << total << "\t" << unk_hom_cnt << "\t" << unk_het_cnt << "\t" << hom_unk_cnt << "\t" << het_unk_cnt << "\t" << hom_het_cnt << "\t" << het_hom_cnt << "\t" << blacklist_cnt << "\t" << conf_loci_cnt << "\t" << lnl_cnt << "\t" << pruned_hap_cnt << "\t" << pruned_mst_hap_cnt << "\n"; cerr << "Writing modified stacks, SNPs, alleles to '" << out_path << "'..."; // // Rewrite stacks, model outputs, and haplotypes. // write_results(file, stacks); // // Free up memory // cerr << "Freeing memory..."; map::iterator stack_it; for (stack_it = stacks.begin(); stack_it != stacks.end(); stack_it++) delete stack_it->second; cerr << "done.\n"; } log_fh.close(); if (verbose) { log_snp_fh.close(); log_hap_fh.close(); } return 0; } int dist(string hap_1, string hap_2) { int dist = 0; const char *p = hap_1.c_str(); const char *q = hap_2.c_str(); const char *p_end = p + hap_1.length(); const char *q_end = q + hap_2.length(); // // Count the number of characters that are different // between the two sequences. // while (p < p_end && q < q_end) { dist += (*p == *q) ? 
0 : 1; p++; q++; } return dist; } int calc_lnl_means(map &catalog, PopMap *pmap) { map::iterator it; CSLocus *cloc; Datum **d; uint cnt, mid, tot; double median, mean; vector lnls; ofstream log_fh; if (lnl_dist) { stringstream log; log << "batch_" << batch_id << ".rxstacks_lnls.tsv"; string log_path = out_path + log.str(); log_fh.open(log_path.c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening log file '" << log_path << "'\n"; exit(1); } log_fh << "# Catalog Locus\tMean\tMedian\n"; } tot = 0; for (it = catalog.begin(); it != catalog.end(); it++) { cloc = it->second; d = pmap->locus(cloc->id); cnt = pmap->sample_cnt(); mean = 0.0; lnls.clear(); for (uint i = 0; i < cnt; i++) { if (d[i] == NULL) continue; lnls.push_back(d[i]->lnl); mean += d[i]->lnl; } if (lnls.size() == 0) continue; sort(lnls.begin(), lnls.end()); mid = lnls.size() / 2; median = lnls.size() % 2 == 0 ? lnls[mid] + lnls[mid+1] / 2.0 : lnls[mid+1]; mean = mean / (double) lnls.size(); cloc->lnl = mean; // // If the mean log likelihood for this catalog locus is below the threshold, count it as // its constituent components will be filtered as encountered later. // if (filter_lnl && cloc->lnl < lnl_limit) tot++; if (lnl_dist) log_fh << cloc->id << "\t" << mean << "\t" << median << "\n"; } if (lnl_dist) log_fh.close(); // // Print number of catalog loci that are confounded and will be removed. 
// cerr << tot << " catalog loci will be removed from the analysis due to log likelihoods below the threshold.\n"; return 0; } int sum_haplotype_counts(map &catalog, PopMap *pmap) { map::iterator it; CSLocus *cloc; Datum **d; uint cnt; for (it = catalog.begin(); it != catalog.end(); it++) { cloc = it->second; d = pmap->locus(cloc->id); cnt = pmap->sample_cnt(); for (uint i = 0; i < cnt; i++) { if (d[i] == NULL) continue; if (d[i]->obshap.size() == 1) { if (cloc->hap_cnts.count(d[i]->obshap[0]) == 0) cloc->hap_cnts[d[i]->obshap[0]] = 2; else cloc->hap_cnts[d[i]->obshap[0]] += 2; } else { for (uint j = 0; j < d[i]->obshap.size(); j++) if (cloc->hap_cnts.count(d[i]->obshap[j]) == 0) cloc->hap_cnts[d[i]->obshap[j]] = 1; else cloc->hap_cnts[d[i]->obshap[j]] += 1; } } } return 0; } int prune_mst_haplotypes(CSLocus *cloc, Datum *d, Locus *loc, unsigned long &pruned_hap_cnt, ofstream &log_fh) { // // Create a minimum spanning tree in order to determine the minimum distance // between each node in the list. // MinSpanTree *mst = new MinSpanTree; map::iterator it; vector keys; vector haps; Node *n; for (it = cloc->hap_cnts.begin(); it != cloc->hap_cnts.end(); it++) { n = mst->add_node(it->first); haps.push_back(it->first); keys.push_back(n->id); } // // We are going to connect nodes in the graph when a SNP occurs in one // of the positions of the haplotype. // Node *n_1, *n_2; uint snp_pos = 0; for (uint i = 0; i < cloc->snps.size(); i++) { if (cloc->snps[i]->type != snp_type_het) continue; for (uint j = 0; j < haps.size(); j++) { for (uint k = j + 1; k < haps.size(); k++) { // // If these two haplotypes differ by this SNP (and only this SNP), connect them in the graph. // if (haps[j].at(snp_pos) != haps[k].at(snp_pos) && dist(haps[j], haps[k]) == 1) { n_1 = mst->node(haps[j]); n_2 = mst->node(haps[k]); n_1->add_edge(n_2, 1); n_2->add_edge(n_1, 1); } } } snp_pos++; } // // Build the minimum spanning tree. 
// mst->build_tree(); // // Sort the haplotypes by read depth in this sample // vector > haplotypes; for (uint i = 0; i < d->obshap.size(); i++) haplotypes.push_back(make_pair(string(d->obshap[i]), (double) d->depth[i])); uint size = haplotypes.size(); // // Sort according to haplotype frequency. // sort(haplotypes.begin(), haplotypes.end(), compare_pair_haplotype_rev); if (size <= 2) return 0; // // Pull out the two most frequently occuring haplotypes. // string hap_1, hap_2; double hap_1_depth, hap_2_depth; hap_1 = haplotypes[size - 1].first; hap_1_depth = haplotypes[size - 1].second; haplotypes.pop_back(); if (haplotypes[size - 2].second > haplotypes[size - 3].second) { hap_2 = haplotypes[size - 2].first; hap_2_depth = haplotypes[size - 2].second; haplotypes.pop_back(); } else { hap_2 = ""; hap_2_depth = 0.0; } // // For each remaining haplotpye, check if it can be merged into a node (haplotype) no // more than one nucleotide apart. If there is more than one, merge it into the more // frequently occuring haplotype. // string hap, src_hap, dest_hap, label; double max, weighted; for (uint i = 0; i < haplotypes.size(); i++) { // // Find the current haplotype in the MST. // n_1 = mst->node(haplotypes[i].first); max = 0.0; hap = ""; weighted = 0.0; // // Check any potential edges in the graph for merging. // for (uint j = 0; j < n_1->edges.size(); j++) { label = n_1->edges[j]->child->label; if (label == hap_1) { weighted = (double) cloc->hap_cnts[label] * log(hap_1_depth); // cerr << "Cloc hap: " << label << "; popcnt: " << cloc->hap_cnts[label] << "; hap depth: " << hap_1_depth << "; weighted: " << weighted << "\n"; } else if (label == hap_2) { weighted = (double) cloc->hap_cnts[label] * log(hap_2_depth); // cerr << "Cloc hap: " << label << "; popcnt: " << cloc->hap_cnts[label] << "; hap depth: " << hap_2_depth << "; weighted: " << weighted << "\n"; } else continue; if (weighted == max) { // // There is more than one identical possibility, we can do no more. 
// hap = ""; break; } else if (weighted > max) { max = weighted; hap = label; } } if (hap.length() == 0) continue; src_hap = convert_catalog_haplotype_to_sample(haplotypes[i].first, cloc, loc); dest_hap = convert_catalog_haplotype_to_sample(hap, cloc, loc); if (verbose) { #pragma omp critical log_fh << cloc->id << "\t" << loc->sample_id << "\t" << loc->id << "\t" << src_hap << "\t" << haplotypes[i].first << "\t" << dest_hap << "\t" << hap << "\t" << "mst" << "\n"; } pruned_hap_cnt++; // // Remove the haplotype. // it = loc->alleles.find(src_hap); if (it != loc->alleles.end()) { loc->alleles.erase(it); } // // Add to the count of the merged-to haplotype. // if (loc->alleles.count(dest_hap) > 0) { loc->alleles[dest_hap]++; } else { cerr << "Error finding allele\n"; } } // // Update the matched haplotypes in the Datum object, so the haplotype pruner can // operate on newly generated, spurious haplotypes. // generate_matched_haplotypes(cloc, loc, d); return 0; } int prune_locus_haplotypes(CSLocus *cloc, Datum *d, Locus *loc, unsigned long &pruned_hap_cnt, ofstream &log_fh) { if (d->obshap.size() < 2) return 0; // // Identify the two most frequent haplotypes in this sample. // vector > haplotypes; double weighted_hap; for (uint i = 0; i < d->obshap.size(); i++) { // // Lookup the number of occurrences of this haplotype in the // population as well as the depth of the haplotype in this indiviudal. // We will weight the occurrences of the haplotype in the population by the natural log // of the read depth of the haplotype in this individual, storing the result. // weighted_hap = (double) cloc->hap_cnts[d->obshap[i]] * log((double) d->depth[i]); haplotypes.push_back(make_pair(string(d->obshap[i]), weighted_hap)); } // // Sort according to haplotype frequency. // sort(haplotypes.begin(), haplotypes.end(), compare_pair_haplotype); if (haplotypes.size() == 0) { cerr << "Error processing catalog locus " << cloc->id << "\n"; return -1; } // // Prune out excess haplotypes. 
// for (uint i = 2; i < haplotypes.size(); i++) { // // Make sure that those haplotypes we want to discard occur at a frequency lower // than the second most frequent haplotype, instead of being tied for second. // if (haplotypes[i].second >= haplotypes[1].second || (max_haplotype_cnt > 0 && haplotypes[i].second > max_haplotype_cnt)) continue; remove_haplotype(cloc, loc, haplotypes[i].first, pruned_hap_cnt, log_fh, "rare_step_1"); haplotypes.erase(haplotypes.begin() + i); } // // If there are more than two haplotypes remaining and the second, third, etc // haplotype exist only in this individual, prune them out. // if (haplotypes.size() > 2) { int stop_pos = haplotypes.size() - 1; int start_pos = stop_pos; double score = haplotypes[stop_pos].second; while (start_pos > 1) { if (cloc->hap_cnts[haplotypes[start_pos].first] == 1 && haplotypes[start_pos].second == score) start_pos--; else break; } if (start_pos < stop_pos) { for (int i = start_pos; i <= stop_pos; i++) remove_haplotype(cloc, loc, haplotypes[i].first, pruned_hap_cnt, log_fh, "rare_step_1"); } } // // Update the matched haplotypes in the Datum object, so the haplotype pruner can // operate on newly generated, spurious haplotypes. // generate_matched_haplotypes(cloc, loc, d); return 0; } string convert_catalog_haplotype_to_sample(string cat_haplotype, CSLocus *cloc, Locus *loc) { int cat_snp = 0; int cat_idx = -1; int loc_snp = 0; int loc_idx = -1; int k = -1; int j = -1; string hap; do { j++; loc_idx++; // // Advance to a het in the sample locus. // while (j < (int) loc->snps.size() && loc->snps[j]->type != snp_type_het) j++; if (j >= (int) loc->snps.size()) break; loc_snp = loc->snps[j]->col; do { k++; cat_idx++; // // Advance to the het in the catalog locus that corresponds to the sample locus. 
// while (k < (int) cloc->snps.size() && cloc->snps[k]->type != snp_type_het) k++; if (k >= (int) cloc->snps.size()) break; cat_snp = cloc->snps[k]->col; } while (cat_snp < loc_snp); // // Extract out the nucleotide from the catalog haplotype that matches the sample // haplotype. For example, catalog haplotype may be 'ACGTG' while sample haplotype // is 'CT'. // if (j < (int) loc->snps.size() && k < (int) cloc->snps.size() && cat_snp == loc_snp) { hap += cat_haplotype.at(cat_idx); } else { cerr << "Error processing catalog locus " << cloc->id << "\n"; return ""; } } while (j < (int) loc->snps.size()); return hap; } int remove_haplotype(CSLocus *cloc, Locus *loc, string haplotype, unsigned long &pruned_hap_cnt, ofstream &log_fh, string alg_type) { map::iterator it; string hap = ""; hap = convert_catalog_haplotype_to_sample(haplotype, cloc, loc); if (verbose) { #pragma omp critical log_fh << cloc->id << "\t" << loc->sample_id << "\t" << loc->id << "\t" << hap << "\t" << haplotype << "\t" << "\t" << "\t" << alg_type << "\n"; } // // Remove the haplotype. // it = loc->alleles.find(hap); if (it != loc->alleles.end()) { loc->alleles.erase(it); pruned_hap_cnt++; } // // Decrement the count for this haplotype in the catalog locus. // if (cloc->hap_cnts.count(haplotype) > 0) cloc->hap_cnts[haplotype]--; return 0; } int prune_nucleotides(CSLocus *cloc, Locus *loc, Datum *d, ofstream &log_fh, unsigned long int &nuc_cnt, unsigned long int &unk_hom_cnt, unsigned long int &unk_het_cnt, unsigned long int &hom_unk_cnt, unsigned long int &het_unk_cnt, unsigned long int &hom_het_cnt, unsigned long int &het_hom_cnt) { map nucs; set cnucs; set::iterator it; set rows; nuc_cnt += loc->len; for (uint i = 0; i < loc->snps.size() && i < cloc->snps.size(); i++) { // // Either their is an unknown call in locus, or, there is a snp in the catalog and any state in the locus. 
// if ((loc->snps[i]->type == snp_type_unk) || (cloc->snps[i]->type == snp_type_het && loc->snps[i]->type == snp_type_hom)) { // cerr << " Looking at SNP call in tag " << loc->id << " at position " << i << "; col: " << loc->snps[i]->col << "\n" // << " Catalog column: " << cloc->snps[i]->col << " (" << i << "); Sample column: " << loc->snps[i]->col << " (" << i << ")\n" // << " Sample has model call type: " << (loc->snps[i]->type == snp_type_unk ? "Unknown" : "Homozygous") << "; nucleotides: '" // << loc->snps[i]->rank_1 << "' and '" << loc->snps[i]->rank_2 << "'\n" // << " Catalog has model call type: " << (cloc->snps[i]->type == snp_type_het ? "Heterozygous" : "Homozygous") << "; nucleotides: '" // << cloc->snps[i]->rank_1 << "' and '" << cloc->snps[i]->rank_2 << "'\n"; if (loc->snps[i]->rank_1 == 'N' || cloc->snps[i]->rank_1 == 'N') continue; cnucs.insert(cloc->snps[i]->rank_1); if (cloc->snps[i]->rank_2 != 0) cnucs.insert(cloc->snps[i]->rank_2); if (cloc->snps[i]->rank_3 != 0) cnucs.insert(cloc->snps[i]->rank_3); if (cloc->snps[i]->rank_4 != 0) cnucs.insert(cloc->snps[i]->rank_4); // cerr << " Catalog has nucleotides: "; // for (it = cnucs.begin(); it != cnucs.end(); it++) // cerr << *it << ", "; // cerr << "\n"; // // Tally the number of occurances of each nucleotide also present in the // catalog in order to fuel the snp calling model. // // Note reads that contain nucleotides not present in the catalog so they // can be excluded when calling haplotypes from the read. // nucs['A'] = 0; nucs['C'] = 0; nucs['G'] = 0; nucs['T'] = 0; nucs['N'] = 0; for (uint k = 0; k < loc->reads.size(); k++) { if (cnucs.count(loc->reads[k][i]) > 0) nucs[loc->reads[k][i]]++; else if (loc->reads[k][i] != 'N') rows.insert(k); } // // Test pruned data for homozygosity or heterozygosity. 
// invoke_model(loc, i, nucs); if (verbose) { #pragma omp critical log_model_calls(loc, log_fh, unk_hom_cnt, unk_het_cnt, hom_unk_cnt, het_unk_cnt, hom_het_cnt, het_hom_cnt); } else { log_model_calls(loc, log_fh, unk_hom_cnt, unk_het_cnt, hom_unk_cnt, het_unk_cnt, hom_het_cnt, het_hom_cnt); } } nucs.clear(); cnucs.clear(); } // // Re-call alleles. // loc->alleles.clear(); call_alleles(loc, rows); // // If SNPs were called at this locus but no alleles could be determined, // blacklist this tag. This can occur if a fixed nucleotide isn't captured in // the catalog and all the reads are removed for the purpose of reading haplotypes. // if (loc->alleles.size() <= 1) for (uint j = 0; j < loc->snps.size(); j++) if (loc->snps[j]->type == snp_type_het) { loc->blacklisted = 1; break; } // // Update the matched haplotypes in the Datum object, so the haplotype pruner can // operate on newly generated, spurious haplotypes. // generate_matched_haplotypes(cloc, loc, d); return 0; } int invoke_model(Locus *loc, int col, map &nucs) { // // Search this column for the presence of a SNP // switch(model_type) { case snp: call_multinomial_snp(loc, col, nucs); break; case bounded: call_bounded_multinomial_snp(loc, col, nucs); break; default: break; } return 0; } int call_alleles(Locus *loc, set &rows) { int row; int height = loc->reads.size(); string allele; char base; vector::iterator snp; for (row = 0; row < height; row++) { // // If a read had a nucleotide not present in the catalog, do not call // a haplotype from it. // if (rows.count(row) > 0) continue; allele.clear(); uint snp_cnt = 0; for (snp = loc->snps.begin(); snp != loc->snps.end(); snp++) { if ((*snp)->type != snp_type_het) continue; snp_cnt++; base = loc->reads[row][(*snp)->col]; // // Check to make sure the nucleotide at the location of this SNP is // of one of the two possible states the multinomial model called. 
// if (base == (*snp)->rank_1 || base == (*snp)->rank_2) allele += base; else break; } if (snp_cnt > 0 && allele.length() == snp_cnt) loc->alleles[allele]++; } return 0; } int generate_matched_haplotypes(CSLocus *cloc, Locus *loc, Datum *d) { // // Free the existing matched haplotypes. // for (uint i = 0; i < d->obshap.size(); i++) delete [] d->obshap[i]; d->obshap.clear(); d->depth.clear(); // // Construct a set of haplotypes from the locus relative to the catalog locus. // (The locus already has a set of haplotypes, however, they don't necessarily // account for all the SNPs in the catalog, so we will augment them with sequence // from the consensus.) // vector > merged_snps; map > columns; map >::iterator c; vector >::iterator k; for (uint i = 0; i < cloc->snps.size(); i++) { if (cloc->snps[i]->type != snp_type_het) continue; columns[cloc->snps[i]->col] = make_pair("catalog", cloc->snps[i]); } for (uint i = 0; i < loc->snps.size(); i++) { if (loc->snps[i]->type != snp_type_het) continue; // // Is this column already represented in the catalog? // if (columns.count(loc->snps[i]->col)) columns[loc->snps[i]->col] = make_pair("both", loc->snps[i]); else columns[loc->snps[i]->col] = make_pair("query", loc->snps[i]); } for (c = columns.begin(); c != columns.end(); c++) merged_snps.push_back((*c).second); // // Sort the SNPs by column // sort(merged_snps.begin(), merged_snps.end(), compare_pair_snp); map::iterator b; string old_allele, new_allele; int pos; for (b = loc->alleles.begin(); b != loc->alleles.end(); b++) { old_allele = b->first; new_allele = ""; pos = 0; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { // // If the SNPs from the catalog haplotype beyond the length of the query, add Ns // if (k->first == "catalog") { new_allele += (k->second->col > loc->len - 1) ? 
'N' : loc->con[k->second->col]; } else { new_allele += old_allele[pos]; pos++; } } char *h = new char[new_allele.length() + 1]; strcpy(h, new_allele.c_str()); d->obshap.push_back(h); d->depth.push_back(b->second); // loc->alleles[new_allele] = b->second; // cerr << "Adding haplotype: " << new_allele << " [" << b->first << "]\n"; } return 0; } int log_model_calls(Locus *loc, ofstream &log_fh, unsigned long int &unk_hom_cnt, unsigned long int &unk_het_cnt, unsigned long int &hom_unk_cnt, unsigned long int &het_unk_cnt, unsigned long int &hom_het_cnt, unsigned long int &het_hom_cnt) { // // Log model call changes // for (uint j = 0; j < loc->snps.size(); j++) { switch(loc->model[j]) { case 'U': switch(loc->snps[j]->type) { case snp_type_het: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'U' << "\t" << 'E' << "\n"; unk_het_cnt++; break; case snp_type_hom: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'U' << "\t" << 'O' << "\n"; unk_hom_cnt++; break; case snp_type_unk: default: break; } break; case 'E': switch(loc->snps[j]->type) { case snp_type_het: break; case snp_type_hom: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'E' << "\t" << 'O' << "\n"; het_hom_cnt++; break; case snp_type_unk: default: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'E' << "\t" << 'U' << "\n"; het_unk_cnt++; break; } break; case 'O': default: switch(loc->snps[j]->type) { case snp_type_het: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'O' << "\t" << 'E' << "\n"; hom_het_cnt++; break; case snp_type_hom: break; case snp_type_unk: default: if (verbose) log_fh << loc->sample_id << "\t" << loc->id << "\t" << loc->snps[j]->col << "\t" << 'O' << "\t" << 'U' << "\n"; hom_unk_cnt++; break; } break; } } return 0; } int write_results(string file, map &m) 
{ map::iterator i; vector::iterator j; vector::iterator k; map::iterator t; Locus *tag_1; stringstream sstr; bool gzip = (in_file_type == FileT::gzsql) ? true : false; // // Parse the input file name to create the output files // string tag_file = out_path + file + ".tags.tsv"; string snp_file = out_path + file + ".snps.tsv"; string all_file = out_path + file + ".alleles.tsv"; if (gzip) { tag_file += ".gz"; snp_file += ".gz"; all_file += ".gz"; } // // Open the output files for writing. // gzFile gz_tags, gz_snps, gz_alle; ofstream tags, snps, alle; if (gzip) { gz_tags = gzopen(tag_file.c_str(), "wb"); if (!gz_tags) { cerr << "Error: Unable to open gzipped catalog tag file '" << tag_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_tags, libz_buffer_size); #endif gz_snps = gzopen(snp_file.c_str(), "wb"); if (!gz_snps) { cerr << "Error: Unable to open gzipped catalog snps file '" << snp_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_snps, libz_buffer_size); #endif gz_alle = gzopen(all_file.c_str(), "wb"); if (!gz_alle) { cerr << "Error: Unable to open gzipped catalog alleles file '" << all_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_alle, libz_buffer_size); #endif } else { tags.open(tag_file.c_str()); if (tags.fail()) { cerr << "Error: Unable to open catalog tag file for writing.\n"; exit(1); } snps.open(snp_file.c_str()); if (snps.fail()) { cerr << "Error: Unable to open catalog SNPs file for writing.\n"; exit(1); } alle.open(all_file.c_str()); if (alle.fail()) { cerr << "Error: Unable to open catalog alleles file for writing.\n"; exit(1); } } int wrote = 0; for (i = m.begin(); i != m.end(); i++) { tag_1 = i->second; wrote++; // First write the consensus sequence sstr << "0" << "\t" << tag_1->sample_id << "\t" << tag_1->id << "\t" << tag_1->loc.chr << "\t" << tag_1->loc.bp << "\t" << (tag_1->loc.strand == plus ? 
"+" : "-") << "\t" << "consensus" << "\t" << "\t" << "\t" << tag_1->con << "\t" << (tag_1->deleveraged == true ? "1" : "0") << "\t" << (tag_1->blacklisted == true ? "1" : "0") << "\t" << (tag_1->lumberjackstack == true ? "1" : "0") << "\t" << tag_1->lnl << "\n"; // // Write a sequence recording the output of the SNP model for each nucleotide. // sstr << "0" << "\t" << tag_1->sample_id << "\t" << tag_1->id << "\t" << "\t" << "\t" << "\t" << "model" << "\t" << "\t" << "\t"; for (uint j = 0; j < tag_1->snps.size(); j++) { switch(tag_1->snps[j]->type) { case snp_type_het: sstr << "E"; break; case snp_type_hom: sstr << "O"; break; default: sstr << "U"; break; } } sstr << "\t" << "\t" << "\t" << "\t" << "\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); // // Now write out each read from this locus. // for (uint j = 0; j < tag_1->reads.size(); j++) { sstr << "0" << "\t" << tag_1->sample_id << "\t" << tag_1->id << "\t\t\t\t"; if (tag_1->comp_type[j] == primary) sstr << "primary" << "\t"; else sstr << "secondary" << "\t"; sstr << tag_1->comp_cnt[j] << "\t" << tag_1->comp[j] << "\t" << tag_1->reads[j] << "\t\t\t\t\n"; } if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); // // Write out the model calls for each nucleotide in this locus. // for (uint j = 0; j < tag_1->snps.size(); j++) { sstr << "0" << "\t" << tag_1->sample_id << "\t" << tag_1->id << "\t" << tag_1->snps[j]->col << "\t"; switch(tag_1->snps[j]->type) { case snp_type_het: sstr << "E\t"; break; case snp_type_hom: sstr << "O\t"; break; default: sstr << "U\t"; break; } sstr << std::fixed << std::setprecision(3) << tag_1->snps[j]->lratio << "\t" << tag_1->snps[j]->rank_1 << "\t" << (tag_1->snps[j]->rank_2 == 0 ? 
'-' : tag_1->snps[j]->rank_2) << "\t\t\n"; } if (gzip) gzputs(gz_snps, sstr.str().c_str()); else snps << sstr.str(); sstr.str(""); // // Write the expressed alleles seen for the recorded SNPs and // the percentage of tags a particular allele occupies. // char pct[id_len]; for (t = tag_1->alleles.begin(); t != tag_1->alleles.end(); t++) { sprintf(pct, "%.2f", ((t->second/double(tag_1->reads.size())) * 100)); sstr << "0" << "\t" << tag_1->sample_id << "\t" << tag_1->id << "\t" << t->first << "\t" << pct << "\t" << t->second << "\n"; } if (gzip) gzputs(gz_alle, sstr.str().c_str()); else alle << sstr.str(); sstr.str(""); } if (gzip) { gzclose(gz_tags); gzclose(gz_snps); gzclose(gz_alle); } else { tags.close(); snps.close(); alle.close(); } cerr << "wrote " << wrote << " loci.\n"; return 0; } int build_file_list(vector > &files) { vector parts; string f; // // Read all the files from the Stacks directory. // uint pos; string file; struct dirent *direntry; DIR *dir = opendir(in_path.c_str()); if (dir == NULL) { cerr << "Unable to open directory '" << in_path << "' for reading.\n"; exit(1); } while ((direntry = readdir(dir)) != NULL) { file = direntry->d_name; if (file == "." 
|| file == "..") continue; if (file.substr(0, 6) == "batch_") continue; pos = file.rfind(".tags.tsv"); if (pos < file.length()) files.push_back(make_pair(1, file.substr(0, pos))); } closedir(dir); if (files.size() == 0) { cerr << "Unable to locate any input files to process within '" << in_path << "'\n"; return 0; } cerr << "Found " << files.size() << " input file(s).\n"; return 1; } int fill_catalog_snps(map &catalog) { map::iterator it; CSLocus *cloc; for (it = catalog.begin(); it != catalog.end(); it++) { cloc = it->second; queue snps; for (uint j = 0; j < cloc->snps.size(); j++) snps.push(cloc->snps[j]); cloc->snps.clear(); for (uint j = 0; j < cloc->len; j++) { if (snps.size() > 0 && snps.front()->col == j) { cloc->snps.push_back(snps.front()); snps.pop(); } else { SNP *snp = new SNP; snp->type = snp_type_hom; snp->col = j; snp->lratio = 0; snp->rank_1 = cloc->con[j]; snp->rank_2 = 0; cloc->snps.push_back(snp); } } } return 0; } int init_log(int argc, char **argv, ofstream &log_fh, ofstream &log_snp_fh, ofstream &log_hap_fh) { stringstream log; stringstream sstr; // // Open the log files. // log << out_path << "batch_" << batch_id << ".rxstacks.log"; log_fh.open(log.str().c_str(), ofstream::out); if (log_fh.fail()) { cerr << "Error opening log file '" << log.str() << "'\n"; exit(1); } if (verbose) { log.str(""); log << out_path << "batch_" << batch_id << ".rxstacks.snps.log"; log_snp_fh.open(log.str().c_str(), ofstream::out); if (log_snp_fh.fail()) { cerr << "Error opening log file '" << log.str() << "'\n"; exit(1); } log.str(""); log << out_path << "batch_" << batch_id << ".rxstacks.haplotypes.log"; log_hap_fh.open(log.str().c_str(), ofstream::out); if (log_hap_fh.fail()) { cerr << "Error opening log file '" << log.str() << "'\n"; exit(1); } } // // Obtain the current date. 
// time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); sstr << "#"; for (int i = 0; i < argc; i++) sstr << " " << argv[i]; sstr << "\n" << "# rxstacks executed " << date; log_fh << sstr.str() << "\n" << "# Sample\t" << "Total nucs\t" << "Total nucs converted\t" << "Unk to Hom\t" << "Unk to Het\t" << "Hom to Unk\t" << "Het to Unk\t" << "Hom to Het\t" << "Het to Hom\t" << "Confounded loci\t" << "Lnl Filtered loci\t" << "Pruned Haplotypes\t" << "MST-Pruned Haplotypes\n"; if (verbose) { log_snp_fh << sstr.str() << "\n" << "# Sample Id\t" << "Locus ID\t" << "SNP Col\t" << "Orig Value\t" << "Corr Value\n"; log_hap_fh << sstr.str() << "\n" << "# Catalog Locus\t" << "Sample\t" << "Sample Locus\t" << "Sample Haplotype\t" << "Catalog Haplotype\t" << "Corrected Sample Haplotype\t" << "Corrected Catalog Haplotype\t" << "Algorithm\n"; } return 0; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"conf_filter", no_argument, NULL, 'F'}, {"prune_haplo", no_argument, NULL, 'H'}, {"lnl_filter", no_argument, NULL, 'G'}, {"lnl_dist", no_argument, NULL, 'D'}, {"verbose", no_argument, NULL, 'V'}, {"num_threads", required_argument, NULL, 't'}, {"batch_id", required_argument, NULL, 'b'}, {"in_path", required_argument, NULL, 'P'}, {"outpath", required_argument, NULL, 'o'}, {"model_type", required_argument, NULL, 'T'}, {"bound_low", required_argument, NULL, 'L'}, {"bound_high", required_argument, NULL, 'U'}, {"alpha", required_argument, NULL, 'A'}, {"conf_lim", required_argument, NULL, 'C'}, {"max_haplo", required_argument, NULL, 'M'}, {"lnl_lim", required_argument, NULL, 'I'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. 
int option_index = 0; c = getopt_long(argc, argv, "hvVFGDHo:t:b:P:T:L:U:A:C:I:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 't': num_threads = atoi(optarg); break; case 'P': in_path = optarg; break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 1, 2, 3\n"; help(); } break; case 'o': out_path = optarg; break; case 'T': if (strcmp(optarg, "snp") == 0) { model_type = snp; } else if (strcmp(optarg, "fixed") == 0) { model_type = fixed; } else if (strcmp(optarg, "bounded") == 0) { model_type = bounded; } else { cerr << "Unknown model type specified '" << optarg << "'\n"; help(); } case 'L': bound_low = atof(optarg); break; case 'U': bound_high = atof(optarg); break; case 'A': alpha = atof(optarg); break; case 'F': filter_confounded = true; break; case 'C': confounded_limit = is_double(optarg); filter_confounded = true; break; case 'H': prune_haplotypes = true; break; case 'M': max_haplotype_cnt = is_integer(optarg); break; case 'G': filter_lnl = true; break; case 'I': lnl_limit = is_double(optarg); filter_lnl = true; break; case 'D': lnl_dist = true; break; case 'V': verbose = true; break; case 'v': version(); break; case '?': // getopt_long already printed an error message. 
help(); break; default: help(); abort(); } } if (in_path.length() == 0) { cerr << "You must specify a path to the directory containing Stacks output files.\n"; help(); } if (out_path.length() == 0) { cerr << "No output path specified, files in '" << in_path << "' will be overwritten.\n"; out_path = in_path; } if (in_path.at(in_path.length() - 1) != '/') in_path += "/"; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (batch_id == 0) { cerr << "You must specify a batch ID.\n"; help(); } if (alpha != 0.1 && alpha != 0.05 && alpha != 0.01 && alpha != 0.001) { cerr << "SNP model alpha significance level must be either 0.1, 0.05, 0.01, or 0.001.\n"; help(); } if (bound_low != 0 && (bound_low < 0 || bound_low >= 1.0)) { cerr << "SNP model lower bound must be between 0.0 and 1.0.\n"; help(); } if (bound_high != 1 && (bound_high <= 0 || bound_high > 1.0)) { cerr << "SNP model upper bound must be between 0.0 and 1.0.\n"; help(); } if (bound_low > 0 || bound_high < 1.0) { model_type = bounded; } if (filter_confounded == true && (confounded_limit < 0 || confounded_limit > 1.0)) { cerr << "Confounded locus limit is a percentage and must be between 0.0 and 1.0.\n"; help(); } return 0; } void version() { std::cerr << "rxstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "rxstacks " << VERSION << "\n" << "rxstacks -b batch_id -P path [-o path] [-t threads] [-v] [-h]" << "\n" << " b: Batch ID to examine when exporting from the catalog.\n" << " P: path to the Stacks output files.\n" << " o: output path to write results.\n" << " t: number of threads to run in parallel sections of code.\n" << " v: print program version." << "\n" << " h: display this help messsage." 
<< "\n\n" << " Filtering options:\n" << " --lnl_filter: filter catalog loci based on the mean log likelihood of the catalog locus in the population.\n" << " --lnl_lim : minimum log likelihood required to keep a catalog locus.\n" << " --lnl_dist: print distribution of mean log likelihoods for catalog loci.\n" << " --conf_filter: filter confounded loci.\n" << " --conf_lim : between 0.0 and 1.0 (default 0.75), proportion of loci in population that must be confounded relative to the catalog locus.\n" << " --prune_haplo: prune out non-biological haplotypes unlikely to occur in the population.\n" << " --max_haplo : only consider haplotypes for pruning if they occur in fewer than max_haplo_cnt samples.\n" << " Model options:\n" << " --model_type : either 'snp' (default), 'bounded', or 'fixed'\n" << " For the SNP or Bounded SNP model:\n" << " --alpha : chi square significance level required to call a heterozygote or homozygote, either 0.1 (default), 0.05, 0.01, or 0.001.\n" << " For the Bounded SNP model:\n" << " --bound_low : lower bound for epsilon, the error rate, between 0 and 1.0 (default 0).\n" << " --bound_high : upper bound for epsilon, the error rate, between 0 and 1.0 (default 1).\n" << " Logging Options:\n" << " --verbose: extended logging, including coordinates of all changed nucleotides (forces single-threaded execution).\n"; exit(0); } stacks-1.35/src/rxstacks.h000644 000765 000024 00000006176 12441417455 016325 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __RXSTACKS_H__ #define __RXSTACKS_H__ #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include // Open/Read contents of a directory #include #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include // std::setprecision #include using std::stringstream; #include using std::vector; #include using std::queue; #include using std::map; #include using std::set; #include "constants.h" #include "stacks.h" #include "locus.h" #include "PopMap.h" #include "PopSum.h" #include "utils.h" #include "sql_utilities.h" #include "models.h" #include "mst.h" void help( void ); void version( void ); int parse_command_line(int, char**); int build_file_list(vector > &); int init_log(int, char **, ofstream &, ofstream &, ofstream &); int sum_haplotype_counts(map &, PopMap *); int prune_mst_haplotypes(CSLocus *, Datum *, Locus *, unsigned long int &, ofstream &); int prune_locus_haplotypes(CSLocus *, Datum *, Locus *, unsigned long int &, ofstream &); string convert_catalog_haplotype_to_sample(string, CSLocus *, Locus *); int remove_haplotype(CSLocus *, Locus *, string, unsigned long &, ofstream &, string); int dist(string, string); int measure_error(CSLocus *, Locus *, Datum *, ofstream &); int calc_lnl_means(map &, PopMap *); int prune_nucleotides(CSLocus *, Locus *, Datum *, ofstream &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &, 
unsigned long int &); int invoke_model(Locus *, int, map &); int call_alleles(Locus *, set &); int generate_matched_haplotypes(CSLocus *, Locus *, Datum *); int fill_catalog_snps(map &); int log_model_calls(Locus *, ofstream &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &, unsigned long int &); int write_results(string, map &); #endif // __RXSTACKS_H__ stacks-1.35/src/SamI.h000644 000765 000024 00000014345 12335173442 015306 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __SAMI_H__ #define __SAMI_H__ // // Code to parse Sam format. This format is created for // reads that have been aligned to a reference genome. It takes the tab-separated form: // // ... ... // // One record per line. // #include "input.h" class Sam: public Input { int parse_cigar(const char *, vector > &, bool); int find_start_bp_neg(int, vector > &); int find_start_bp_pos(int, vector > &); int edit_gaps(vector > &, char *); public: Sam(const char *path) : Input(path) {}; ~Sam() {}; Seq *next_seq(); int next_seq(Seq &) { return 0; }; }; Seq * Sam::next_seq() { vector parts; int flag; uint len; // // Read a record from the file and place it in a Seq object, skipping header // definitions and unaligned sequences. 
// do { this->fh.getline(this->line, max_len); if (!this->fh.good()) return NULL; len = strlen(this->line); if (this->line[len - 1] == '\r') this->line[len - 1] = '\0'; parse_tsv(this->line, parts); // // According to SAM spec FLAGs are the second field, // if FLAG bit 0x4 is set, sequence is not mapped. // flag = atoi(parts[1].c_str()); flag = flag & 4; flag = flag >> 2; } while (parts[0][0] == '@' || flag == 1); // // Check which strand this is aligned to: // SAM reference: FLAG bit 0x10 - sequence is reverse complemented // flag = atoi(parts[1].c_str()); flag = flag & 16; flag = flag >> 4; // // If the read was aligned on the reverse strand (and is therefore reverse complemented) // alter the start point of the alignment to reflect the right-side of the read, at the // end of the RAD cut site. // // To accomplish this, we must parse the alignment CIGAR string // vector > cigar; this->parse_cigar(parts[5].c_str(), cigar, flag); int bp = flag ? this->find_start_bp_neg(atoi(parts[3].c_str()), cigar) : this->find_start_bp_pos(atoi(parts[3].c_str()), cigar); // // Sam format has a 1-based offset for chrmosome/basepair positions, adjust it to match // the Stacks, 0-based offset. // bp--; Seq *s = new Seq(parts[0].c_str(), parts[9].c_str(), parts[10].c_str(), // Read ID, Sequence, Quality parts[2].c_str(), bp, flag ? minus : plus); // Chr, BasePair, Strand if (cigar.size() > 0) this->edit_gaps(cigar, s->seq); return s; } int Sam::parse_cigar(const char *cigar_str, vector > &cigar, bool orientation) { char buf[id_len]; int dist; const char *p, *q; p = cigar_str; if (*p == '*') return 0; while (*p != '\0') { q = p + 1; while (*q != '\0' && isdigit(*q)) q++; strncpy(buf, p, q - p); buf[q-p] = '\0'; dist = atoi(buf); // // If aligned to the negative strand, sequence has been reverse complemented and // CIGAR string should be interpreted in reverse. 
// if (orientation == plus) cigar.push_back(make_pair(*q, dist)); else cigar.insert(cigar.begin(), make_pair(*q, dist)); p = q + 1; } return 0; } int Sam::find_start_bp_neg(int aln_bp, vector > &cigar) { uint size = cigar.size(); char op; uint dist; for (uint i = 0; i < size; i++) { op = cigar[i].first; dist = cigar[i].second; switch(op) { case 'I': break; case 'S': if (i < size - 1) aln_bp += dist; break; case 'M': case 'D': aln_bp += dist; break; } } return aln_bp - 1; } int Sam::find_start_bp_pos(int aln_bp, vector > &cigar) { char op; uint dist; op = cigar[0].first; dist = cigar[0].second; if (op == 'S') aln_bp -= dist; return aln_bp; } int Sam::edit_gaps(vector > &cigar, char *seq) { char buf[id_len]; uint size = cigar.size(); char op; uint dist, bp, len, buf_len, j, k, stop; len = strlen(seq); bp = 0; for (uint i = 0; i < size; i++) { op = cigar[i].first; dist = cigar[i].second; switch(op) { case 'S': stop = bp + dist; stop = stop > len ? len : stop; while (bp < stop) { seq[bp] = 'N'; bp++; } break; case 'D': // // A deletion has occured in the read relative to the reference genome. // Pad the read with sufficent Ns to match the deletion, shifting the existing // sequence down. Trim the final length to keep the read length consistent. // strncpy(buf, seq + bp, id_len - 1); buf[id_len - 1] = '\0'; buf_len = strlen(buf); stop = bp + dist; stop = stop > len ? len : stop; while (bp < stop) { seq[bp] = 'N'; bp++; } j = bp; k = 0; while (j < len && k < buf_len) { seq[j] = buf[k]; k++; j++; } break; case 'I': // // An insertion has occurred in the read relative to the reference genome. Delete the // inserted bases and pad the end of the read with Ns. // k = bp + dist; strncpy(buf, seq + k, id_len - 1); buf[id_len - 1] = '\0'; buf_len = strlen(buf); j = bp; k = 0; while (j < len && k < buf_len) { seq[j] = buf[k]; k++; j++; } stop = j + dist; stop = stop > len ? 
len : stop; while (j < stop) { seq[j] = 'N'; j++; } break; case 'M': bp += dist; break; } } return 0; } #endif // __SAMI_H__ stacks-1.35/src/smoothing.h000644 000765 000024 00000010331 12335173442 016453 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __SMOOTHING_H__ #define __SMOOTHING_H__ #include #include "smoothing_utils.h" template class KSmooth { uint size; // Number of elements expected in the StatT class to smooth. double *weights; // Weight matrix to apply while smoothing. public: KSmooth(int size) { this->size = size; this->weights = calc_weights(); } ~KSmooth() { delete [] this->weights; } int smooth(vector &popstats); }; template int KSmooth::smooth(vector &popstats) { // // To generate smooth genome-wide distributions of Fst, we calculate a kernel-smoothing // moving average of Fst values along each ordered chromosome. // // For each genomic region centered on a nucleotide position c, the contribution of the population // genetic statistic at position p to the region average was weighted by the Gaussian function: // exp( (-1 * (p - c)^2) / (2 * sigma^2)) // // In addition, we weight each position according to (n_k - 1), where n_k is the number of alleles // sampled at that location. 
// // By default, sigma = 150Kb, for computational efficiency, only calculate average out to 3sigma. // #pragma omp parallel { int dist; uint pos_l, pos_u; double sum, final_weight; PopStat *c, *p; pos_l = 0; pos_u = 0; #pragma omp for schedule(dynamic, 1) for (uint pos_c = 0; pos_c < popstats.size(); pos_c++) { c = popstats[pos_c]; if (c == NULL) continue; for (uint i = 0; i < this->size; i++) c->smoothed[i] = 0.0; sum = 0.0; determine_window_limits(popstats, c->bp, pos_l, pos_u); for (uint pos_p = pos_l; pos_p < pos_u; pos_p++) { p = popstats[pos_p]; if (p == NULL) continue; dist = p->bp > c->bp ? p->bp - c->bp : c->bp - p->bp; if (dist > limit || dist < 0) { #pragma omp critical { cerr << "ERROR: current basepair is out of the sliding window.\n" << " Calculating sliding window; start position: " << pos_l << ", " << (popstats[pos_l] == NULL ? -1 : popstats[pos_l]->bp) << "bp; end position: " << pos_u << ", " << (popstats[pos_u] == NULL ? -1 : popstats[pos_u]->bp) << "bp; center: " << pos_c << ", " << popstats[pos_c]->bp << "bp\n" << " Current position: " << pos_p << ", " << popstats[pos_p]->bp << "; Dist: " << dist << "\n" << " Window positions:\n"; for (uint j = pos_l; j < pos_u; j++) { p = popstats[j]; if (p == NULL) continue; cerr << " Position: " << j << "; " << p->bp << "bp\n"; } //exit(0); } continue; } // sites_cnt++; final_weight = (p->alleles - 1) * this->weights[dist]; for (uint i = 0; i < this->size; i++) c->smoothed[i] += p->stat[i] * final_weight; sum += final_weight; // if (c->loc_id == 9314) { // cerr << " id: " << p->loc_id // << "; dist: " << dist // << "; weight: " << weights[dist] // << "; final_weight: " << final_weight // << "; fst': " << p->stat[3] // << "; sum: " << sum // << "; smoothed: " << c->smoothed[3] << "\n"; // } } // sites_per_snp += (sites_cnt / snp_cnt); // tot_windows++; // // if (snp_cnt < max_snp_dist) { // #pragma omp atomic // snp_dist[snp_cnt]++; // } // c->snp_cnt = snp_cnt; for (uint i = 0; i < this->size; i++) 
c->smoothed[i] /= sum; } } return 0; } #endif // __SMOOTHING_H__ stacks-1.35/src/smoothing_utils.h000644 000765 000024 00000004030 12335173442 017672 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __SMOOTHING_UTILS_H__ #define __SMOOTHING_UTILS_H__ #include #include using std::vector; extern double sigma; int limit = 3 * sigma; double * calc_weights() { // // Calculate weights for window smoothing operations. // // For each genomic region centered on a nucleotide position c, the contribution of the population // genetic statistic at position p to the region average was weighted by the Gaussian function: // exp( (-1 * (p - c)^2) / (2 * sigma^2)) // double *weights = new double[limit + 1]; for (int i = 0; i <= limit; i++) weights[i] = exp((-1 * pow(i, 2)) / (2 * pow(sigma, 2))); return weights; } template inline int determine_window_limits(vector &sites, uint center_bp, uint &pos_l, uint &pos_u) { int limit_l = center_bp - limit > 0 ? 
center_bp - limit : 0; int limit_u = center_bp + limit; while (pos_l < sites.size()) { if (sites[pos_l] == NULL) { pos_l++; } else { if (sites[pos_l]->bp < limit_l) pos_l++; else break; } } while (pos_u < sites.size()) { if (sites[pos_u] == NULL) { pos_u++; } else { if (sites[pos_u]->bp < limit_u) pos_u++; else break; } } return 0; } #endif // __SMOOTHING_UTILS_H__ stacks-1.35/src/sql_utilities.h000644 000765 000024 00000037646 12533677757 017402 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2014, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // sql_utilities.h -- template routines to read and write Stacks SQL file formats. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #ifndef __SQL_UTILITIES_H__ #define __SQL_UTILITIES_H__ #include "input.h" #include "utils.h" // // The expected number of tab-separated fields in our SQL input files. 
// const uint num_tags_fields = 14; const uint num_snps_fields = 10; const uint num_alleles_fields = 6; const uint num_matches_fields = 8; template int load_loci(string sample, map &loci, bool store_reads, bool load_all_model_calls, bool &compressed) { LocusT *c; SNP *snp; string f; char *cmp; const char *p, *q; int len; vector parts; set blacklisted; long int line_num; ifstream fh; gzFile gz_fh; char *line = (char *) malloc(sizeof(char) * max_len); int size = max_len; bool gzip = false; int fh_status = 1; // // First, parse the tag file and pull in the consensus sequence // for each locus. // f = sample + ".tags.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".tags.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; compressed = true; } cerr << " Parsing " << f.c_str() << "\n"; uint id; line_num = 1; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_tags_fields) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } id = atoi(parts[2].c_str()); if (parts[6] != "consensus") { if (blacklisted.count(id)) continue; // // Make sure this locus has already been defined (consensus sequence SHOULD always // be specified first in the file for a particular locus). // if (loci.count(id) > 0) { // // Read the model sequence, a series of letters specifying if the model called a // homozygous base (O), a heterozygous base (E), or if the base type was unknown (U). 
// if (parts[6] == "model") { loci[id]->model = new char[parts[9].length() + 1]; strcpy(loci[id]->model, parts[9].c_str()); } else { // // Otherwise, we expect a primary or secondary read, record these if specified. // loci[id]->depth++; if (store_reads) { char *read = new char[parts[9].length() + 1]; strcpy(read, parts[9].c_str()); loci[id]->reads.push_back(read); char *read_id = new char[parts[8].length() + 1]; strcpy(read_id, parts[8].c_str()); loci[id]->comp.push_back(read_id); // // Store the internal stack number for this read. // loci[id]->comp_cnt.push_back(atoi(parts[7].c_str())); // // Store the read type. // if (parts[6] == "primary") loci[id]->comp_type.push_back(primary); else loci[id]->comp_type.push_back(secondary); } } continue; } else { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (stack " << id << " does not exist).\n"; return 0; } } // // Do not include blacklisted tags in the catalog. They are tags that are composed // of noise and/or repetitive sequence. // if (parts[11] == "1") { blacklisted.insert(id); continue; } c = new LocusT; c->sample_id = atoi(parts[1].c_str()); c->id = id; c->add_consensus(parts[9].c_str()); // // Read in the flags // c->deleveraged = (parts[10] == "1" ? true : false); c->lumberjackstack = (parts[12] == "1" ? true : false); // // Read in the log likelihood of the locus. // c->lnl = is_double(parts[13].c_str()); // // Parse the physical genome location of this locus. // c->loc.set(parts[3].c_str(), atoi(parts[4].c_str()), (parts[5] == "+" ? plus : minus)); // // Parse the components of this stack (either the Illumina ID, or the catalog constituents) // q = parts[8].c_str(); while (*q != '\0') { for (p = q; *q != ',' && *q != '\0'; q++); len = q - p; cmp = new char[len + 1]; strncpy(cmp, p, len); cmp[len] = '\0'; c->comp.push_back(cmp); if (*q != '\0') q++; } loci[c->id] = c; line_num++; } if (gzip) gzclose(gz_fh); else fh.close(); // // Next, parse the SNP file and load model calls. 
// gzip = false; fh_status = 1; line_num = 1; f = sample + ".snps.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".snps.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; compressed = true; } cerr << " Parsing " << f.c_str() << "\n"; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_snps_fields && parts.size() != num_snps_fields - 2) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } id = atoi(parts[2].c_str()); if (blacklisted.count(id)) continue; // // Only load heterozygous model calls. // if (load_all_model_calls == false && parts[4] != "E") continue; snp = new SNP; snp->col = atoi(parts[3].c_str()); snp->lratio = atof(parts[5].c_str()); snp->rank_1 = parts[6].at(0); snp->rank_2 = parts[7].at(0) == '-' ? 0 : parts[7].at(0); if (parts[4] == "E") snp->type = snp_type_het; else if (parts[4] == "O") snp->type = snp_type_hom; else snp->type = snp_type_unk; if (parts.size() == 10) { if (parts[8].length() == 0 || parts[8].at(0) == '-') snp->rank_3 = 0; else snp->rank_3 = parts[8].at(0); if (parts[9].length() == 0 || parts[9].at(0) == '-') snp->rank_4 = 0; else snp->rank_4 = parts[9].at(0); } if (loci.count(id) > 0) { loci[id]->snps.push_back(snp); } else { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". 
SNP asks for nonexistent locus with ID: " << id << "\n"; return 0; } line_num++; } if (gzip) gzclose(gz_fh); else fh.close(); // // Finally, parse the Alleles file // gzip = false; fh_status = 1; line_num = 1; f = sample + ".alleles.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".alleles.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; compressed = true; } cerr << " Parsing " << f.c_str() << "\n"; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_alleles_fields) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } id = atoi(parts[2].c_str()); if (blacklisted.count(id)) continue; if (loci.count(id) > 0) { loci[id]->alleles[parts[3]] = atoi(parts[5].c_str()); } else { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". SNP asks for nonexistent locus with ID: " << id << "\n"; return 0; } line_num++; } if (gzip) gzclose(gz_fh); else fh.close(); // // Populate the strings member with the sequence for each allele for each Locus. 
// typename map::iterator i; for (i = loci.begin(); i != loci.end(); i++) i->second->populate_alleles(); delete [] line; return 1; } template int dump_loci(map &u) { typename map::iterator i; vector::iterator s; for (i = u.begin(); i != u.end(); i++) { cerr << "Locus ID: " << i->second->id << "\n" << " Consensus: " << i->second->con << "\n" << " Genomic Location: " << i->second->loc.chr << "; " << i->second->loc.bp << "bp\n" << " SNPs:\n"; for (s = i->second->snps.begin(); s != i->second->snps.end(); s++) cerr << " Col: " << (*s)->col << " rank 1: " << (*s)->rank_1 << " rank 2: " << (*s)->rank_2 << "\n"; cerr << "\n"; } return 0; } int load_catalog_matches(string sample, vector &matches) { CatMatch *m; string f; vector parts; long int line_num; ifstream fh; gzFile gz_fh; char *line = (char *) malloc(sizeof(char) * max_len); int size = max_len; bool gzip = false; int fh_status = 1; f = sample + ".matches.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".matches.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; } cerr << " Parsing " << f.c_str() << "\n"; line_num = 1; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); line_num++; if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_matches_fields) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". 
(" << parts.size() << " fields).\n"; return 0; } m = new CatMatch; m->batch_id = atoi(parts[1].c_str()); m->cat_id = atoi(parts[2].c_str()); m->sample_id = atoi(parts[3].c_str()); m->tag_id = atoi(parts[4].c_str()); m->haplotype = new char[parts[5].length() + 1]; strcpy(m->haplotype, parts[5].c_str()); m->depth = atoi(parts[6].c_str()); m->lnl = is_double(parts[7].c_str()); matches.push_back(m); } if (gzip) gzclose(gz_fh); else fh.close(); return 0; } int load_model_results(string sample, map &modres) { string f; vector parts; long int line_num; ifstream fh; gzFile gz_fh; char *line = (char *) malloc(sizeof(char) * max_len); int size = max_len; bool gzip = false; int fh_status = 1; // // First, parse the tag file and pull in the consensus sequence // for each Radtag. // gzip = false; fh_status = 1; line_num = 1; f = sample + ".tags.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".tags.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; } cerr << " Parsing " << f.c_str() << "\n"; ModRes *mod; uint tag_id, samp_id; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); line_num++; if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_tags_fields) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } // // Read the model sequence, a series of letters specifying if the model called a // homozygous base (O), a heterozygous base (E), or if the base type was unknown (U). 
// if (parts[6] != "model") continue; samp_id = atoi(parts[1].c_str()); tag_id = atoi(parts[2].c_str()); mod = new ModRes(samp_id, tag_id, parts[9].c_str()); modres[tag_id] = mod; } if (gzip) gzclose(gz_fh); else fh.close(); delete [] line; return 1; } int load_snp_calls(string sample, map &snpres) { string f; int id, samp_id; vector parts; long int line_num; SNP *snp; SNPRes *snpr; ifstream fh; gzFile gz_fh; char *line = (char *) malloc(sizeof(char) * max_len); int size = max_len; bool gzip = false; int fh_status = 1; // // Parse the SNP file // f = sample + ".snps.tsv"; fh.open(f.c_str(), ifstream::in); if (fh.fail()) { // // Test for a gzipped file. // f = sample + ".snps.tsv.gz"; gz_fh = gzopen(f.c_str(), "rb"); if (!gz_fh) { cerr << " Unable to open '" << sample << "'\n"; return 0; } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_fh, libz_buffer_size); #endif gzip = true; } cerr << " Parsing " << f.c_str() << "\n"; line_num = 1; while (fh_status) { fh_status = (gzip == true) ? read_gzip_line(gz_fh, &line, &size) : read_line(fh, &line, &size); if (!fh_status && strlen(line) == 0) continue; if (is_comment(line)) continue; parse_tsv(line, parts); if (parts.size() != num_snps_fields && parts.size() != num_snps_fields - 2) { cerr << "Error parsing " << f.c_str() << " at line: " << line_num << ". (" << parts.size() << " fields).\n"; return 0; } samp_id = atoi(parts[1].c_str()); id = atoi(parts[2].c_str()); snp = new SNP; snp->col = atoi(parts[3].c_str()); if (parts[4] == "O") snp->type = snp_type_hom; else if (parts[4] == "E") snp->type = snp_type_het; else snp->type = snp_type_unk; snp->lratio = atof(parts[5].c_str()); snp->rank_1 = parts[6].at(0); snp->rank_2 = parts[7].at(0) == '-' ? 
0 : parts[7].at(0); if (parts.size() == 10) { if (parts[8].length() == 0 || parts[8].at(0) == '-') snp->rank_3 = 0; else snp->rank_3 = parts[8].at(0); if (parts[9].length() == 0 || parts[9].at(0) == '-') snp->rank_4 = 0; else snp->rank_4 = parts[9].at(0); } if (snpres.count(id) == 0) { snpr = new SNPRes(samp_id, id); snpres[id] = snpr; } snpres[id]->snps.push_back(snp); line_num++; } if (gzip) gzclose(gz_fh); else fh.close(); delete [] line; return 1; } #endif // __SQL_UTILITIES_H__ stacks-1.35/src/sstacks.cc000644 000765 000024 00000065416 12533677757 016315 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // sstacks -- search for occurances of stacks in a catalog of stacks. // #include "sstacks.h" // Global variables to hold command-line options. queue samples; string catalog_path; string out_path; FileT in_file_type = FileT::sql; int num_threads = 1; int batch_id = 0; int samp_id = 0; int catalog = 0; bool verify_haplotypes = true; bool impute_haplotypes = true; bool require_uniq_haplotypes = false; searcht search_type = sequence; int main (int argc, char* argv[]) { parse_command_line(argc, argv); uint sample_cnt = samples.size(); // // Set the number of OpenMP parallel threads to execute. 
// #ifdef _OPENMP omp_set_num_threads(num_threads); #endif map catalog; bool compressed = false; int res; if (search_type == sequence) cerr << "Searching for matches by sequence identity...\n"; else if (search_type == genomic_loc) cerr << "Searching for matches by genomic location...\n"; catalog_path += ".catalog"; res = load_loci(catalog_path, catalog, false, false, compressed); if (res == 0) { cerr << "Unable to parse catalog, '" << catalog_path << "'\n"; return 0; } string sample_path; int i = 1; while (!samples.empty()) { map sample; sample_path = samples.front(); samples.pop(); cerr << "Processing sample '" << sample_path << "' [" << i << " of " << sample_cnt << "]\n"; res = load_loci(sample_path, sample, false, false, compressed); if (res == 0) { cerr << "Unable to parse '" << sample_path << "'\n"; return 0; } in_file_type = compressed == true ? FileT::gzsql : FileT::sql; // // Assign the ID for this sample data. // samp_id = sample.begin()->second->sample_id; //dump_loci(catalog); //dump_loci(sample); if (search_type == sequence) { cerr << "Searching for sequence matches...\n"; find_matches_by_sequence(catalog, sample); } else if (search_type == genomic_loc) { cerr << "Searching for matches by genomic location...\n"; find_matches_by_genomic_loc(catalog, sample); } write_matches(sample_path, sample); i++; } return 0; } int find_matches_by_genomic_loc(map &sample_1, map &sample_2) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // map::iterator i; map::iterator j; int k; char id[id_len]; // // Build a hash map out of the first sample (usually the catalog) // // // Create a map of the genomic locations of stacks in sample_1 // cerr << " Creating map of genomic locations..."; map > locations; for (j = sample_1.begin(); j != sample_1.end(); j++) { snprintf(id, id_len - 1, "%s|%d|%c", j->second->loc.chr, j->second->loc.bp, j->second->loc.strand == plus ? 
'+' : '-'); locations[id].insert(j->second->id); } cerr << "done.\n"; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (i = sample_2.begin(); i != sample_2.end(); i++) keys.push_back(i->first); // // Initialize some counters // unsigned long matches = 0; unsigned long nomatch = 0; unsigned long nosnps = 0; unsigned long tot_hap = 0; unsigned long ver_hap = 0; #pragma omp parallel private(i, j, k, id) { unsigned long verified; #pragma omp for reduction(+:matches) reduction(+:tot_hap) reduction(+:ver_hap) reduction(+:nomatch) reduction(+:nosnps) for (k = 0; k < (int) keys.size(); k++) { i = sample_2.find(keys[k]); snprintf(id, id_len - 1, "%s|%d|%c", i->second->loc.chr, i->second->loc.bp, i->second->loc.strand == plus ? '+' : '-'); if (locations.count(id) > 0) { Locus *tag; set::iterator loc_it; vector >::iterator q; matches++; for (loc_it = locations[id].begin(); loc_it != locations[id].end(); loc_it++) { tag = sample_1[*loc_it]; // // Generate haplotypes for query tag relative to the catalog tag. // set query_haplotypes; generate_query_haplotypes(tag, i->second, query_haplotypes); tot_hap += query_haplotypes.size() > 0 ? 
query_haplotypes.size() : 1; if (verify_haplotypes) { verified = verify_genomic_loc_match(tag, i->second, query_haplotypes, nosnps); ver_hap += verified; if (verified == 0) nomatch++; } else { i->second->add_match(tag->id, tag->strings.begin()->first); } } } } } cerr << keys.size() << " stacks matched against the catalog containing " << sample_1.size() << " loci.\n" << " " << matches << " matching loci, " << nomatch << " contained no verified haplotypes.\n" << " " << nosnps << " loci contained SNPs unaccounted for in the catalog and were excluded.\n" << " " << tot_hap << " total haplotypes examined from matching loci, " << ver_hap << " verified.\n"; return 0; } int verify_genomic_loc_match(Locus *s1_tag, QLocus *s2_tag, set &query_haplotypes, unsigned long &nosnps) { vector::iterator i, j; // // We have found a match between the genomic location of s1 and s2. We now want // to verify that the haplotypes are consistent between the tags, i.e. they // have the same number and types of SNPs. // // // 1. First, if there are no SNPs present in either the query or catalog, just // check that the strings match. // uint min_len = s1_tag->len > s2_tag->len ? s2_tag->len : s1_tag->len; if (s1_tag->snps.size() == 0 && s2_tag->snps.size() == 0 && strncmp(s1_tag->con, s2_tag->con, min_len) == 0) { s2_tag->add_match(s1_tag->id, "consensus"); return 1; } // // 2. Second, we will check that the query locus (s2_tag) does not have any SNPs // lacking in the catalog tag (s1_tag). // bool found; for (j = s2_tag->snps.begin(); j != s2_tag->snps.end(); j++) { found = false; // // SNP occurs in a column that is beyond the length of the catalog // if ((*j)->col > min_len - 1) continue; for (i = s1_tag->snps.begin(); i != s1_tag->snps.end(); i++) { if ((*i)->col == (*j)->col) found = true; } // // Query locus posses a SNP not present in the catalog. 
// if (found == false) { nosnps++; return 0; } } // // Finally, check that one of the constructed alleles matches the allele // passed in on the stack. // string cat_haplotype; vector >::iterator c; set::iterator a; uint matches = 0; for (a = query_haplotypes.begin(); a != query_haplotypes.end(); a++) { if (impute_haplotypes) { int res = impute_haplotype(*a, s1_tag->strings, cat_haplotype); if (res > 0) { // // If the matching haplotype was imputed, record the depths of the query alleles // under the new, imputed alleles. // if (s2_tag->alleles.count(cat_haplotype) == 0) { if (s2_tag->alleles.count(*a) > 0) s2_tag->alleles[cat_haplotype] = s2_tag->alleles[*a]; else s2_tag->alleles[cat_haplotype] = s2_tag->depth; } //cerr << s2_tag->id << "; Adding cat haplotype: " << cat_haplotype << " based on depth of " << *a << ", " << s2_tag->alleles[cat_haplotype] << "\n"; s2_tag->add_match(s1_tag->id, cat_haplotype); matches++; } else if (res < 0) { cerr << " Failure imputing haplotype for catalog locus: " << s1_tag->id << " and query tag: " << s2_tag->id << "\n"; } } else { for (c = s1_tag->strings.begin(); c != s1_tag->strings.end(); c++) if (*a == c->first) { //cerr << " Adding match between " << s1_tag->id << " and " << c->first << "\n"; s2_tag->add_match(s1_tag->id, c->first); matches++; } } } return matches; } // int impute_haplotype(string query_haplotype, // vector > &cat_haplotypes, // string &match) { // uint max_len = query_haplotype.length() > cat_haplotypes[0].first.length() ? // query_haplotype.length() : // cat_haplotypes[0].first.length(); // //cerr << "Query len: " << query_haplotype.length() << "; Max length: " << max_len << "\n"; // vector cur, next; // for (uint i = 0; i < cat_haplotypes.size(); i++) // cur.push_back(cat_haplotypes[i].first); // match = ""; // // // // Examine the haplotypes one SNP at a time. If we are able to uniquely // // determine the catalog haplotype that the query haplotype corresponds // // to, return it. 
// // // uint j = 0; // while (cur.size() > 1 && j < max_len) { // for (uint i = 0; i < cur.size(); i++) { // //cerr << "Comparing query[" << j << "]: '" << query_haplotype[j] << "' to catalog '" << cur[i][j] << "'\n"; // if (query_haplotype[j] == cur[i][j]) { // //cerr << " Keeping this haplotype.\n"; // next.push_back(cur[i]); // } // } // cur = next; // next.clear(); // j++; // } // // // // If there is only one left, make sure what we have of the haplotype does match // // and its not simply an erroneously called haplotype. // // // if (cur.size() == 1 && // strncmp(cur[0].c_str(), query_haplotype.c_str(), max_len) == 0) { // match = cur[0]; // return 1; // } // // // // If, after examining all the available SNPs in the query haplotype, there is // // still more than a single possible catalog haplotype, then we can't impute it. // // // return 0; // } int impute_haplotype(string query_haplotype, vector > &cat_haplotypes, string &match) { if (cat_haplotypes.size() == 0) { cerr << "Warning: malformed catalog tag: missing haplotype information.\n"; return -1; } //cerr << "Examining " << query_haplotype << "\n"; uint max_len = query_haplotype.length() > cat_haplotypes[0].first.length() ? query_haplotype.length() : cat_haplotypes[0].first.length(); //cerr << "Query len: " << query_haplotype.length() << "; Max length: " << max_len << "\n"; vector cur, next; uint match_cnt, no_n_cnt; for (uint i = 0; i < cat_haplotypes.size(); i++) cur.push_back(cat_haplotypes[i].first); match = ""; // // Examine the haplotypes one SNP at a time. If we are able to uniquely // determine the catalog haplotype that the query haplotype corresponds // to, return it. 
// uint j = 0; while (cur.size() > 1 && j < max_len) { for (uint i = 0; i < cur.size(); i++) { //cerr << "Comparing query[" << j << "]: '" << query_haplotype << "' to catalog '" << cur[i] << "'\n"; if (require_uniq_haplotypes && (query_haplotype[j] == cur[i][j] || query_haplotype[j] == 'N')) { //cerr << " Keeping this haplotype.\n"; next.push_back(cur[i]); } else if (query_haplotype[j] == cur[i][j]) { //cerr << " Keeping this haplotype.\n"; next.push_back(cur[i]); } //else { //cerr << " Discarding this haplotype.\n"; //} } cur = next; next.clear(); j++; } // // If there is only one left, make sure what we have of the haplotype does match // and its not simply an erroneously called haplotype. // no_n_cnt = 0; match_cnt = 0; if (cur.size() == 1) { if (require_uniq_haplotypes) { for (uint k = 0; k < max_len; k++) if (query_haplotype[k] != 'N') no_n_cnt++; for (uint k = 0; k < max_len; k++) if (cur[0][k] == query_haplotype[k]) match_cnt++; if (match_cnt == no_n_cnt) { //cerr << "Keeping " << query_haplotype << "\n"; match = cur[0]; return 1; } } else { if (strncmp(cur[0].c_str(), query_haplotype.c_str(), max_len) == 0) { match = cur[0]; return 1; } } } // // If, after examining all the available SNPs in the query haplotype, there is // still more than a single possible catalog haplotype, then we can't impute it. // return 0; } int generate_query_haplotypes(Locus *s1_tag, QLocus *s2_tag, set &query_haplotypes) { // // Construct a set of haplotypes from the query locus relative to the catalog locus. // (The query locus already has a set of haplotypes, however, they don't necessarily // account for all the SNPs in the catalog, so we will augment them with sequence // from the consensus.) 
// if (s1_tag->snps.size() == 0 && s2_tag->snps.size() == 0) return 0; vector > merged_snps; map > columns; map >::iterator c; vector >::iterator k; vector::iterator i; for (i = s1_tag->snps.begin(); i != s1_tag->snps.end(); i++) columns[(*i)->col] = make_pair("catalog", *i); for (i = s2_tag->snps.begin(); i != s2_tag->snps.end(); i++) { // // Is this column already represented in the catalog? // if (columns.count((*i)->col)) columns[(*i)->col] = make_pair("both", *i); else columns[(*i)->col] = make_pair("query", *i); } for (c = columns.begin(); c != columns.end(); c++) merged_snps.push_back((*c).second); // // Sort the SNPs by column // sort(merged_snps.begin(), merged_snps.end(), compare_pair_snp); map converted_alleles; map::iterator b; string old_allele, new_allele; int pos; for (b = s2_tag->alleles.begin(); b != s2_tag->alleles.end(); b++) { old_allele = b->first; new_allele = ""; pos = 0; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { // // If the SNPs from the catalog haplotype beyond the length of the query, add Ns // if (k->first == "catalog") { new_allele += (k->second->col > s2_tag->len - 1) ? 'N' : s2_tag->con[k->second->col]; } else { new_allele += old_allele[pos]; pos++; } } query_haplotypes.insert(new_allele); converted_alleles[new_allele] = b->second; // cerr << "Adding haplotype: " << new_allele << " [" << b->first << "]\n"; } if (s2_tag->alleles.size() == 0) { new_allele = ""; for (k = merged_snps.begin(); k != merged_snps.end(); k++) { new_allele += (k->second->col > s2_tag->len - 1) ? 'N' : s2_tag->con[k->second->col]; } query_haplotypes.insert(new_allele); // cerr << "Adding haplotype 2: " << new_allele << "\n"; } else { s2_tag->alleles.clear(); for (b = converted_alleles.begin(); b != converted_alleles.end(); b++) s2_tag->alleles[b->first] = b->second; } return 0; } int find_matches_by_sequence(map &sample_1, map &sample_2) { map::iterator i; uint min_tag_len; // // We don't assume all radtags will be the same length. 
// min_tag_len = sample_1.begin()->second->len > sample_2.begin()->second->len ? sample_2.begin()->second->len : sample_1.begin()->second->len; // // Build a hash map out of the first sample (usually the catalog), // using only the minimum length substring of the longest reads; // HashMap sample_1_map; populate_hash(sample_1, sample_1_map, min_tag_len); // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (i = sample_2.begin(); i != sample_2.end(); i++) keys.push_back(i->first); // // Initialize some counters // unsigned long matches = 0; unsigned long mmatch = 0; unsigned long nosnps = 0; unsigned long nomatch = 0; unsigned long tot_hap = 0; unsigned long ver_hap = 0; #pragma omp parallel { #pragma omp for reduction(+:matches) reduction(+:tot_hap) reduction(+:ver_hap) reduction(+:nomatch) reduction(+:mmatch) for (uint k = 0; k < keys.size(); k++) { QLocus *query = sample_2[keys[k]]; // // Iterate through the haplotypes for this tag in sample_2 // HashMap::iterator hit; vector >::iterator q; // Query records allele_type/search string pairs vector >::iterator c; // Hash map records id/allele_type pairs map > haplo_hits; set loci_hit; for (q = query->strings.begin(); q != query->strings.end(); q++) { // cerr << " Looking for haplotype: " << q->first << " with sequence " << q->second.substr(0, min_tag_len) << "\n"; hit = sample_1_map.find(q->second.substr(0, min_tag_len).c_str()); if (hit != sample_1_map.end()) { tot_hap++; // cerr << " Found a match for " << hit->first << "\n"; for (c = hit->second.begin(); c != hit->second.end(); c++) { // // Record the catalog loci hit by the haplotypes of this query locus. // loci_hit.insert(c->first); // // Record the haplotypes hit between the query and catalog loci. 
// haplo_hits[q->first].push_back(c->second); if (verify_haplotypes == false) query->add_match(c->first, c->second); } } } if (loci_hit.size() > 0) matches++; if (verify_haplotypes && loci_hit.size() > 0) { uint verified = verify_sequence_match(sample_1, query, loci_hit, haplo_hits, min_tag_len, mmatch, nosnps); ver_hap += verified; if (verified == 0) nomatch++; } } } cerr << keys.size() << " stacks compared against the catalog containing " << sample_1.size() << " loci.\n" << " " << matches << " matching loci, " << nomatch << " contained no verified haplotypes.\n" << " " << mmatch << " loci matched more than one catalog locus and were excluded.\n" << " " << nosnps << " loci contained SNPs unaccounted for in the catalog and were excluded.\n" << " " << tot_hap << " total haplotypes examined from matching loci, " << ver_hap << " verified.\n"; return 0; } int verify_sequence_match(map &sample_1, QLocus *query, set &loci_hit, map > &haplo_hits, uint min_tag_len, unsigned long &mmatch, unsigned long &nosnps) { // // 1. Check that this query locus matches just a single catalog locus. // if (loci_hit.size() > 1) { mmatch++; return 0; } Locus *cat = sample_1[*(loci_hit.begin())]; // // 2. Make sure the query has no SNPs unaccounted for in the catalog. // vector::iterator i, j; bool found; for (i = query->snps.begin(); i != query->snps.end(); i++) { found = false; // // SNP occurs in a column that is beyond the length of the catalog // if ((*i)->col > min_tag_len - 1) continue; for (j = cat->snps.begin(); j != cat->snps.end(); j++) { if ((*i)->col == (*j)->col) found = true; } // // Query locus posses a SNP not present in the catalog. // if (found == false) { nosnps++; return 0; } } // // 3. We want a one-to-one correspondance between a query haplotype and a // catalog haplotype. This relationship fails when the catalog and query seqeunces // are different lengths and the full length haplotype can not be determined. 
// map >::iterator it; map cat_hap, query_hap; for (it = haplo_hits.begin(); it != haplo_hits.end(); it++) { query_hap[it->first] = it->second.size(); for (uint j = 0; j < it->second.size(); j++) cat_hap[it->second[j]]++; } uint verified = 0; for (it = haplo_hits.begin(); it != haplo_hits.end(); it++) for (uint j = 0; j < it->second.size(); j++) { if (cat_hap[it->second[j]] == 1 && query_hap[it->first] == 1) { verified++; query->add_match(cat->id, it->second[j]); // // If the matching haplotype was imputed, record the depths of the query alleles // under the new, imputed alleles. // if (query->alleles.count(it->second[j]) == 0) { if (query->alleles.count(it->first) > 0) query->alleles[it->second[j]] = query->alleles[it->first]; else query->alleles[it->second[j]] = query->depth; } } } return verified; } int populate_hash(map &sample, HashMap &hash_map, int min_tag_len) { map::iterator it; vector >::iterator all_it; Locus *tag; char *key; // // Create a hash map out of the set of alleles for each Locus. // for (it = sample.begin(); it != sample.end(); it++) { tag = it->second; for (all_it = tag->strings.begin(); all_it != tag->strings.end(); all_it++) { key = new char[min_tag_len + 1]; strncpy(key, all_it->second.c_str(), min_tag_len); key[min_tag_len] = '\0'; hash_map[key].push_back(make_pair(tag->id, all_it->first)); } } //dump_kmer_map(kmer_map); return 0; } int write_matches(string sample_path, map &sample) { map::iterator i; // // Parse the input file names to create the output file // size_t pos_1 = sample_path.find_last_of("/"); string out_file = out_path + sample_path.substr(pos_1 + 1) + ".matches.tsv"; if (in_file_type == FileT::gzsql) out_file += ".gz"; // // Open the output files for writing. 
// gzFile gz_matches; ofstream matches; if (in_file_type == FileT::gzsql) { gz_matches = gzopen(out_file.c_str(), "wb"); if (!gz_matches) { cerr << "Error: Unable to open gzipped matches file '" << out_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_matches, libz_buffer_size); #endif } else { matches.open(out_file.c_str()); if (matches.fail()) { cerr << "Error: Unable to open matches file for writing.\n"; exit(1); } } // // Record the version of Stacks used and the date generated as a comment in the catalog. // // Obtain the current date. // stringstream log; time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); log << "# sstacks version " << VERSION << "; generated on " << date << "\n"; if (in_file_type == FileT::gzsql) gzputs(gz_matches, log.str().c_str()); else matches << log.str(); QLocus *qloc; string type; uint match_depth; stringstream sstr; cerr << "Outputing to file " << out_file << "\n"; for (i = sample.begin(); i != sample.end(); i++) { qloc = i->second; for (uint j = 0; j < qloc->matches.size(); j++) { if (verify_haplotypes == false && search_type == genomic_loc) match_depth = qloc->depth; else match_depth = qloc->alleles.count(qloc->matches[j]->cat_type) > 0 ? 
qloc->alleles[qloc->matches[j]->cat_type] : qloc->depth; sstr << "0" << "\t" << batch_id << "\t" << qloc->matches[j]->cat_id << "\t" << samp_id << "\t" << qloc->id << "\t" << qloc->matches[j]->cat_type << "\t" << match_depth << "\t" << qloc->lnl << "\n"; } if (in_file_type == FileT::gzsql) gzputs(gz_matches, sstr.str().c_str()); else matches << sstr.str(); sstr.str(""); } if (in_file_type == FileT::gzsql) gzclose(gz_matches); else matches.close(); return 0; } int parse_command_line(int argc, char* argv[]) { string sample_file; int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"genomic_loc", no_argument, NULL, 'g'}, {"verify_hap", no_argument, NULL, 'x'}, {"uniq_haplotypes", no_argument, NULL, 'u'}, {"num_threads", required_argument, NULL, 'p'}, {"batch_id", required_argument, NULL, 'b'}, {"catalog", required_argument, NULL, 'c'}, {"sample_2", required_argument, NULL, 's'}, {"outpath", required_argument, NULL, 'o'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hgxuvs:c:o:b:p:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 'p': num_threads = atoi(optarg); break; case 'b': batch_id = is_integer(optarg); if (batch_id < 0) { cerr << "Batch ID (-b) must be an integer, e.g. 1, 2, 3\n"; help(); } break; case 's': sample_file = optarg; samples.push(sample_file); break; case 'g': search_type = genomic_loc; break; case 'o': out_path = optarg; break; case 'c': catalog_path = optarg; break; case 'x': verify_haplotypes = false; break; case 'u': require_uniq_haplotypes = true; break; case 'v': version(); break; case '?': // getopt_long already printed an error message. 
help(); break; default: help(); abort(); } } if (catalog_path.length() == 0) { cerr << "You must specify the prefix path to the catalog.\n"; help(); } if (samples.size() == 0) { cerr << "You must specify at least one sample file.\n"; help(); } if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; return 0; } void version() { std::cerr << "sstacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "sstacks " << VERSION << "\n" << "sstacks -b batch_id -c catalog_file -s sample_file [-s sample_file_2 ...] [-o path] [-p num_threads] [-g] [-x] [-v] [-h]" << "\n" << " p: enable parallel execution with num_threads threads.\n" << " b: MySQL ID of this batch." << "\n" << " c: TSV file from which to load the catalog loci." << "\n" << " s: filename prefix from which to load sample loci." << "\n" << " o: output path to write results." << "\n" << " g: base matching on genomic location, not sequence identity." << "\n" << " x: don't verify haplotype of matching locus." << "\n" << " v: print program version." << "\n" << " h: display this help messsage." << "\n\n"; exit(0); } stacks-1.35/src/sstacks.h000644 000765 000024 00000005172 12533677757 016150 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __SSTACKS_H__ #define __SSTACKS_H__ #include "constants.h" #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include using std::ifstream; using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include using std::vector; #include using std::map; #include using std::set; #include using std::queue; #include using std::unordered_map; #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "kmers.h" #include "stacks.h" #include "locus.h" #include "sql_utilities.h" #include "utils.h" #ifdef HAVE_SPARSEHASH typedef sparse_hash_map >, hash_charptr, eqstr> HashMap; #else typedef unordered_map >, hash_charptr, eqstr> HashMap; #endif void help( void ); void version( void ); int parse_command_line(int, char**); int populate_hash(map &, HashMap &, int); int find_matches_by_sequence(map &, map &); int find_matches_by_genomic_loc(map &, map &); int verify_sequence_match(map &, QLocus *, set &, map > &, uint, unsigned long &, unsigned long &); int verify_genomic_loc_match(Locus *, QLocus *, set &, unsigned long &); int generate_query_haplotypes(Locus *, QLocus *, set &); int impute_haplotype(string, vector > &, string &); bool compare_dist(pair, pair); int write_matches(string, map &); #endif // __SSTACKS_H__ stacks-1.35/src/stacks.cc000644 000765 000024 00000004622 12335173442 016100 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // stacks.cc -- routines for the stack-holding containers // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // // $Id$ // #include "stacks.h" Rem::Rem() { this->id = 0; this->seq = NULL; this->utilized = false; } Rem::Rem(int id, uint seq_id, DNASeq *seq) { this->id = id; this->utilized = false; this->map.push_back(seq_id); this->seq = new DNASeq(seq->size, seq->s); } int Rem::add_id(uint id) { this->map.push_back(id); return 0; } int Rem::add_seq(const DNASeq *seq) { if (this->seq != NULL) delete this->seq; this->seq = new DNASeq(seq->size, seq->s); return 0; } int Rem::add_seq(const char *seq) { if (this->seq != NULL) delete this->seq; this->seq = new DNASeq(strlen(seq), seq); return 0; } int PStack::add_id(const char *id) { char *f = new char[strlen(id) + 1]; strcpy(f, id); this->map.push_back(f); return 0; } int PStack::add_seq(const char *seq) { if (this->seq != NULL) delete this->seq; this->len = strlen(seq); this->seq = new DNANSeq(this->len, seq); return 0; } int PStack::add_seq(DNANSeq *seq) { if (this->seq != NULL) delete this->seq; this->seq = new DNANSeq(seq->size(), seq->s); return 0; } int Stack::add_id(uint id) { this->map.push_back(id); return 0; } int Stack::add_seq(const char *seq) { if (this->seq != NULL) delete this->seq; this->seq = new DNASeq(strlen(seq), seq); return 0; } int Stack::add_seq(const DNASeq *seq) { if (this->seq != NULL) delete this->seq; this->seq = new DNASeq(seq->size, seq->s); return 0; } stacks-1.35/src/stacks.h000644 000765 000024 00000011547 12533677757 015770 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // 
Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __STACKS_H__ #define __STACKS_H__ #include #include using std::string; #include using std::vector; #include using std::map; #include using std::set; #include #include using std::pair; using std::make_pair; #include using std::ofstream; using std::cerr; #include using std::stringstream; #include "constants.h" #include "DNASeq.h" #include "DNANSeq.h" typedef unsigned int uint; typedef string allele_type; enum snp_type {snp_type_het, snp_type_hom, snp_type_unk}; enum read_type {primary, secondary}; enum strand_type {plus, minus}; enum searcht {sequence, genomic_loc}; class PhyLoc { public: char *chr; uint bp; strand_type strand; void set(const char *chr, uint bp, strand_type strand) { if (this->chr != NULL) delete [] this->chr; this->chr = new char[strlen(chr) + 1]; this->bp = bp; this->strand = strand; strcpy(this->chr, chr); } PhyLoc() { chr = NULL; bp = 0; strand = plus; } PhyLoc(const char *chr, uint bp) { this->chr = new char[strlen(chr) + 1]; this->bp = bp; this->strand = plus; strcpy(this->chr, chr); } PhyLoc(const char *chr, uint bp, strand_type strnd) { this->chr = new char[strlen(chr) + 1]; this->bp = bp; this->strand = strnd; strcpy(this->chr, chr); } ~PhyLoc() { delete [] chr; } }; class SNP { public: snp_type type; // Heterozygous or homozygous uint col; float lratio; char rank_1; char 
rank_2; char rank_3; char rank_4; SNP() { col = 0; lratio = 0.0; rank_1 = 0; rank_2 = 0; rank_3 = 0; rank_4 = 0; } }; class PStack { public: uint id; uint count; // Number of identical reads forming this stack DNANSeq *seq; // Sequence read uint len; // Read length vector map; // List of sequence read IDs merged into this stack PhyLoc loc; // Physical genome location of this stack. PStack() { id = 0; count = 0; seq = NULL; len = 0; } ~PStack() { delete this->seq; for (unsigned int i = 0; i < this->map.size(); i++) delete [] this->map[i]; } int add_id(const char *); int add_seq(const char *); int add_seq(DNANSeq *); }; class Stack { public: uint id; DNASeq *seq; // Sequence read vector map; // List of sequence read IDs merged into this stack Stack() { id = 0; seq = NULL; } ~Stack() { delete this->seq; } uint count() { return this->map.size(); } int add_id(uint); int add_seq(const char *); int add_seq(const DNASeq *); }; class Rem { public: uint id; vector map; // List of sequence read IDs merged into this stack DNASeq *seq; // Sequence read bool utilized; Rem(); Rem(int, uint, DNASeq *); ~Rem() { delete this->seq; } uint count() { return this->map.size(); } int add_id(uint); int add_seq(const char *); int add_seq(const DNASeq *); }; class CatMatch { public: int batch_id; int cat_id; int sample_id; int tag_id; int depth; double lnl; char *haplotype; CatMatch() { batch_id = 0; cat_id = 0; sample_id = 0; tag_id = 0; depth = 0; lnl = 0.0; haplotype = NULL; } ~CatMatch() { delete [] haplotype; } }; class ModRes { public: int sample_id; int tag_id; char *model; ModRes(int samp_id, int tag_id, const char *model) { this->sample_id = samp_id; this->tag_id = tag_id; this->model = new char[strlen(model) + 1]; strcpy(this->model, model); } ~ModRes() { delete [] this->model; } }; class SNPRes { public: int sample_id; int tag_id; vector snps; SNPRes(int samp_id, int tag_id) { this->sample_id = samp_id; this->tag_id = tag_id; } ~SNPRes() { for (uint i = 0; i < this->snps.size(); 
i++) delete this->snps[i]; this->snps.clear(); } }; #endif // __STACKS_H__ stacks-1.35/src/Tsv.h000644 000765 000024 00000003272 12335173442 015226 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // Code to parse the internal (and tempoary) data format. This format is created for // reads that have been aligned to a reference genome. It takes the tab-separated form: // // // // One record per line. // #ifndef __TSV_H__ #define __TSV_H__ #include "input.h" class Tsv: public Input { public: Tsv(const char *path) : Input(path) {}; ~Tsv() {}; Seq *next_seq(); int next_seq(Seq &) { return 0; } }; Seq *Tsv::next_seq() { vector parts; // // Read a record from the file and place it in a Seq object // this->fh.getline(this->line, max_len); if (!this->fh.good()) { return NULL; } parse_tsv(this->line, parts); string id = parts[0] + "_" + parts[1]; Seq *s = new Seq(id.c_str(), parts[2].c_str(), parts[3].c_str(), parts[0].c_str(), atoi(parts[1].c_str()), plus); return s; } #endif // __TSV_H__ stacks-1.35/src/ustacks.cc000644 000765 000024 00000176157 12533677757 016324 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // ustacks -- build denovo stacks // #include "ustacks.h" // // Global variables to hold command-line options. // FileT in_file_type; string in_file; string out_path; int num_threads = 1; int sql_id = 0; bool call_sec_hapl = true; bool set_kmer_len = true; int kmer_len = 0; int max_kmer_len = 19; int min_merge_cov = 3; uint max_subgraph = 3; int dump_graph = 0; int retain_rem_reads = false; int deleverage_stacks = 0; int remove_rep_stacks = 0; int max_utag_dist = 2; int max_rem_dist = -1; double cov_mean = 0.0; double cov_stdev = 0.0; double cov_scale = 1; int deleverage_trigger; int removal_trigger; // // For use with the multinomial model to call fixed nucleotides. // modelt model_type = snp; double alpha = 0.05; double bound_low = 0.0; double bound_high = 1.0; double p_freq = 0.5; double barcode_err_freq = 0.0; double heterozygote_limit = -3.84; double homozygote_limit = 3.84; int main (int argc, char* argv[]) { parse_command_line(argc, argv); // // Set the max remainder distance to be greater than the max_utag_dist, if it is not // specified on the command line. 
// if (max_rem_dist == -1) max_rem_dist = max_utag_dist + 2; cerr << "Min depth of coverage to create a stack: " << min_merge_cov << "\n" << "Max distance allowed between stacks: " << max_utag_dist << "\n" << "Max distance allowed to align secondary reads: " << max_rem_dist << "\n" << "Max number of stacks allowed per de novo locus: " << max_subgraph << "\n" << "Deleveraging algorithm: " << (deleverage_stacks ? "enabled" : "disabled") << "\n" << "Removal algorithm: " << (remove_rep_stacks ? "enabled" : "disabled") << "\n" << "Model type: "; switch (model_type) { case snp: cerr << "SNP\n"; break; case fixed: cerr << "Fixed\n"; break; case bounded: cerr << "Bounded; lower epsilon bound: " << bound_low << "; upper bound: " << bound_high << "\n"; break; } cerr << "Alpha significance level for model: " << alpha << "\n"; // // Set limits to call het or homozygote according to chi-square distribution with one // degree of freedom: // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value // if (alpha == 0.1) { heterozygote_limit = -2.71; homozygote_limit = 2.71; } else if (alpha == 0.05) { heterozygote_limit = -3.84; homozygote_limit = 3.84; } else if (alpha == 0.01) { heterozygote_limit = -6.64; homozygote_limit = 6.64; } else if (alpha == 0.001) { heterozygote_limit = -10.83; homozygote_limit = 10.83; } // // Set the number of OpenMP parallel threads to execute. 
// #ifdef _OPENMP omp_set_num_threads(num_threads); #endif DNASeqHashMap radtags; vector radtags_keys; map remainders; set merge_map; map unique; load_radtags(in_file, radtags, radtags_keys); reduce_radtags(radtags, unique, remainders); free_radtags_hash(radtags, radtags_keys); // dump_unique_tags(unique); if (cov_mean == 0 || cov_stdev == 0) calc_coverage_distribution(unique, cov_mean, cov_stdev); cerr << "Coverage mean: " << cov_mean << "; stdev: " << cov_stdev << "\n"; calc_triggers(cov_mean, cov_stdev, deleverage_trigger, removal_trigger); cerr << "Deleveraging trigger: " << deleverage_trigger << "; Removal trigger: " << removal_trigger << "\n"; map merged; populate_merged_tags(unique, merged); if (remove_rep_stacks) { cerr << "Calculating distance for removing repetitive stacks.\n"; calc_kmer_distance(merged, 1); cerr << "Removing repetitive stacks.\n"; remove_repetitive_stacks(unique, merged); } cerr << "Calculating distance between stacks...\n"; calc_kmer_distance(merged, max_utag_dist); cerr << "Merging stacks, maximum allowed distance: " << max_utag_dist << " nucleotide(s)\n"; merge_stacks(unique, remainders, merged, merge_map, max_utag_dist); call_consensus(merged, unique, remainders, false); calc_merged_coverage_distribution(unique, merged); //dump_merged_tags(merged); cerr << "Merging remainder radtags\n"; merge_remainders(merged, remainders); // Call the consensus sequence again, now that remainder tags have been merged. call_consensus(merged, unique, remainders, true); count_raw_reads(unique, remainders, merged); cerr << "Writing loci, SNPs, and alleles to '" << out_path << "'...\n"; write_results(merged, unique, remainders); cerr << "done.\n"; return 0; } int merge_remainders(map &merged, map &rem) { map::iterator it; int j, k; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. 
vector keys; uint tot = 0; for (it = rem.begin(); it != rem.end(); it++) { keys.push_back(it->first); tot += it->second->count(); } cerr << " " << tot << " remainder sequences left to merge.\n"; if (max_rem_dist <= 0) { cerr << " Matched 0 remainder reads; unable to match " << tot << " remainder reads.\n"; return 0; } // // Calculate the number of k-mers we will generate. If kmer_len == 0, // determine the optimal length for k-mers. // int con_len = strlen(merged.begin()->second->con); if (set_kmer_len) kmer_len = determine_kmer_length(con_len, max_rem_dist); int num_kmers = con_len - kmer_len + 1; cerr << " Distance allowed between stacks: " << max_rem_dist << "\n" << " Using a k-mer length of " << kmer_len << "\n" << " Number of kmers per sequence: " << num_kmers << "\n"; // // Calculate the minimum number of matching k-mers required for a possible sequence match. // int min_hits = calc_min_kmer_matches(kmer_len, max_rem_dist, con_len, set_kmer_len ? true : false); KmerHashMap kmer_map; vector kmer_map_keys; populate_kmer_hash(merged, kmer_map, kmer_map_keys, kmer_len); int utilized = 0; // // Create a character buffer to hold the Rem sequence, this is faster // than repeatedly decoding the DNASeq buffers. 
// //it = rem.find(keys[0]); //char *buf = new char[it->second->seq->size + 1]; #pragma omp parallel private(it, k) { #pragma omp for schedule(dynamic) for (j = 0; j < (int) keys.size(); j++) { it = rem.find(keys[j]); Rem *r = it->second; char *buf = new char[r->seq->size + 1]; // // Generate the k-mers for this remainder sequence // vector rem_kmers; buf = r->seq->seq(buf); generate_kmers(buf, kmer_len, num_kmers, rem_kmers); map hits; vector::iterator map_it; // // Lookup the occurances of each remainder k-mer in the MergedStack k-mer map // for (k = 0; k < num_kmers; k++) { if (kmer_map.find(rem_kmers[k]) != kmer_map.end()) for (map_it = kmer_map[rem_kmers[k]].begin(); map_it != kmer_map[rem_kmers[k]].end(); map_it++) hits[*map_it]++; } // // Iterate through the list of hits. For each hit that has more than min_hits // check its full length to verify a match. // map dists; map::iterator hit_it; for (hit_it = hits.begin(); hit_it != hits.end(); hit_it++) { if (hit_it->second < min_hits) continue; int d = dist(merged[hit_it->first], buf); // // Store the distance between these two sequences if it is // below the maximum distance // if (d <= max_rem_dist) { dists[hit_it->first] = d; } } // // Free the k-mers we generated for this remainder read // for (k = 0; k < num_kmers; k++) delete [] rem_kmers[k]; // Check to see if there is a uniquely low distance, if so, // merge this remainder tag. If not, discard it, since we // can't locate a single best-fitting Stack to merge it into. map::iterator s; int min_id = -1; int count = 0; int dist = max_rem_dist + 1; for (s = dists.begin(); s != dists.end(); s++) { if ((*s).second < dist) { min_id = (*s).first; count = 1; dist = (*s).second; } else if ((*s).second == dist) { count++; } } delete [] buf; // Found a merge partner. 
if (min_id >= 0 && count == 1) { r->utilized = true; #pragma omp critical { merged[min_id]->remtags.push_back(it->first); utilized += it->second->count(); } } } } free_kmer_hash(kmer_map, kmer_map_keys); //delete [] buf; cerr << " Matched " << utilized << " remainder reads; unable to match " << tot - utilized << " remainder reads.\n"; return 0; } int call_alleles(MergedStack *mtag, vector &reads, vector &read_types) { int row; int height = reads.size(); string allele; DNASeq *d; char base; vector::iterator snp; for (row = 0; row < height; row++) { allele.clear(); uint snp_cnt = 0; // // Only call a haplotype from primary reads. // if (!call_sec_hapl && read_types[row] == secondary) continue; for (snp = mtag->snps.begin(); snp != mtag->snps.end(); snp++) { if ((*snp)->type != snp_type_het) continue; snp_cnt++; d = reads[row]; base = (*d)[(*snp)->col]; // // Check to make sure the nucleotide at the location of this SNP is // of one of the two possible states the multinomial model called. // if (base == (*snp)->rank_1 || base == (*snp)->rank_2) allele += base; else break; } if (snp_cnt > 0 && allele.length() == snp_cnt) mtag->alleles[allele]++; } return 0; } int call_consensus(map &merged, map &unique, map &rem, bool invoke_model) { // // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. // map::iterator it; vector keys; for (it = merged.begin(); it != merged.end(); it++) keys.push_back(it->first); int i; #pragma omp parallel private(i) { MergedStack *mtag; Stack *utag; Rem *r; #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { mtag = merged[keys[i]]; // // Create a two-dimensional array, each row containing one read. For // each unique tag that has been merged together, add the sequence for // that tag into our array as many times as it originally occurred. 
// vector::iterator j; vector reads; vector read_types; for (j = mtag->utags.begin(); j != mtag->utags.end(); j++) { utag = unique[*j]; for (uint k = 0; k < utag->count(); k++) { reads.push_back(utag->seq); read_types.push_back(primary); } } // For each remainder tag that has been merged into this Stack, add the sequence. for (j = mtag->remtags.begin(); j != mtag->remtags.end(); j++) { r = rem[*j]; for (uint k = 0; k < r->count(); k++) { reads.push_back(r->seq); read_types.push_back(secondary); } } // // Iterate over each column of the array and call the consensus base. // int row, col; int length = reads[0]->size; int height = reads.size(); string con; map nuc; map::iterator max, n; DNASeq *d; for (col = 0; col < length; col++) { nuc['A'] = 0; nuc['G'] = 0; nuc['C'] = 0; nuc['T'] = 0; for (row = 0; row < height; row++) { d = reads[row]; if (nuc.count((*d)[col])) nuc[(*d)[col]]++; } // // Find the base with a plurality of occurances and call it. // max = nuc.end(); for (n = nuc.begin(); n != nuc.end(); n++) { if (max == nuc.end() || n->second > max->second) max = n; } con += max->first; // // Search this column for the presence of a SNP // if (invoke_model) switch(model_type) { case snp: call_multinomial_snp(mtag, col, nuc, true); break; case bounded: call_bounded_multinomial_snp(mtag, col, nuc, true); break; case fixed: call_multinomial_fixed(mtag, col, nuc); break; } } if (invoke_model) { call_alleles(mtag, reads, read_types); if (model_type == fixed) { // // Mask nucleotides that are not fixed. 
// vector::iterator s; for (s = mtag->snps.begin(); s != mtag->snps.end(); s++) { if ((*s)->type == snp_type_unk) con.replace((*s)->col, 1, "N"); } } } mtag->add_consensus(con.c_str()); } } return 0; } int populate_merged_tags(map &unique, map &merged) { map::iterator i; map::iterator it_new, it_old; Stack *utag; MergedStack *mtag; int k = 0; it_old = merged.begin(); for (i = unique.begin(); i != unique.end(); i++) { utag = (*i).second; mtag = new MergedStack; mtag->id = k; mtag->count = utag->count(); mtag->utags.push_back(utag->id); mtag->add_consensus(utag->seq); // Insert the new MergedStack giving a hint as to which position // to insert it at. it_new = merged.insert(it_old, pair(k, mtag)); it_old = it_new; k++; } return 0; } int merge_stacks(map &unique, map &rem, map &merged, set &merge_map, int round) { map new_merged; map::iterator it, it_old, it_new; MergedStack *tag_1, *tag_2; vector > merge_lists; uint index = 0; int cohort_id = 0; int id = 1; uint delev_cnt = 0; uint blist_cnt = 0; for (it = merged.begin(); it != merged.end(); it++) { tag_1 = it->second; // // This tag may already have been merged by an earlier operation. // if (merge_map.count(tag_1->id) > 0) continue; queue merge_list; pair::iterator,bool> ret; vector >::iterator k; merge_lists.push_back(set()); if (tag_1->masked) { merge_lists[index].insert(tag_1->id); index++; continue; } // // Construct a list of MergedStacks that are within a particular distance // of this tag. // merge_lists[index].insert(tag_1->id); merge_list.push(tag_1->id); while (!merge_list.empty()) { tag_2 = merged[merge_list.front()]; merge_list.pop(); for (k = tag_2->dist.begin(); k != tag_2->dist.end(); k++) { ret = merge_lists[index].insert(k->first); // // If this Tag has not already been added to the merge list (i.e. we were able // to insert it in to our unique_merge_list, which is a set), add it for consideration // later in the loop. 
// if (ret.second == true) merge_list.push((*k).first); } } // // Record the nodes that have been merged in this round. // set::iterator j; for (j = merge_lists[index].begin(); j != merge_lists[index].end(); j++) merge_map.insert(*j); index++; } #pragma omp parallel private(tag_1, tag_2) { vector merged_tags; #pragma omp for reduction(+:delev_cnt) reduction(+:blist_cnt) for (uint index = 0; index < merge_lists.size(); index++) { // // Deal with the simple case of a single locus that does not need to be merged. // if (merge_lists[index].size() == 1) { tag_1 = merged[*(merge_lists[index].begin())]; tag_2 = merge_tags(merged, merge_lists[index], 0); // // If this tag is masked, keep the old cohort_id. // if (tag_1->masked) { tag_2->cohort_id = tag_1->cohort_id; } else { tag_2->cohort_id = cohort_id; #pragma omp atomic cohort_id++; } merged_tags.push_back(tag_2); continue; } // // Break large loci down by constructing a minimum // spanning tree and severing long distance edges. // if (deleverage_stacks) { vector tags; bool delev; deleverage(unique, rem, merged, merge_lists[index], cohort_id, tags); if (tags.size() == 1) { delev = false; } else { delev_cnt++; delev = true; } for (uint t = 0; t < tags.size(); t++) { //tags[t]->id = id; tags[t]->deleveraged = delev; if (tags[t]->utags.size() > max_subgraph) { tags[t]->masked = true; tags[t]->blacklisted = true; blist_cnt++; } //new_merged.insert(pair(id, tags[t])); merged_tags.push_back(tags[t]); //id++; } #pragma omp atomic cohort_id++; } else { // // If not deleveraging, merge these tags together into a new MergedStack object. // tag_2 = merge_tags(merged, merge_lists[index], 0); tag_2->cohort_id = cohort_id; if (tag_2->utags.size() > max_subgraph) { tag_2->masked = true; tag_2->blacklisted = true; blist_cnt++; } //new_merged.insert(pair(id, tag_2)); merged_tags.push_back(tag_2); #pragma omp atomic cohort_id++; //id++; } } // // Merge the accumulated tags into the new_merged map. 
// #pragma omp critical { it_old = merged.begin(); for (uint j = 0; j < merged_tags.size(); j++) { merged_tags[j]->id = id; it_new = new_merged.insert(it_old, pair(id, merged_tags[j])); it_old = it_new; id++; } } } uint new_cnt = new_merged.size(); uint old_cnt = merged.size(); // // Free the memory from the old map of merged tags. // for (it = merged.begin(); it != merged.end(); it++) delete it->second; merged = new_merged; cerr << " " << old_cnt << " stacks merged into " << new_cnt << " stacks; deleveraged " << delev_cnt << " stacks; removed " << blist_cnt << " stacks.\n"; return 0; } MergedStack *merge_tags(map &merged, set &merge_list, int id) { set::iterator i; vector::iterator j; MergedStack *tag_1, *tag_2; tag_1 = new MergedStack; tag_1->id = id; for (i = merge_list.begin(); i != merge_list.end(); i++) { tag_2 = merged[(*i)]; tag_1->deleveraged = tag_2->deleveraged ? true : tag_1->deleveraged; tag_1->masked = tag_2->masked ? true : tag_1->masked; tag_1->blacklisted = tag_2->blacklisted ? true : tag_1->blacklisted; tag_1->lumberjackstack = tag_2->lumberjackstack ? true : tag_1->lumberjackstack; for (j = tag_2->utags.begin(); j != tag_2->utags.end(); j++) tag_1->utags.push_back(*j); for (j = tag_2->remtags.begin(); j != tag_2->remtags.end(); j++) tag_1->remtags.push_back(*j); tag_1->count += tag_2->count; } return tag_1; } MergedStack *merge_tags(map &merged, int *merge_list, int merge_list_size, int id) { int i; vector::iterator j; MergedStack *tag_1, *tag_2; tag_1 = new MergedStack; tag_1->id = id; for (i = 0; i < merge_list_size; i++) { tag_2 = merged[merge_list[i]]; tag_1->deleveraged = tag_2->deleveraged ? true : tag_1->deleveraged; tag_1->masked = tag_2->masked ? true : tag_1->masked; tag_1->blacklisted = tag_2->blacklisted ? true : tag_1->blacklisted; tag_1->lumberjackstack = tag_2->lumberjackstack ? 
true : tag_1->lumberjackstack; for (j = tag_2->utags.begin(); j != tag_2->utags.end(); j++) tag_1->utags.push_back(*j); for (j = tag_2->remtags.begin(); j != tag_2->remtags.end(); j++) tag_1->remtags.push_back(*j); tag_1->count += tag_2->count; } return tag_1; } int remove_repetitive_stacks(map &unique, map &merged) { // // If enabled, check the depth of coverage of each unique tag, and remove // from consideration any tags with depths greater than removal_trigger. These tags // are likely to be multiple repetitive sites that have been merged together. // Because large stacks of unique tags are likely to also generate many one-off // sequencing error reads, remove all seqeunces that are a distance of one from // the RAD-Tag with high depth of coverage. // map::iterator i; vector >::iterator k; map new_merged; MergedStack *tag_1, *tag_2; set already_merged; // // First, iterate through the stacks and populate a list of tags that will be removed // (those above the removal_trigger and those 1 nucleotide away). If we don't construct // this list first, we will inadvertantly merge short stacks that end up being a // single nucleotide away from one of the lumberjack stacks found later in the process. // int id = 0; // // Merge all stacks that are over the removal trigger with their nearest neighbors and // mask them so they are not further processed by the program. // for (i = merged.begin(); i != merged.end(); i++) { tag_1 = i->second; // // Don't process a tag that has already been merged. 
// if (already_merged.count(tag_1->id) > 0) continue; if (tag_1->count > removal_trigger) { set unique_merge_list; unique_merge_list.insert(tag_1->id); already_merged.insert(tag_1->id); for (k = tag_1->dist.begin(); k != tag_1->dist.end(); k++) { if (already_merged.count(k->first) == 0) { already_merged.insert(k->first); unique_merge_list.insert(k->first); } } tag_1->lumberjackstack = true; tag_1->masked = true; tag_1->blacklisted = true; // // Merge these tags together into a new MergedStack object. // tag_2 = merge_tags(merged, unique_merge_list, id); tag_2->add_consensus(tag_1->con); new_merged.insert(make_pair(id, tag_2)); id++; } } // // Move the non-lumberjack stacks, unmodified, into the new merged map. // for (i = merged.begin(); i != merged.end(); i++) { tag_1 = i->second; if (already_merged.count(tag_1->id) > 0) continue; set unique_merge_list; unique_merge_list.insert(tag_1->id); tag_2 = merge_tags(merged, unique_merge_list, id); tag_2->add_consensus(tag_1->con); new_merged.insert(make_pair(id, tag_2)); id++; } cerr << " Removed " << already_merged.size() << " stacks.\n"; // // Free the memory from the old map of merged tags. // map::iterator it; for (it = merged.begin(); it != merged.end(); it++) delete it->second; merged = new_merged; cerr << " " << merged.size() << " stacks remain for merging.\n"; return 0; } int deleverage(map &unique, map &rem, map &merged, set &merge_list, int cohort_id, vector &deleveraged_tags) { set::iterator it; vector >::iterator j; MergedStack *tag_1, *tag_2; uint k, l; // // Create a minimum spanning tree in order to determine the minimum distance // between each node in the list. // MinSpanTree *mst = new MinSpanTree; vector keys; for (it = merge_list.begin(); it != merge_list.end(); it++) { keys.push_back(*it); mst->add_node(*it); tag_1 = merged[*it]; // cerr << " " << *it << " -> " << tag_1->utags[0] << "\n"; } // // Measure the distance between each pair of nodes and add edges to our // minimum spanning tree. 
// Node *n_1, *n_2; for (k = 0; k < keys.size(); k++) { tag_1 = merged[keys[k]]; n_1 = mst->node(keys[k]); for (l = k+1; l < keys.size(); l++) { tag_2 = merged[keys[l]]; n_2 = mst->node(keys[l]); int d = dist(tag_1, tag_2); n_1->add_edge(mst->node(keys[l]), d); n_2->add_edge(mst->node(keys[k]), d); } } // // Build the minimum spanning tree. // mst->build_tree(); // // Visualize the MST // if (dump_graph) { stringstream gout_file; size_t pos_1 = in_file.find_last_of("/"); size_t pos_2 = in_file.find_last_of("."); gout_file << out_path << in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) << "_" << keys[0] << ".dot"; string vis = mst->vis(true); ofstream gvis(gout_file.str().c_str()); gvis << vis; gvis.close(); } set visited; set dists; queue q; Node *n = mst->head(); q.push(n); while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); for (uint i = 0; i < n->min_adj_list.size(); i++) { if (visited.count(n->min_adj_list[i]->id) == 0) { q.push(n->min_adj_list[i]); // cerr << n->id << " -> " << n->min_adj_list[i]->id << ": "; // // Find the edge distance. // for (uint j = 0; j < n->edges.size(); j++) if (n->edges[j]->child == n->min_adj_list[i]) { // cerr << n->edges[j]->dist << "\n"; dists.insert(n->edges[j]->dist); } } } } // // This set is sorted by definition. Check if there is more than a single // distance separating stacks. // if (dists.size() == 1) { tag_1 = merge_tags(merged, merge_list, 0); deleveraged_tags.push_back(tag_1); delete mst; return 0; } uint min_dist = *(dists.begin()); // // If there is more than a single distance, split the minimum spanning tree // into separate loci, by cutting the tree at the larger distances. 
// set uniq_merge_list; visited.clear(); n = mst->head(); q.push(n); int id = 0; uniq_merge_list.insert(n->id); while (!q.empty()) { n = q.front(); q.pop(); visited.insert(n->id); for (uint i = 0; i < n->min_adj_list.size(); i++) { if (visited.count(n->min_adj_list[i]->id) == 0) { q.push(n->min_adj_list[i]); for (uint j = 0; j < n->edges.size(); j++) { if (n->edges[j]->child == n->min_adj_list[i]) if (n->edges[j]->dist > min_dist) { // cerr << "Merging the following stacks into a locus:\n"; for (it = uniq_merge_list.begin(); it != uniq_merge_list.end(); it++) { tag_1 = merged[*it]; // cerr << " " << *it << " -> " << tag_1->utags[0] << "\n"; } tag_1 = merge_tags(merged, uniq_merge_list, id); tag_1->cohort_id = cohort_id; deleveraged_tags.push_back(tag_1); uniq_merge_list.clear(); id++; } } uniq_merge_list.insert(n->min_adj_list[i]->id); } } } // cerr << "Merging the following stacks into a locus:\n"; for (it = uniq_merge_list.begin(); it != uniq_merge_list.end(); it++) { tag_1 = merged[*it]; // cerr << " " << *it << " -> " << tag_1->utags[0] << "\n"; } tag_1 = merge_tags(merged, uniq_merge_list, id); tag_1->cohort_id = cohort_id; deleveraged_tags.push_back(tag_1); uniq_merge_list.clear(); delete mst; return 0; } int calc_kmer_distance(map &merged, int utag_dist) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // KmerHashMap kmer_map; vector kmer_map_keys; MergedStack *tag_1, *tag_2; map::iterator it; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (it = merged.begin(); it != merged.end(); it++) keys.push_back(it->first); // // Calculate the number of k-mers we will generate. If kmer_len == 0, // determine the optimal length for k-mers. 
// int con_len = strlen(merged[keys[0]]->con); if (set_kmer_len) kmer_len = determine_kmer_length(con_len, utag_dist); int num_kmers = con_len - kmer_len + 1; cerr << " Distance allowed between stacks: " << utag_dist << "\n" << " Using a k-mer length of " << kmer_len << "\n" << " Number of kmers per sequence: " << num_kmers << "\n"; // // Calculate the minimum number of matching k-mers required for a possible sequence match. // int min_hits = calc_min_kmer_matches(kmer_len, utag_dist, con_len, set_kmer_len ? true : false); populate_kmer_hash(merged, kmer_map, kmer_map_keys, kmer_len); #pragma omp parallel private(tag_1, tag_2) { #pragma omp for schedule(dynamic) for (uint i = 0; i < keys.size(); i++) { tag_1 = merged[keys[i]]; // Don't compute distances for masked tags if (tag_1->masked) continue; vector query_kmers; generate_kmers(tag_1->con, kmer_len, num_kmers, query_kmers); map hits; int d; // // Lookup the occurances of each k-mer in the kmer_map // for (int j = 0; j < num_kmers; j++) { for (uint k = 0; k < kmer_map[query_kmers[j]].size(); k++) hits[kmer_map[query_kmers[j]][k]]++; } // // Free the k-mers we generated for this query // for (int j = 0; j < num_kmers; j++) delete [] query_kmers[j]; // cerr << " Tag " << tag_1->id << " hit " << hits.size() << " kmers.\n"; // // Iterate through the list of hits. For each hit that has more than min_hits // check its full length to verify a match. // map::iterator hit_it; for (hit_it = hits.begin(); hit_it != hits.end(); hit_it++) { // cerr << " Tag " << hit_it->first << " has " << hit_it->second << " hits (min hits: " << min_hits << ")\n"; if (hit_it->second < min_hits) continue; // cerr << " Match found, checking full-length match\n"; tag_2 = merged[hit_it->first]; // Don't compute distances for masked tags if (tag_2->masked) continue; // Don't compare tag_1 against itself. 
if (tag_1 == tag_2) continue; d = dist(tag_1, tag_2); // cerr << " Distance: " << d << "\n"; // // Store the distance between these two sequences if it is // below the maximum distance (which governs those // sequences to be merged in the following step of the // algorithm.) // if (d <= utag_dist) tag_1->add_dist(tag_2->id, d); } // Sort the vector of distances. sort(tag_1->dist.begin(), tag_1->dist.end(), compare_dist); } } free_kmer_hash(kmer_map, kmer_map_keys); return 0; } int calc_distance(map &merged, int utag_dist) { // // Calculate the distance (number of mismatches) between each pair // of Radtags. We expect all radtags to be the same length; // map::iterator it; MergedStack *tag_1, *tag_2; int i, j; // OpenMP can't parallelize random access iterators, so we convert // our map to a vector of integer keys. vector keys; for (it = merged.begin(); it != merged.end(); it++) keys.push_back(it->first); #pragma omp parallel private(i, j, tag_1, tag_2) { #pragma omp for schedule(dynamic) for (i = 0; i < (int) keys.size(); i++) { tag_1 = merged[keys[i]]; // Don't compute distances for masked tags if (tag_1->masked) continue; int d; for (j = 0; j < (int) keys.size(); j++) { tag_2 = merged[keys[j]]; // Don't compute distances for masked tags if (tag_2->masked) continue; // Don't compare tag_1 against itself. if (tag_1 == tag_2) continue; d = dist(tag_1, tag_2); //cerr << " Distance: " << d << "\n"; // // Store the distance between these two sequences if it is // below the maximum distance (which governs those // sequences to be merged in the following step of the // algorithm.) // if (d == utag_dist) { tag_1->add_dist(tag_2->id, d); //cerr << " HIT.\n"; } } // Sort the vector of distances. 
sort(tag_1->dist.begin(), tag_1->dist.end(), compare_dist); } } return 0; } int reduce_radtags(DNASeqHashMap &radtags, map &unique, map &rem) { DNASeqHashMap::iterator it; Rem *r; Stack *u; int global_id = 1; for (it = radtags.begin(); it != radtags.end(); it++) { if (it->second.count() < min_merge_cov) { // // Don't record this unique RAD-Tag if its coverage is below // the specified cutoff. However, add the reads to the remainder // vector for later processing. // r = new Rem; r->id = global_id; r->add_seq(it->first); for (uint i = 0; i < it->second.ids.size(); i++) r->add_id(it->second.ids[i]); rem[r->id] = r; global_id++; } else { // // Populate a Stack object for this unique radtag. Create a // map of the IDs for the sequences that have been // collapsed into this radtag. // u = new Stack; u->id = global_id; u->add_seq(it->first); // Copy the original Fastq IDs from which this unique radtag was built. for (uint i = 0; i < it->second.ids.size(); i++) u->add_id(it->second.ids[i]); unique[u->id] = u; global_id++; } } if (unique.size() == 0) { cerr << "Error: Unable to form any stacks, data appear to be unique.\n"; exit(1); } return 0; } int free_radtags_hash(DNASeqHashMap &radtags, vector &radtags_keys) { for (uint i = 0; i < radtags_keys.size(); i++) delete radtags_keys[i]; radtags.clear(); return 0; } int calc_coverage_distribution(map &unique, double &mean, double &stdev) { map::iterator i; double m = 0.0; double s = 0.0; double sum = 0.0; uint max = 0; uint cnt = 0; double total = 0.0; map depth_dist; map::iterator j; for (i = unique.begin(); i != unique.end(); i++) { cnt = i->second->count(); m += cnt; total++; depth_dist[cnt]++; if (cnt > max) max = cnt; } mean = round(m / total); // // Calculate the standard deviation // total = 0.0; for (i = unique.begin(); i != unique.end(); i++) { total++; s = i->second->count(); sum += pow((s - mean), 2); } stdev = sqrt(sum / (total - 1)); cerr << " Mean coverage depth is " << mean << "; Std Dev: " << stdev << " Max: " 
<< max << "\n"; // // Output the distribution of stack depths // //for (j = depth_dist.begin(); j != depth_dist.end(); j++) // cerr << j->first << "\t" << j->second << "\n"; return 0; } double calc_merged_coverage_distribution(map &unique, map &merged) { map::iterator it; vector::iterator k; Stack *tag; double m = 0.0; double s = 0.0; double sum = 0.0; double mean = 0.0; double max = 0.0; double stdev = 0.0; for (it = merged.begin(); it != merged.end(); it++) { m = 0.0; for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; m += tag->count(); } if (m > max) max = m; sum += m; } mean = sum / (double) merged.size(); // // Calculate the standard deviation // for (it = merged.begin(); it != merged.end(); it++) { s = 0.0; for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; s += tag->count(); } sum += pow((s - mean), 2); } stdev = sqrt(sum / (merged.size() - 1)); cerr << " Mean merged coverage depth is " << mean << "; Std Dev: " << stdev << "; Max: " << max << "\n"; return mean; } int count_raw_reads(map &unique, map &rem, map &merged) { map::iterator it; map::iterator sit; vector::iterator k; Stack *tag; long int m = 0; map uniq_ids; map::iterator uit; for (it = merged.begin(); it != merged.end(); it++) { for (k = it->second->utags.begin(); k != it->second->utags.end(); k++) { tag = unique[*k]; m += tag->count(); if (uniq_ids.count(*k) == 0) uniq_ids[*k] = 0; uniq_ids[*k]++; } for (uint j = 0; j < it->second->remtags.size(); j++) m += rem[it->second->remtags[j]]->count(); //m += it->second->remtags.size(); } for (uit = uniq_ids.begin(); uit != uniq_ids.end(); uit++) if (uit->second > 1) cerr << " Unique stack #" << uit->first << " appears in " << uit->second << " merged stacks.\n"; cerr << "Number of utilized reads: " << m << "\n"; //for (sit = unique.begin(); sit != unique.end(); sit++) // if (uniq_ids.count(sit->first) == 0) // cerr << " Stack " << sit->first << ": '" << sit->second->seq << "' 
unused.\n"; return 0; } int write_results(map &m, map &u, map &r) { map::iterator i; vector::iterator k; vector::iterator s; map::iterator t; MergedStack *tag_1; Stack *tag_2; Rem *rem; stringstream sstr; bool gzip = (in_file_type == FileT::gzfastq || in_file_type == FileT::gzfasta) ? true : false; // // Read in the set of sequencing IDs so they can be included in the output. // vector seq_ids; load_seq_ids(seq_ids); // // Parse the input file name to create the output files // size_t pos_1 = in_file.find_last_of("/"); size_t pos_2 = in_file.find_last_of("."); if (in_file.substr(pos_2) == ".gz") { in_file = in_file.substr(0, pos_2); pos_2 = in_file.find_last_of("."); } string tag_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".tags.tsv"; string snp_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".snps.tsv"; string all_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".alleles.tsv"; if (gzip) { tag_file += ".gz"; snp_file += ".gz"; all_file += ".gz"; } // // Open the output files for writing. 
// gzFile gz_tags, gz_snps, gz_alle; ofstream tags, snps, alle; if (gzip) { gz_tags = gzopen(tag_file.c_str(), "wb"); if (!gz_tags) { cerr << "Error: Unable to open gzipped tag file '" << tag_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_tags, libz_buffer_size); #endif gz_snps = gzopen(snp_file.c_str(), "wb"); if (!gz_snps) { cerr << "Error: Unable to open gzipped snps file '" << snp_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_snps, libz_buffer_size); #endif gz_alle = gzopen(all_file.c_str(), "wb"); if (!gz_alle) { cerr << "Error: Unable to open gzipped alleles file '" << all_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_alle, libz_buffer_size); #endif } else { tags.open(tag_file.c_str()); if (tags.fail()) { cerr << "Error: Unable to open tag file for writing.\n"; exit(1); } snps.open(snp_file.c_str()); if (snps.fail()) { cerr << "Error: Unable to open SNPs file for writing.\n"; exit(1); } alle.open(all_file.c_str()); if (alle.fail()) { cerr << "Error: Unable to open allele file for writing.\n"; exit(1); } } // // Record the version of Stacks used and the date generated as a comment in the catalog. // // Obtain the current date. // stringstream log; time_t rawtime; struct tm *timeinfo; char date[32]; time(&rawtime); timeinfo = localtime(&rawtime); strftime(date, 32, "%F %T", timeinfo); log << "# ustacks version " << VERSION << "; generated on " << date << "\n"; if (gzip) { gzputs(gz_tags, log.str().c_str()); gzputs(gz_snps, log.str().c_str()); gzputs(gz_alle, log.str().c_str()); } else { tags << log.str(); snps << log.str(); alle << log.str(); } int id; char *buf = new char[m.begin()->second->len + 1]; for (i = m.begin(); i != m.end(); i++) { float total = 0; tag_1 = i->second; // // Calculate the log likelihood of this merged stack. 
// tag_1->gen_matrix(u, r); tag_1->calc_likelihood(); // First write the consensus sequence sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" //<< tag_1->cohort_id << "\t" << "" << "\t" // chr << 0 << "\t" // bp << "+" << "\t" // strand << "consensus\t" << "\t" << "\t" << tag_1->con << "\t" << tag_1->deleveraged << "\t" << tag_1->blacklisted << "\t" << tag_1->lumberjackstack << "\t" << tag_1->lnl << "\n"; // // Write a sequence recording the output of the SNP model for each nucleotide. // sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" //<< "\t" // cohort_id << "\t" // chr << "\t" // bp << "\t" // strand << "model\t" << "\t" << "\t"; for (s = tag_1->snps.begin(); s != tag_1->snps.end(); s++) { switch((*s)->type) { case snp_type_het: sstr << "E"; break; case snp_type_hom: sstr << "O"; break; default: sstr << "U"; break; } } sstr << "\t" << "\t" << "\t" << "\t" << "\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); // // Now write out the components of each unique tag merged into this locus. // id = 0; for (k = tag_1->utags.begin(); k != tag_1->utags.end(); k++) { tag_2 = u[*k]; total += tag_2->count(); for (uint j = 0; j < tag_2->map.size(); j++) { sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" //<< "\t" // cohort_id << "\t" // chr << "\t" // bp << "\t" // strand << "primary\t" << id << "\t" << seq_ids[tag_2->map[j]] << "\t" << tag_2->seq->seq(buf) << "\t\t\t\t\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); } id++; } // // Write out the remainder tags merged into this unique tag. 
// for (k = tag_1->remtags.begin(); k != tag_1->remtags.end(); k++) { rem = r[*k]; total += rem->map.size(); for (uint j = 0; j < rem->map.size(); j++) sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" //<< "\t" // cohort_id << "\t" // chr << "\t" // bp << "\t" // strand << "secondary\t" << "\t" << seq_ids[rem->map[j]] << "\t" << rem->seq->seq(buf) << "\t\t\t\t\n"; if (gzip) gzputs(gz_tags, sstr.str().c_str()); else tags << sstr.str(); sstr.str(""); } // // Write out the model calls for each nucleotide in this locus. // for (s = tag_1->snps.begin(); s != tag_1->snps.end(); s++) { sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << (*s)->col << "\t"; switch((*s)->type) { case snp_type_het: sstr << "E\t"; break; case snp_type_hom: sstr << "O\t"; break; default: sstr << "U\t"; break; } sstr << std::fixed << std::setprecision(2) << (*s)->lratio << "\t" << (*s)->rank_1 << "\t" << (*s)->rank_2 << "\t\t\n"; } if (gzip) gzputs(gz_snps, sstr.str().c_str()); else snps << sstr.str(); sstr.str(""); // // Write the expressed alleles seen for the recorded SNPs and // the percentage of tags a particular allele occupies. // for (t = tag_1->alleles.begin(); t != tag_1->alleles.end(); t++) { sstr << "0" << "\t" << sql_id << "\t" << tag_1->id << "\t" << (*t).first << "\t" << (((*t).second/total) * 100) << "\t" << (*t).second << "\n"; } if (gzip) gzputs(gz_alle, sstr.str().c_str()); else alle << sstr.str(); sstr.str(""); } if (gzip) { gzclose(gz_tags); gzclose(gz_snps); gzclose(gz_alle); } else { tags.close(); snps.close(); alle.close(); } // // Free sequence IDs. // for (uint i = 0; i < seq_ids.size(); i++) delete [] seq_ids[i]; // // If specified, output reads not utilized in any stacks. 
// if (retain_rem_reads) { string unused_file = out_path + in_file.substr(pos_1 + 1, (pos_2 - pos_1 - 1)) + ".unused.fa"; gzFile gz_unused; ofstream unused; if (gzip) { unused_file += ".gz"; gz_unused = gzopen(unused_file.c_str(), "wb"); if (!gz_unused) { cerr << "Error: Unable to open gzipped discard file '" << unused_file << "': " << strerror(errno) << ".\n"; exit(1); } #if ZLIB_VERNUM >= 0x1240 gzbuffer(gz_unused, libz_buffer_size); #endif } else { unused.open(unused_file.c_str()); if (unused.fail()) { cerr << "Error: Unable to open discard file for writing.\n"; exit(1); } } map::iterator r_it; for (r_it = r.begin(); r_it != r.end(); r_it++) { if (r_it->second->utilized == false) sstr << ">" << r_it->second->id << "\n" << r_it->second->seq->seq(buf) << "\n"; if (gzip) gzputs(gz_unused, sstr.str().c_str()); else unused << sstr.str(); sstr.str(""); } if (gzip) gzclose(gz_unused); else unused.close(); } delete [] buf; return 0; } int dump_stack_graph(string data_file, map &unique, map &merged, vector &keys, map > &dist_map, map > &cluster_map) { uint s, t; double d, scale, scaled_d; char label[32]; vector colors; std::ofstream data(data_file.c_str()); size_t pos_1 = data_file.find_last_of("/"); size_t pos_2 = data_file.find_last_of("."); string title = data_file.substr(pos_1 + 1, pos_2 - pos_1 - 1); // // Output a list of IDs so we can locate these stacks in the final results. 
// for (s = 0; s < keys.size(); s++) data << "/* " << keys[s] << ": " << unique[merged[keys[s]]->utags[0]]->map[0] << "; depth: " << merged[keys[s]]->count << " */\n"; // // Output a specification to visualize the stack graph using graphviz: // http://www.graphviz.org/ // data << "graph " << title.c_str() << " {\n" << "rankdir=LR\n" << "size=\"20!\"\n" << "overlap=false\n" << "node [shape=circle style=filled fillcolor=\"#3875d7\" fontname=\"Arial\"];\n" << "edge [fontsize=8.0 fontname=\"Arial\" color=\"#aaaaaa\"];\n"; colors.push_back("red"); colors.push_back("blue"); colors.push_back("green"); colors.push_back("brown"); colors.push_back("purple"); map >::iterator c; set::iterator it; int color_index = 0; string color; // Write out the clusters created by R, prior to writing all the nodes and connections. s = 0; for (c = cluster_map.begin(); c != cluster_map.end(); c++) { data << "subgraph " << s << " {\n" << " edge [penwidth=5 fontsize=12.0 fontcolor=\"black\" color=\"black\"]\n"; if ((*c).second.size() == 1) { color = "white"; data << " node [fillcolor=" << color.c_str() << " fontcolor=\"black\"]\n"; } else { color = colors[color_index % colors.size()]; data << " node [fillcolor=" << color.c_str() << " fontcolor=\"white\"]\n"; color_index++; } for (it = (*c).second.begin(); it != (*c).second.end(); it++) { data << " " << *it << "\n"; } if ((*c).second.size() > 1) { uint j = 0; for (it = (*c).second.begin(); it != (*c).second.end(); it++) { data << *it; if (j < (*c).second.size() - 1) data << " -- "; j++; } } data << "}\n"; s++; } // // Scale the graph to display on a 10 inch canvas. Find the largest edge weight // and scale the edge lengths to fit the canvas. // for (s = 0; s < keys.size(); s++) for (t = s+1; t < keys.size(); t++) scale = dist_map[keys[s]][keys[t]] > scale ? 
dist_map[keys[s]][keys[t]] : scale; scale = scale / 20; for (s = 0; s < keys.size(); s++) { for (t = s+1; t < keys.size(); t++) { d = dist_map[keys[s]][keys[t]]; scaled_d = d / scale; scaled_d = scaled_d < 0.75 ? 0.75 : scaled_d; sprintf(label, "%.1f", d); data << keys[s] << " -- " << keys[t] << " [len=" << scaled_d << ", label=" << label << "];\n"; } } data << "}\n"; data.close(); return 0; } int dump_unique_tags(map &u) { map::iterator it; vector >::iterator pit; vector::iterator mit; char *c; for (it = u.begin(); it != u.end(); it++) { c = (*it).second->seq->seq(); cerr << "UniqueTag UID: " << (*it).second->id << "\n" << " Seq: " << c << "\n" << " IDs: "; for (uint j = 0; j < it->second->map.size(); j++) cerr << it->second->map[j] << " "; cerr << "\n\n"; delete [] c; } return 0; } int dump_merged_tags(map &m) { map::iterator it; vector >::iterator pit; vector::iterator fit; for (it = m.begin(); it != m.end(); it++) { cerr << "MergedStack ID: " << it->second->id << "\n" << " Consensus: "; if (it->second->con != NULL) cerr << it->second->con << "\n"; else cerr << "\n"; cerr << " IDs: "; for (fit = it->second->utags.begin(); fit != it->second->utags.end(); fit++) cerr << (*fit) << " "; cerr << "\n" << " Distances: "; for (pit = it->second->dist.begin(); pit != it->second->dist.end(); pit++) cerr << (*pit).first << ": " << (*pit).second << ", "; cerr << "\n\n"; } return 0; } int load_radtags(string in_file, DNASeqHashMap &radtags, vector &radtags_keys) { Input *fh = NULL; DNASeq *d; if (in_file_type == FileT::fasta) fh = new Fasta(in_file.c_str()); else if (in_file_type == FileT::fastq) fh = new Fastq(in_file.c_str()); else if (in_file_type == FileT::gzfasta) fh = new GzFasta(in_file.c_str()); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(in_file.c_str()); cerr << "Parsing " << in_file.c_str() << "\n"; long int corrected = 0; long int i = 0; short int seql = 0; short int prev_seql = 0; bool len_mismatch = false; Seq c; c.id = new char[id_len]; c.seq = 
new char[max_len]; c.qual = new char[max_len]; while ((fh->next_seq(c)) != 0) { if (i % 10000 == 0) cerr << " Loading RAD-Tag " << i << " \r"; prev_seql = seql; seql = 0; for (char *p = c.seq; *p != '\0'; p++, seql++) switch (*p) { case 'N': case 'n': case '.': *p = 'A'; corrected++; } if (seql != prev_seql && prev_seql > 0) len_mismatch = true; d = new DNASeq(seql, c.seq); pair r; r = radtags.insert(make_pair(d, HVal())); (*r.first).second.add_id(i); radtags_keys.push_back(d); i++; } cerr << "Loaded " << i << " RAD-Tags; inserted " << radtags.size() << " elements into the RAD-Tags hash map.\n"; if (i == 0) { cerr << "Error: Unable to load data from '" << in_file.c_str() << "'.\n"; exit(1); } cerr << " " << corrected << " reads contained uncalled nucleotides that were modified.\n"; if (len_mismatch) cerr << " Warning: different sequence lengths detected, this will interfere with Stacks algorithms.\n"; // // Close the file and delete the Input object. // delete fh; return 0; } int load_seq_ids(vector &seq_ids) { Input *fh = NULL; if (in_file_type == FileT::fasta) fh = new Fasta(in_file.c_str()); else if (in_file_type == FileT::fastq) fh = new Fastq(in_file.c_str()); else if (in_file_type == FileT::gzfasta) fh = new GzFasta(in_file.c_str()); else if (in_file_type == FileT::gzfastq) fh = new GzFastq(in_file.c_str()); cerr << " Refetching sequencing IDs from " << in_file.c_str() << "... "; char *id; Seq c; c.id = new char[id_len]; c.seq = new char[max_len]; c.qual = new char[max_len]; while ((fh->next_seq(c)) != 0) { id = new char[strlen(c.id) + 1]; strcpy(id, c.id); seq_ids.push_back(id); } cerr << "read " << seq_ids.size() << " sequence IDs.\n"; delete fh; return 0; } int calc_triggers(double cov_mean, double cov_stdev, int &deleverage_trigger, int &removal_trigger) { deleverage_trigger = (int) round(cov_mean + cov_stdev * cov_scale); removal_trigger = (int) round(cov_mean + (cov_stdev * 2) * cov_scale); return 0; // // // // Calculate the deleverage trigger. 
Assume RAD-Tags are selected from // // the sample for sequencing randomly, forming a poisson distribution // // representing the depths of coverage of RAD-Tags in the sample. Calculate // // the trigger value that is larger than the depth of coverage of 99.9999% of stacks. // // // long double lambda = cov_mean; // int k = 0; // long double d = 0.0; // long double e = 0.0; // long double f = 0.0; // long double g = 0.0; // long double h = 0.0; // long double i = 0.0; // do { // e = exp(-1 * lambda); // g = pow(lambda, k); // f = factorial(k); // h = (e * g); // i = h / f; // d += i; // //cerr << "iteration " << k << "; e: " << e << " h: " << h << " g: " << g << " F: " << f << " i: " << i << " D: " << d << "\n"; // k++; // } while (d < 0.999999); // return k - 1; } long double factorial(int i) { long double f = 1; if (i == 0) return 1; do { f = f * i; i--; } while (i > 0); return f; } int parse_command_line(int argc, char* argv[]) { int c; while (1) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'v'}, {"infile_type", required_argument, NULL, 't'}, {"file", required_argument, NULL, 'f'}, {"outpath", required_argument, NULL, 'o'}, {"id", required_argument, NULL, 'i'}, {"min_cov", required_argument, NULL, 'm'}, {"max_dist", required_argument, NULL, 'M'}, {"max_sec_dist", required_argument, NULL, 'N'}, {"max_locus_stacks", required_argument, NULL, 'K'}, {"k_len", required_argument, NULL, 'k'}, {"num_threads", required_argument, NULL, 'p'}, {"deleverage", no_argument, NULL, 'd'}, {"remove_rep", no_argument, NULL, 'r'}, {"retain_rem", no_argument, NULL, 'R'}, {"graph", no_argument, NULL, 'g'}, {"exp_cov", no_argument, NULL, 'E'}, {"cov_stdev", no_argument, NULL, 's'}, {"cov_scale", no_argument, NULL, 'S'}, {"sec_hapl", no_argument, NULL, 'H'}, {"model_type", required_argument, NULL, 'T'}, {"bc_err_freq", required_argument, NULL, 'e'}, {"bound_low", required_argument, NULL, 'L'}, {"bound_high", required_argument, 
NULL, 'U'}, {"alpha", required_argument, NULL, 'A'}, {0, 0, 0, 0} }; // getopt_long stores the option index here. int option_index = 0; c = getopt_long(argc, argv, "hHvdrgRA:L:U:f:o:i:m:e:E:s:S:p:t:M:N:K:k:T:", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'h': help(); break; case 't': if (strcmp(optarg, "tsv") == 0) in_file_type = FileT::tsv; else if (strcmp(optarg, "fasta") == 0) in_file_type = FileT::fasta; else if (strcmp(optarg, "fastq") == 0) in_file_type = FileT::fastq; else if (strcasecmp(optarg, "gzfasta") == 0) in_file_type = FileT::gzfasta; else if (strcasecmp(optarg, "gzfastq") == 0) in_file_type = FileT::gzfastq; else in_file_type = FileT::unknown; break; case 'f': in_file = optarg; break; case 'o': out_path = optarg; break; case 'i': sql_id = is_integer(optarg); if (sql_id < 0) { cerr << "SQL ID (-i) must be an integer, e.g. 1, 2, 3\n"; help(); } break; case 'm': min_merge_cov = is_integer(optarg); break; case 'M': max_utag_dist = is_integer(optarg); break; case 'N': max_rem_dist = is_integer(optarg); break; case 'd': deleverage_stacks++; break; case 'r': remove_rep_stacks++; break; case 'K': max_subgraph = is_integer(optarg); break; case 'k': set_kmer_len = false; kmer_len = is_integer(optarg); break; case 'R': retain_rem_reads = true; break; case 'g': dump_graph++; break; case 'E': cov_mean = atof(optarg); break; case 's': cov_stdev = atof(optarg); break; case 'S': cov_scale = atof(optarg); break; case 'T': if (strcmp(optarg, "snp") == 0) { model_type = snp; } else if (strcmp(optarg, "fixed") == 0) { model_type = fixed; } else if (strcmp(optarg, "bounded") == 0) { model_type = bounded; } else { cerr << "Unknown model type specified '" << optarg << "'\n"; help(); } case 'e': barcode_err_freq = atof(optarg); break; case 'L': bound_low = atof(optarg); break; case 'U': bound_high = atof(optarg); break; case 'A': alpha = atof(optarg); break; case 'H': call_sec_hapl = false; break; case 'p': 
num_threads = is_integer(optarg); break; case 'v': version(); break; case '?': // getopt_long already printed an error message. help(); break; default: cerr << "Unknown command line option '" << (char) c << "'\n"; help(); abort(); } } if (set_kmer_len == false && (kmer_len < 5 || kmer_len > 31)) { cerr << "Kmer length must be between 5 and 31bp.\n"; help(); } if (alpha != 0.1 && alpha != 0.05 && alpha != 0.01 && alpha != 0.001) { cerr << "SNP model alpha significance level must be either 0.1, 0.05, 0.01, or 0.001.\n"; help(); } if (bound_low != 0 && (bound_low < 0 || bound_low >= 1.0)) { cerr << "SNP model lower bound must be between 0.0 and 1.0.\n"; help(); } if (bound_high != 1 && (bound_high <= 0 || bound_high > 1.0)) { cerr << "SNP model upper bound must be between 0.0 and 1.0.\n"; help(); } if (bound_low > 0 || bound_high < 1.0) { model_type = bounded; } if (in_file.length() == 0 || in_file_type == FileT::unknown) { cerr << "You must specify an input file of a supported type.\n"; help(); } if (out_path.length() == 0) out_path = "."; if (out_path.at(out_path.length() - 1) != '/') out_path += "/"; if (model_type == fixed && barcode_err_freq == 0) { cerr << "You must specify the barcode error frequency.\n"; help(); } return 0; } void version() { std::cerr << "ustacks " << VERSION << "\n\n"; exit(0); } void help() { std::cerr << "ustacks " << VERSION << "\n" << "ustacks -t file_type -f file_path [-d] [-r] [-o path] [-i id] [-m min_cov] [-M max_dist] [-p num_threads] [-R] [-H] [-h]" << "\n" << " t: input file Type. Supported types: fasta, fastq, gzfasta, or gzfastq.\n" << " f: input file path.\n" << " o: output path to write results." << "\n" << " i: SQL ID to insert into the output to identify this sample." << "\n" << " m: Minimum depth of coverage required to create a stack (default 3)." << "\n" << " M: Maximum distance (in nucleotides) allowed between stacks (default 2)." 
<< "\n" << " N: Maximum distance allowed to align secondary reads to primary stacks (default: M + 2).\n" << " R: retain unused reads.\n" << " H: disable calling haplotypes from secondary reads.\n" << " p: enable parallel execution with num_threads threads.\n" << " h: display this help messsage.\n\n" << " Stack assembly options:\n" << " r: enable the Removal algorithm, to drop highly-repetitive stacks (and nearby errors) from the algorithm." << "\n" << " d: enable the Deleveraging algorithm, used for resolving over merged tags." << "\n" << " --max_locus_stacks : maximum number of stacks at a single de novo locus (default 3).\n" << " --k_len : specify k-mer size for matching between alleles and loci (automatically calculated by default).\n\n" << " Model options:\n" << " --model_type: either 'snp' (default), 'bounded', or 'fixed'\n" << " For the SNP or Bounded SNP model:\n" << " --alpha : chi square significance level required to call a heterozygote or homozygote, either 0.1, 0.05 (default), 0.01, or 0.001.\n" << " For the Bounded SNP model:\n" << " --bound_low : lower bound for epsilon, the error rate, between 0 and 1.0 (default 0).\n" << " --bound_high : upper bound for epsilon, the error rate, between 0 and 1.0 (default 1).\n" << " For the Fixed model:\n" << " --bc_err_freq : specify the barcode error frequency, between 0 and 1.0.\n"; exit(0); } stacks-1.35/src/ustacks.h000644 000765 000024 00000011301 12441417455 016122 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __USTACKS_H__ #define __USTACKS_H__ #include "constants.h" #ifdef _OPENMP #include // OpenMP library #endif #include // Process command-line options #include #include #include #include #include using std::pair; using std::make_pair; #include using std::string; #include #include #include using std::ofstream; using std::stringstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include // std::setprecision #include using std::vector; #include using std::map; #include using std::unordered_map; #include using std::queue; #include using std::set; #include #ifdef HAVE_SPARSEHASH #include using google::sparse_hash_map; #endif #include "kmers.h" #include "utils.h" #include "DNASeq.h" // Class for storing two-bit compressed DNA sequences #include "stacks.h" // Major data structures for holding stacks #include "mstack.h" #include "mst.h" // Minimum spanning tree implementation #include "models.h" // Contains maximum likelihood statistical models. 
#include "FastaI.h" // Reading input files in FASTA format #include "FastqI.h" // Reading input files in FASTQ format #include "gzFasta.h" // Reading gzipped input files in FASTA format #include "gzFastq.h" // Reading gzipped input files in FASTQ format typedef unsigned int uint; const int barcode_size = 5; class HVal { public: vector ids; int count() { return this->ids.size(); } int add_id(int id) { this->ids.push_back(id); return 0; } }; #ifdef HAVE_SPARSEHASH typedef sparse_hash_map DNASeqHashMap; #else typedef unordered_map DNASeqHashMap; #endif void help( void ); void version( void ); int parse_command_line(int, char**); int load_radtags(string, DNASeqHashMap &, vector &); int load_seq_ids(vector &); int reduce_radtags(DNASeqHashMap &, map &, map &); int free_radtags_hash(DNASeqHashMap &, vector &); int populate_merged_tags(map &, map &); int merge_stacks(map &, map &, map &, set &, int); int call_consensus(map &, map &, map &, bool); int call_alleles(MergedStack *, vector &, vector &); int merge_remainders(map &, map &); int write_results(map &, map &, map &); // // Match MergedStacks using a k-mer hashing algorithm // int calc_kmer_distance(map &, int); // // Calculate depth of coverage statistics for stacks // int calc_coverage_distribution(map &, double &, double &); double calc_merged_coverage_distribution(map &, map &); int count_raw_reads(map &, map &, map &); // // Dealing with lumberjack (huge) stacks // int calc_triggers(double, double, int &, int &); int remove_repetitive_stacks(map &, map &); int deleverage(map &, map &, map &, set &, int, vector &); // // Debugging // int dump_unique_tags(map &); int dump_merged_tags(map &); int dump_stack_graph(string, map &, map &, vector &, map > &, map > &); // // Utilities // MergedStack *merge_tags(map &, set &, int); MergedStack *merge_tags(map &, int *, int, int); long double factorial(int); // // Deprecated // int calc_distance(map &, int); #endif // __USTACKS_H__ stacks-1.35/src/utils.cc000644 000765 
000024 00000010736 12533677757 015775 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // utils.cc -- common routines needed in multiple object files. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "utils.h" char reverse(char c) { switch (c) { case 'A': case 'a': return 'T'; break; case 'C': case 'c': return 'G'; break; case 'G': case 'g': return 'C'; break; case 'T': case 't': return 'A'; break; case 'N': case 'n': case '.': return 'N'; break; case '-': default: return '-'; break; } return 'N'; } char * rev_comp(const char *seq) { int len = strlen(seq); int j = 0; char *com = new char[len + 1]; const char *p; for (p = seq + len - 1; p >= seq; p--) { switch (*p) { case 'A': case 'a': com[j] = 'T'; break; case 'C': case 'c': com[j] = 'G'; break; case 'G': case 'g': com[j] = 'C'; break; case 'T': case 't': com[j] = 'A'; break; case 'N': case 'n': case '.': com[j] = 'N'; break; } j++; } com[len] = '\0'; return com; } void reverse_string(char *seq) { int len = strlen(seq); char *p = seq; char *q = seq + len - 1; char tmp; while (q > p) { tmp = *q; *q = *p; *p = tmp; q--; p++; } return; } int is_integer(const char *str) { // // Adapted from the strtol manpage. 
// char *endptr; // To distinguish success/failure after call errno = 0; long val = strtol(str, &endptr, 10); // // Check for various possible errors // if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) || (errno != 0 && val == 0)) { return -1; } if (endptr == str || *endptr != '\0') return -1; return (int) val; } double is_double(const char *str) { // // Adapted from the strtol manpage. // char *endptr; // To distinguish success/failure after call errno = 0; double val = strtod(str, &endptr); // // Check for various possible errors // if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) || (errno != 0 && val == 0)) { return -1; } if (endptr == str || *endptr != '\0') return -1; return val; } double factorial(double n) { double fact = 1; for (double i = n; i > 1; i--) fact *= i; return fact; } double reduced_factorial(double n, double d) { double f = n - d; if (f < 0) return 0; else if (f == 0) return 1; else if (f == 1) return n; f = n; n--; while (n > d) { f *= n; n--; } return f; } double log_factorial(double n) { double fact = 0; for (double i = n; i > 1; i--) fact += log(i); return fact; } double reduced_log_factorial(double n, double d) { double f = n - d; if (f < 0) return 0; else if (f == 0) return 0; else if (f == 1) return log(n); f = log(n); n--; while (n > d) { f += log(n); n--; } return f; } bool compare_pair(pair a, pair b) { return (a.second > b.second); } bool compare_pair_intdouble(pair a, pair b) { return (a.second < b.second); } bool compare_ints(int a, int b) { return (a > b); } bool compare_pair_snp(pair a, pair b) { return (a.second->col < b.second->col); } bool compare_pair_haplotype(pair a, pair b) { return (a.second > b.second); } bool compare_pair_haplotype_rev(pair a, pair b) { return (a.second < b.second); } stacks-1.35/src/utils.h000644 000765 000024 00000003774 12533677757 015643 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2010-2015, Julian Catchen // // This 
file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // #ifndef __UTILS_H__ #define __UTILS_H__ #include #include #include #include #include using std::cerr; using std::endl; #include using std::pair; using std::make_pair; #include "stacks.h" char reverse(char); char *rev_comp(const char *); void reverse_string(char *); int is_integer(const char *); double is_double(const char *); double factorial(double); double reduced_factorial(double, double); double log_factorial(double); double reduced_log_factorial(double, double); // // Comparison functions for the STL sort routine // bool compare_ints(int, int); bool compare_pair(pair, pair); bool compare_pair_intdouble(pair, pair); bool compare_pair_snp(pair, pair); bool compare_pair_haplotype(pair, pair); bool compare_pair_haplotype_rev(pair, pair); // // Comparison classes for STL sets // struct int_increasing { bool operator() (const int& lhs, const int& rhs) const { return lhs < rhs; } }; struct int_decreasing { bool operator() (const int& lhs, const int& rhs) const { return lhs > rhs; } }; #endif // __UTILS_H__ stacks-1.35/src/write.cc000644 000765 000024 00000016201 12574066143 015742 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013-2015, Julian Catchen // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // write.cc -- common routines for writing FASTA/FASTQ records to a file.. // // Julian Catchen // jcatchen@uoregon.edu // University of Oregon // #include "write.h" int write_fasta(ofstream *fh, Read *href, bool overhang) { char tile[id_len]; sprintf(tile, "%04d", href->tile); int offset = href->inline_bc_len; offset += overhang ? 1 : 0; if (href->fastq_type != generic_fastq) *fh << ">" << href->lane << "_" << tile << "_" << href->x << "_" << href->y << "_" << href->read << "\n" << href->seq + offset << "\n"; else *fh << ">" << href->machine << "_" << href->read << "\n" << href->seq + offset << "\n"; if (fh->fail()) return -1; return 1; } int write_fasta(gzFile *fh, Read *href, bool overhang) { stringstream sstr; char tile[id_len]; sprintf(tile, "%04d", href->tile); int offset = href->inline_bc_len; offset += overhang ? 
1 : 0; if (href->fastq_type != generic_fastq) sstr << ">" << href->lane << "_" << tile << "_" << href->x << "_" << href->y << "_" << href->read << "\n" << href->seq + offset << "\n"; else sstr << ">" << href->machine << "_" << href->read << "\n" << href->seq + offset << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fasta(ofstream *fh, Seq *href) { *fh << ">" << href->id << "\n" << href->seq << "\n"; if (fh->fail()) return -1; return 1; } int write_fasta(gzFile *fh, Seq *href) { stringstream sstr; sstr << ">" << href->id << "\n" << href->seq << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fastq(ofstream *fh, Read *href, bool overhang) { // // Write the sequence and quality scores in FASTQ format. // char tile[id_len]; sprintf(tile, "%04d", href->tile); int offset = href->inline_bc_len; offset += overhang ? 1 : 0; if (href->fastq_type != generic_fastq) *fh << "@" << href->lane << "_" << tile << "_" << href->x << "_" << href->y << "_" << href->read << "\n" << href->seq + offset << "\n" << "+\n" << href->phred + offset << "\n"; else *fh << "@" << href->machine << "_" << href->read << "\n" << href->seq + offset << "\n" << "+\n" << href->phred + offset << "\n"; if (fh->fail()) return -1; return 1; } int write_fastq(gzFile *fh, Read *href, bool overhang) { // // Write the sequence and quality scores in FASTQ format. // stringstream sstr; char tile[id_len]; sprintf(tile, "%04d", href->tile); int offset = href->inline_bc_len; offset += overhang ? 
1 : 0; if (href->fastq_type != generic_fastq) sstr << "@" << href->lane << "_" << tile << "_" << href->x << "_" << href->y << "_" << href->read << "\n" << href->seq + offset << "\n" << "+\n" << href->phred + offset << "\n"; else sstr << "@" << href->machine << "_" << href->read << "\n" << href->seq + offset << "\n" << "+\n" << href->phred + offset << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fastq(ofstream *fh, Seq *href, int offset) { *fh << "@" << href->id << "\n" << href->seq + offset << "\n" << "+\n" << href->qual + offset << "\n"; if (fh->fail()) return -1; return 1; } int write_fastq(gzFile *fh, Seq *href, int offset) { stringstream sstr; sstr << "@" << href->id << "\n" << href->seq + offset << "\n" << "+\n" << href->qual + offset << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fasta(ofstream *fh, Seq *href, int offset) { *fh << ">" << href->id << "\n" << href->seq + offset << "\n"; if (fh->fail()) return -1; return 1; } int write_fasta(gzFile *fh, Seq *href, int offset) { stringstream sstr; sstr << ">" << href->id << "\n" << href->seq + offset << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fastq(ofstream *fh, Seq *href) { *fh << "@" << href->id << "\n" << href->seq << "\n" << "+\n" << href->qual << "\n"; if (fh->fail()) return -1; return 1; } int write_fastq(gzFile *fh, Seq *href) { stringstream sstr; sstr << "@" << href->id << "\n" << href->seq << "\n" << "+\n" << href->qual << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fastq(ofstream *fh, Seq *href, string msg) { *fh << "@" << href->id << "|" << msg << "\n" << href->seq << "\n" << "+\n" << href->qual << "\n"; if (fh->fail()) return -1; return 1; } int write_fastq(gzFile *fh, Seq *href, string msg) { stringstream sstr; sstr << "@" << href->id << "|" << msg << "\n" << href->seq << "\n" << "+\n" << href->qual << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int 
write_fasta(ofstream *fh, Seq *href, string msg) { *fh << ">" << href->id << "|" << msg << "\n" << href->seq << "\n"; if (fh->fail()) return -1; return 1; } int write_fasta(gzFile *fh, Seq *href, string msg) { stringstream sstr; sstr << ">" << href->id << "|" << msg << "\n" << href->seq << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fasta(ofstream *fh, Seq *href, Read *r) { *fh << ">" << href->id << "\n" << r->seq + r->inline_bc_len << "\n"; if (fh->fail()) return -1; return 1; } int write_fasta(gzFile *fh, Seq *href, Read *r) { stringstream sstr; sstr << ">" << href->id << "\n" << r->seq + r->inline_bc_len << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } int write_fastq(ofstream *fh, Seq *href, Read *r) { *fh << "@" << href->id << "\n" << r->seq + r->inline_bc_len << "\n" << "+\n" << r->phred + r->inline_bc_len << "\n"; if (fh->fail()) return -1; return 1; } int write_fastq(gzFile *fh, Seq *href, Read *r) { stringstream sstr; sstr << "@" << href->id << "\n" << r->seq + r->inline_bc_len << "\n" << "+\n" << r->phred + r->inline_bc_len << "\n"; int res = gzputs(*fh, sstr.str().c_str()); return res; } stacks-1.35/src/write.h000644 000765 000024 00000003602 12574066143 015605 0ustar00catchenstaff000000 000000 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- // // Copyright 2013, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// #ifndef __WRITE_H__ #define __WRITE_H__ #include using std::string; #include #include using std::ofstream; using std::cin; using std::cout; using std::cerr; using std::endl; #include #include #include "input.h" #include "clean.h" int write_fastq(ofstream *, Read *, bool); int write_fastq(ofstream *, Seq *); int write_fastq(ofstream *, Seq *, int); int write_fastq(ofstream *, Seq *, string); int write_fasta(ofstream *, Read *, bool); int write_fasta(ofstream *, Seq *); int write_fasta(ofstream *, Seq *, int); int write_fasta(ofstream *, Seq *, string); int write_fastq(ofstream *, Seq *, Read *); int write_fasta(ofstream *, Seq *, Read *); int write_fastq(gzFile *, Read *, bool); int write_fastq(gzFile *, Seq *); int write_fastq(gzFile *, Seq *, int); int write_fastq(gzFile *, Seq *, string); int write_fasta(gzFile *, Read *, bool); int write_fasta(gzFile *, Seq *); int write_fasta(gzFile *, Seq *, int); int write_fasta(gzFile *, Seq *, string); int write_fastq(gzFile *, Seq *, Read *); int write_fasta(gzFile *, Seq *, Read *); #endif // __WRITE_H__ stacks-1.35/sql/catalog_index.sql000644 000765 000024 00000003434 12441417455 017636 0ustar00catchenstaff000000 000000 create table catalog_index ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, cat_id int unsigned not null, tag_id int unsigned not null, snps int unsigned not null, parents int unsigned not null, progeny int unsigned not null, alleles int unsigned not null, marker enum('aa/bb', 'aa/b-', '-a/bb', 'ab/--', '--/ab', 'aa/ab', 'ab/aa', 'ab/a-', '-a/ab', 'ab/ab', 'ab/ac', 'ab/cd', 'ab/cc', 'cc/ab', 'ab/c-', '-c/ab', ''), uncor_marker enum('aa/bb', 'aa/b-', '-a/bb', 'ab/--', '--/ab', 'aa/ab', 'ab/aa', 'ab/a-', '-a/ab', 'ab/ab', 'ab/ac', 'ab/cd', 'ab/cc', 'cc/ab', 'ab/c-', '-c/ab', ''), valid_progeny int unsigned not null default 0, chisq_pval float not null default 1.0, lnl float not null default 0.0, ratio varchar(512), ests int unsigned not null, pe_radtags int unsigned 
not null, blast_hits int unsigned not null, geno_cnt int unsigned not null, chr varchar(32), bp int unsigned default 0, type enum('genomic', 'exon', 'intron'), ref_id int unsigned not null, INDEX batch_index (batch_id), INDEX tag_index (tag_id), INDEX snps_index (snps), INDEX parents_index (parents), INDEX progeny_index (progeny), INDEX allele_index (alleles), INDEX marker_index (marker), INDEX valid_index (valid_progeny), INDEX lnl_index (lnl), INDEX chisq_pval_index (chisq_pval), INDEX ests_index (ests), INDEX pe_rad_index (pe_radtags), INDEX hits_index (blast_hits), INDEX geno_index (geno_cnt), INDEX chr_index (chr), INDEX bp_index (bp), INDEX type_index (type) ); stacks-1.35/sql/chr_index.sql000644 000765 000024 00000000305 12335173442 016767 0ustar00catchenstaff000000 000000 create table chr_index ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, chr varchar(32), max_len int unsigned not null );stacks-1.35/sql/mysql.cnf.dist000644 000765 000024 00000000115 12533677757 017123 0ustar00catchenstaff000000 000000 [client] user=dbuser password=dbpass host=localhost port=3306 local-infile=1 stacks-1.35/sql/stacks.sql000644 000765 000024 00000026122 12533677757 016343 0ustar00catchenstaff000000 000000 SET default_storage_engine=MYISAM; create table batches ( id int unsigned not null primary key auto_increment, date DATE not null, description tinytext, type enum('map', 'population') ); create table samples ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, sample_id int unsigned not null, type enum('parent', 'progeny', 'sample'), file varchar(128), pop_id varchar(32), group_id varchar(32) ); create table populations ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, pop_id int unsigned not null, pop_name varchar(128) ); create table sumstats ( batch_id int unsigned not null, tag_id int unsigned not null, chr varchar(32), bp int unsigned default 0, col int 
unsigned default 0, pop_id varchar(32), p_nuc char, q_nuc char, n int unsigned default 0, p double, obs_het double, obs_hom double, exp_het double, exp_hom double, pi double, pi_s double, pi_s_pval double, fis double, fis_s double, fis_s_pval double, private int unsigned default 0, INDEX batch_id_index (batch_id), INDEX tag_id_index (tag_id) ); create table fst ( batch_id int unsigned not null, tag_id int unsigned not null, pop_id_1 varchar(32), pop_id_2 varchar(32), chr varchar(32), bp int unsigned default 0, col int unsigned default 0, pi_o double, fst double, fishers_p double, odds_ratio double, ci_low double, ci_high double, lod double, fst_c double, fst_s double, amova_fst double, amova_fst_c double, amova_fst_s double, amova_fst_s_pval double, snp_cnt int unsigned default 0, INDEX batch_id_index (batch_id), INDEX tag_id_index (tag_id) ); create table hapstats ( batch_id int unsigned not null, tag_id int unsigned not null, chr varchar(32), bp int unsigned default 0, pop_id varchar(32), n int unsigned default 0, hapcnt double, gene_div double, gene_div_s double, gene_div_pval double, hap_div double, hap_div_s double, hap_div_pval double, haplotypes tinytext, INDEX batch_id_index (batch_id), INDEX tag_id_index (tag_id) ); create table phist ( batch_id int unsigned not null, tag_id int unsigned not null, pop_id_1 varchar(32), pop_id_2 varchar(32), chr varchar(32), bp int unsigned default 0, phist double, phist_s double, phist_pval double, fpst double, fpst_s double, fpst_pval double, dest double, dest_s double, dest_pval double, INDEX batch_id_index (batch_id), INDEX tag_id_index (tag_id) ); create table catalog_tags ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, tag_id int unsigned not null, chr varchar(32), bp int unsigned default 0, strand enum('+', '-'), relationship enum('consensus', 'primary', 'secondary', 'tertiary'), sub_id int unsigned not null, merge_type tinytext, seq text, INDEX batch_id_index (batch_id), INDEX 
tag_id_index (tag_id) ); create table catalog_snps ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, tag_id int unsigned not null, col int unsigned not null, type enum('E', 'O', 'U'), lratio float, rank_1 char(1), rank_2 char(1), rank_3 char(1), rank_4 char(1), INDEX batch_index (batch_id), INDEX tag_index (tag_id) ); create table catalog_alleles ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, tag_id int unsigned not null, allele varchar(32), read_pct float, read_cnt int unsigned, INDEX batch_index (batch_id), INDEX tag_index (tag_id) ); create table catalog_genotypes ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, sample_id int unsigned not null, genotype char(2), INDEX batch_index (batch_id), INDEX catalog_index (catalog_id), INDEX sample_index (sample_id) ); create table genotype_corrections ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, sample_id int unsigned not null, genotype char(2), INDEX cat_index (catalog_id), INDEX batch_index (batch_id), INDEX sample_index (sample_id) ); create table catalog_annotations ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, external_id varchar(64), INDEX batch_index (batch_id), INDEX catalog_index (catalog_id), INDEX external_index (external_id) ); create table pileup ( id int unsigned not null primary key auto_increment, sample_id int unsigned not null, tag_id int unsigned not null, chr varchar(32), bp int unsigned default 0, relationship enum('consensus', 'model', 'primary', 'secondary'), sub_id int unsigned not null, seq_id varchar(32), seq text, INDEX tag_id_index (tag_id), INDEX sample_id_index (sample_id), INDEX rel_index (relationship) ); create table unique_tags ( id int unsigned not null primary key auto_increment, sample_id 
int unsigned not null, tag_id int unsigned not null, chr varchar(32), bp int unsigned default 0, strand enum('+', '-'), relationship enum('consensus', 'model', 'primary', 'secondary'), sub_id int unsigned not null, seq_id varchar(32), seq text, deleveraged bool default false, blacklisted bool default false, removed bool default false, lnl float, INDEX tag_id_index (tag_id), INDEX sample_id_index (sample_id), INDEX rel_index (relationship) ); create table snps ( id int unsigned not null primary key auto_increment, sample_id int unsigned not null, tag_id int unsigned not null, col int unsigned not null, type enum('E', 'O', 'U'), lratio float, rank_1 char(1), rank_2 char(1), rank_3 char(1), rank_4 char(1), INDEX samp_index (sample_id), INDEX tag_index (tag_id) ); create table alleles ( id int unsigned not null primary key auto_increment, sample_id int unsigned not null, tag_id int unsigned not null, allele varchar(32), read_pct float, read_cnt int unsigned, INDEX samp_index (sample_id), INDEX tag_index (tag_id) ); create table matches ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, sample_id int unsigned not null, tag_id int unsigned not null, allele varchar(256), depth int unsigned not null, lnl float, INDEX batch_id_index (batch_id), INDEX catalog_id_index (catalog_id), INDEX sample_id_index (sample_id), INDEX tag_id_index (tag_id) ); create table markers ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, type enum('aa/bb', 'aa/b-', '-a/bb', 'ab/--', '--/ab', 'aa/ab', 'ab/aa', 'ab/a-', '-a/ab', 'ab/ab', 'ab/ac', 'ab/cd', 'ab/cc', 'cc/ab', 'ab/c-', '-c/ab'), progeny int unsigned not null default 0, max_pct float, ratio varchar(512), chisq_pval float, lnl float, geno_map text, uncor_type enum('aa/bb', 'aa/b-', '-a/bb', 'ab/--', '--/ab', 'aa/ab', 'ab/aa', 'ab/a-', '-a/ab', 'ab/ab', 'ab/ac', 'ab/cd', 'ab/cc', 'cc/ab', 'ab/c-', 
'-c/ab') ); create table sequence ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, catalog_id int unsigned not null, type enum('pe_radtag', 'est'), seq_id varchar(64), seq text, INDEX catalog_id_index (catalog_id) ); create table sequence_blast ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null default 0, catalog_id int unsigned not null default 0, seq_id int unsigned not null default 0, algorithm enum('blastn', 'blastp', 'blastx', 'tblastn', 'tblastx'), query_id varchar(64), query_len int unsigned not null default 0, hit_id varchar(128), hit_len int unsigned not null default 0, score double, e_value double, percent_ident double, hsp_rank int unsigned not null default 0, aln_len int unsigned not null default 0, aln_homology_str text, query_aln text, query_aln_start int unsigned not null default 0, query_aln_end int unsigned not null default 0, hit_aln text, hit_aln_start int unsigned not null default 0, hit_aln_end int unsigned not null default 0 ); create table chr_index ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, chr varchar(32), max_len int unsigned not null ); create table ref_radome ( id int unsigned not null primary key, chr varchar(32), bp int unsigned not null, strand enum('1', '-1'), type enum('genomic', 'exon', 'intron'), gene varchar(32), ext_id varchar(32), exon varchar(32), ex_start int unsigned not null, ex_end int unsigned not null, ex_index int unsigned not null, tran_cnt int unsigned not null, dist int unsigned not null, uniq bool default false, tran varchar(32) ); stacks-1.35/sql/tag_index.sql000644 000765 000024 00000001547 12335173442 016777 0ustar00catchenstaff000000 000000 create table tag_index ( id int unsigned not null primary key auto_increment, batch_id int unsigned not null, sample_id int unsigned not null, tag_id int unsigned not null, con_tag_id int unsigned not null, depth int unsigned not null, snps int unsigned not 
null, catalog_id int unsigned not null, deleveraged bool default false, blacklisted bool default false, removed bool default false, INDEX batch_index (batch_id), INDEX sample_index (sample_id), INDEX tag_index (tag_id), INDEX con_tag_index (con_tag_id), INDEX depth_index (depth), INDEX snps_index (snps), INDEX delv_index (deleveraged), INDEX black_index (blacklisted), INDEX rem_index (removed), INDEX catalog_index (catalog_id) ); stacks-1.35/scripts/convert_stacks.pl000644 000765 000024 00000015222 12441417455 020567 0ustar00catchenstaff000000 000000 #!/usr/bin/perl # # Copyright 2014, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # use strict; use constant stacks_version => "_VERSION_"; use constant true => 1; use constant false => 0; my $tags_midpt = 11; my $tags_len = 13; my $snps_midpt = 3; my $snps_len = 9; my $debug = 0; my $in_path = ""; my $out_path = ""; my $gzipped = false; parse_command_line(); my (@files, @catalog_files); build_file_list(\@files, \@catalog_files); my ($file, $num_files, $i, $key); $num_files = scalar(@catalog_files); $i = 1; foreach $file (@catalog_files) { printf(STDERR "Converting catalog files, file % 2s of % 2s [%s]\n", $i, $num_files, $file); convert_tags_file($in_path, $out_path, $file . ".catalog"); convert_snps_file($in_path, $out_path, $file . ".catalog"); # # Just copy the unchanged *.alleles.tsv files. 
# if ($gzipped == true) { `cp $in_path/${file}.catalog.alleles.tsv.gz $out_path/.`; } else { `cp $in_path/${file}.catalog.alleles.tsv $out_path/.`; } $i++; } $num_files = scalar(@files); $i = 1; foreach $file (@files) { printf(STDERR "Converting sample files, file % 2s of % 2s [%s]\n", $i, $num_files, $file); convert_tags_file($in_path, $out_path, $file); convert_snps_file($in_path, $out_path, $file); convert_matches_file($in_path, $out_path, $file); # # Just copy the unchanged *.alleles.tsv files. # if ($gzipped == true) { `cp $in_path/${file}.alleles.tsv.gz $out_path/.`; } else { `cp $in_path/${file}.alleles.tsv $out_path/.`; } $i++; } sub convert_matches_file { my ($in_path, $out_path, $file) = @_; my ($path, $in_fh, $out_fh, $line, @parts); $path = $in_path . "/" . $file . ".matches.tsv"; if ($gzipped) { $path .= ".gz"; open($in_fh, "gunzip -c $path |") or die("Unable to open matches file '$path', $!\n"); } else { open($in_fh, "<$path") or die("Unable to open matches file '$path', $!\n"); } $path = $out_path . "/" . $file . ".matches.tsv"; open($out_fh, ">$path") or die("Unable to open tags output file '$path', $!\n"); while ($line = <$in_fh>) { chomp $line; @parts = split(/\t/, $line); print $out_fh join("\t", @parts), "\t", "0.0\n"; # Missing column } close($in_fh); close($out_fh); `gzip -f $path` if ($gzipped); } sub convert_tags_file { my ($in_path, $out_path, $file) = @_; my ($path, $in_fh, $out_fh, $line, @parts); $path = $in_path . "/" . $file . ".tags.tsv"; if ($gzipped) { $path .= ".gz"; open($in_fh, "gunzip -c $path |") or die("Unable to open tags file '$path', $!\n"); } else { open($in_fh, "<$path") or die("Unable to open tags file '$path', $!\n"); } $path = $out_path . "/" . $file . 
".tags.tsv"; open($out_fh, ">$path") or die("Unable to open tags output file '$path', $!\n"); while ($line = <$in_fh>) { chomp $line; if ($parts[6] eq "consensus") { print $out_fh $line, "\t", "0.0\n"; # Missing column } else { print $out_fh $line, "\t", "\n", # Missing column } } close($in_fh); close($out_fh); `gzip -f $path` if ($gzipped); } sub convert_snps_file { my ($in_path, $out_path, $file) = @_; my ($path, $in_fh, $out_fh, $line, @parts); $path = $in_path . "/" . $file . ".snps.tsv"; if ($gzipped) { $path .= ".gz"; open($in_fh, "gunzip -c $path |") or die("Unable to open tags file '$path', $!\n"); } else { open($in_fh, "<$path") or die("Unable to open tags file '$path', $!\n"); } $path = $out_path . "/" . $file . ".snps.tsv"; open($out_fh, ">$path") or die("Unable to open tags output file '$path', $!\n"); while ($line = <$in_fh>) { chomp $line; @parts = split(/\t/, $line); print $out_fh join("\t", @parts[0 .. $snps_midpt]), "\t", "E\t", # Missing column join("\t", @parts[$snps_midpt+1 .. 
$snps_len-1]), "\n"; } close($in_fh); close($out_fh); `gzip -f $path` if ($gzipped); } sub build_file_list { my ($files, $catalog_files) = @_; my (@wl, @ls, $line, $prefix); @ls = `ls -1 $in_path/*.tags.tsv* 2> /dev/null`; if (scalar(@ls) == 0) { print STDERR "Unable to locate any input files to process within '$in_path'\n"; usage(); } foreach $line (@ls) { chomp $line; if ($line =~ /\.tags\.tsv\.gz$/) { $gzipped = true; ($prefix) = ($line =~ /$in_path\/(.+)\.tags\.tsv\.gz/); } else { ($prefix) = ($line =~ /$in_path\/(.+)\.tags\.tsv/); } next if ($prefix =~ /catalog/); push(@{$files}, $prefix); } @ls = `ls -1 $in_path/*.catalog.tags.tsv* 2> /dev/null`; if (scalar(@ls) == 0) { print STDERR "Unable to locate any catalog input files to process within '$in_path'\n"; usage(); } foreach $line (@ls) { chomp $line; if ($line =~ /\.catalog\.tags\.tsv\.gz$/) { $gzipped = true; ($prefix) = ($line =~ /$in_path\/(.+)\.catalog\.tags\.tsv\.gz/); } else { ($prefix) = ($line =~ /$in_path\/(.+)\.catalog\.tags\.tsv/); } push(@{$catalog_files}, $prefix); } } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { $in_path = shift @ARGV; } elsif ($_ =~ /^-o$/) { $out_path = shift @ARGV; } elsif ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } $in_path = substr($in_path, 0, -1) if (substr($in_path, -1) eq "/"); $out_path = substr($out_path, 0, -1) if (substr($out_path, -1) eq "/"); if ($in_path eq $out_path) { print STDERR "Input and output paths cannot be the same.\n"; usage(); } } sub version { print STDERR "convert_stacks.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR < # # This file is part of Stacks. 
# # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Process the data for a genetic map: build stacks in parents and progeny, # create a catalog from the parents, and match progeny against the catatlog. # Call genotypes, and load all data into an MySQL database along the way. # # For the database interactions to work, the 'mysql' program is expected to be # on the path and sufficient permissions set to access the specified database. # use strict; use POSIX; use File::Temp qw/ mktemp /; use File::Spec; use constant stacks_version => "_VERSION_"; my $dry_run = 0; my $sql = 1; my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf"; my $exe_path = "_BINDIR_"; my $out_path = ""; my $popmap_path = ""; my $db = ""; my $data_type = "map"; my $min_cov = 0; my $min_rcov = 0; my $batch_id = -1; my $sample_id = 1; my $desc = ""; # Database description of this dataset my $date = ""; # Date relevent to this data, formatted for SQL: 2009-05-31 my @parents; my @progeny; my @samples; my (@_ustacks, @_cstacks, @_sstacks, @_genotypes, @_populations); my $cmd_str = $0 . " " . join(" ", @ARGV); parse_command_line(); check_input_files(\@parents, \@progeny, \@samples); my $cnf = (-e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . "/.my.cnf" : $mysql_config; # # Check for the existence of the necessary pipeline programs # die ("Unable to find '" . $exe_path . "ustacks'.\n") if (!-e $exe_path . "ustacks" || !-x $exe_path . "ustacks"); die ("Unable to find '" . 
$exe_path . "cstacks'.\n") if (!-e $exe_path . "cstacks" || !-x $exe_path . "cstacks"); die ("Unable to find '" . $exe_path . "sstacks'.\n") if (!-e $exe_path . "sstacks" || !-x $exe_path . "sstacks"); die ("Unable to find '" . $exe_path . "genotypes'.\n") if (!-e $exe_path . "genotypes" || !-x $exe_path . "genotypes"); die ("Unable to find '" . $exe_path . "populations'.\n") if (!-e $exe_path . "populations" || !-x $exe_path . "populations"); die ("Unable to find '" . $exe_path . "index_radtags.pl'.\n") if (!-e $exe_path . "index_radtags.pl" || !-x $exe_path . "index_radtags.pl"); my ($i, $log, $log_fh, $pipe_fh, $pfile, $file, $num_files, $parent, $sample, %map); $i = 1; $num_files = scalar(@parents) + scalar(@progeny) + scalar(@samples); my (@types, $type, @pop_ids, $pop, %pops, @grp_ids, $grp, %grps); parse_population_map(\@samples, \@pop_ids, \%pops, \@grp_ids, \%grps) if ($data_type eq "population"); foreach $parent (@parents) { push(@types, "parent"); push(@pop_ids, "1"); push(@grp_ids, "1"); } foreach $parent (@progeny) { push(@types, "progeny"); push(@pop_ids, "1"); push(@grp_ids, "1"); } foreach $parent (@samples) { push(@types, "sample"); } my (@results, $minc, $minrc, $cmd, $ppath, $pop_cnt); $pop_cnt = scalar(keys %pops); $minc = $min_cov > 0 ? "-m $min_cov" : ""; $minrc = $min_rcov > 0 ? "-m $min_rcov" : $minc; # # Open the log file # $log = "$out_path/denovo_map.log"; open($log_fh, ">$log") or die("Unable to open log file '$log'; $!\n"); print $log_fh "denovo_map.pl version ", stacks_version, " started at ", strftime("%Y-%m-%d %H:%M:%S",(localtime(time))), "\n", $cmd_str, "\n"; if ($sql == 1) { # # SQL Batch ID for this set of Radtags, along with description and date of # sequencing. Insert this batch data into the database. 
# `mysql --defaults-file=$cnf $db -e "INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$data_type'"` if ($dry_run == 0); print $log_fh "mysql --defaults-file=$cnf $db -e \"INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$data_type'\"\n"; } my $gzip = 0; foreach $sample (@parents, @progeny, @samples) { my ($ftype, $pfile) = ""; my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($suffix eq "gz") { $gzip = 1; ($prefix, $suffix) = ($prefix =~ /^(.+)\.(.+)$/); } if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } if ($gzip == 1) { if ($suffix =~ /^fa_?\d?$/ || $suffix =~ /^fasta_?\d?$/) { $ftype = "gzfasta"; } elsif ($suffix =~ /^fq_?\d?$/ || $suffix =~ /^fastq_?\d?$/) { $ftype = "gzfastq"; } else { die("Unknown input file type.\n"); } } else { if ($suffix =~ /^fa_?\d?$/ || $suffix =~ /^fasta_?\d?$/) { $ftype = "fasta"; } elsif ($suffix =~ /^fq_?\d?$/ || $suffix =~ /^fastq_?\d?$/) { $ftype = "fastq"; } else { die("Unknown input file type.\n"); } } $type = shift @types; $pop = shift @pop_ids; $grp = shift @grp_ids; printf("Identifying unique stacks; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); printf($log_fh "Identifying unique stacks; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); if ($sql == 1) { if ($dry_run == 0) { `mysql --defaults-file=$cnf $db -e "INSERT INTO samples SET sample_id=$i, batch_id=$batch_id, type='$type', file='$pfile', pop_id='$pop', group_id='$grp'"`; @results = `mysql --defaults-file=$cnf $db -N -B -e "SELECT id FROM samples WHERE sample_id=$i AND batch_id=$batch_id AND type='$type' AND file='$pfile'"`; chomp $results[0]; $sample_id = $results[0]; } print $log_fh "mysql --defaults-file=$cnf $db -e \"INSERT INTO samples SET sample_id=$i, batch_id=$batch_id, type='$type', file='$pfile', pop_id='$pop', group_id='$grp'\"\n"; } $map{$pfile} = $sample_id; if ($type eq "parent" || $type eq "sample") { $cmd = $exe_path . 
"ustacks -t $ftype -f $sample -o $out_path -i $sample_id $minc " . join(" ", @_ustacks) . " 2>&1"; } elsif ($type eq "progeny") { $cmd = $exe_path . "ustacks -t $ftype -f $sample -o $out_path -i $sample_id $minrc " . join(" ", @_ustacks) . " 2>&1"; } print STDERR " $cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); write_results(\@results, $log_fh); print STDERR " Loading ustacks output to $db..." if ($sql == 1); if ($gzip == 1) { $file = "$out_path/$pfile" . ".tags.tsv.gz"; import_gzsql_file($log_fh, $file, "unique_tags", 1); $file = "$out_path/$pfile" . ".snps.tsv.gz"; import_gzsql_file($log_fh, $file, "snps", 1); $file = "$out_path/$pfile" . ".alleles.tsv.gz"; import_gzsql_file($log_fh, $file, "alleles", 1); } else { $file = "$out_path/$pfile" . ".tags.tsv"; import_sql_file($log_fh, $file, "unique_tags", 1); $file = "$out_path/$pfile" . ".snps.tsv"; import_sql_file($log_fh, $file, "snps", 1); $file = "$out_path/$pfile" . ".alleles.tsv"; import_sql_file($log_fh, $file, "alleles", 1); } print STDERR "done.\n" if ($sql == 1); $i++; $sample_id++ if ($sql == 0); } my ($rid, $pfile, $parents, $cat_file); # # Generate catalog of RAD-Tags # print STDERR "Generating catalog...\n"; foreach $sample (@parents, @samples) { my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($suffix eq "gz") { ($prefix, $suffix) = ($prefix =~ /^(.+)\.(.+)$/); } if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } $parents .= "-s $out_path/$pfile "; } $cat_file = "batch_" . $batch_id; $cmd = $exe_path . "cstacks -b $batch_id -o $out_path $parents " . join(" ", @_cstacks) . " 2>&1"; print STDERR " $cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; if ($_ =~ /failed/i) { print STDERR "Catalog construction failed.\n"; exit(1); } } close($pipe_fh); } print STDERR " Importing catalog to MySQL database..." 
if ($sql == 1); if ($gzip == 1) { $file = "$out_path/$cat_file" . ".catalog.tags.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_tags", 1); $file = "$out_path/$cat_file" . ".catalog.snps.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_snps", 1); $file = "$out_path/$cat_file" . ".catalog.alleles.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_alleles", 1); } else { $file = "$out_path/$cat_file" . ".catalog.tags.tsv"; import_sql_file($log_fh, $file, "catalog_tags", 1); $file = "$out_path/$cat_file" . ".catalog.snps.tsv"; import_sql_file($log_fh, $file, "catalog_snps", 1); $file = "$out_path/$cat_file" . ".catalog.alleles.tsv"; import_sql_file($log_fh, $file, "catalog_alleles", 1); } print STDERR "done.\n" if ($sql == 1); # # Match parents and progeny to the catalog # $i = 1; $num_files = scalar(@parents) + scalar(@progeny) + scalar(@samples); foreach $sample (@parents, @progeny, @samples) { my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($suffix eq "gz") { ($prefix, $suffix) = ($prefix =~ /^(.+)\.(.+)$/); } if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } printf(STDERR "Matching samples to catalog; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); $cmd = $exe_path . "sstacks -b $batch_id -c $out_path/$cat_file -s $out_path/$pfile -o $out_path " . join(" ", @_sstacks) . " 2>&1"; print STDERR " $cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); print $log_fh @results; print STDERR " Loading sstacks output to $db..." if ($sql == 1); if ($gzip == 1) { $file = "$out_path/" . $pfile . ".matches.tsv.gz"; import_gzsql_file($log_fh, $file, "matches", 1); } else { $file = "$out_path/" . $pfile . ".matches.tsv"; import_sql_file($log_fh, $file, "matches", 1); } print STDERR "done.\n" if ($sql == 1); $i++; } if ($data_type eq "map") { # # Generate a set of observed haplotypes and a set of markers and generic genotypes # printf(STDERR "Generating genotypes...\n"); $cmd = $exe_path . 
"genotypes -b $batch_id -P $out_path -r 1 -c -s " . join(" ", @_genotypes) . " 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; } close($pipe_fh); } $file = "$out_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($log_fh, $file, "markers", 1); $file = "$out_path/batch_" . $batch_id . ".genotypes_1.txt"; import_sql_file($log_fh, $file, "catalog_genotypes", 1); } else { printf(STDERR "Calculating population-level summary statistics\n"); $cmd = $exe_path . "populations -b $batch_id -P $out_path -s " . join(" ", @_populations) . " 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; } close($pipe_fh); } $file = "$out_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($log_fh, $file, "markers", 1); $file = "$out_path/batch_" . $batch_id . ".sumstats.tsv"; import_sql_file($log_fh, $file, "sumstats", $pop_cnt+1); $file = "$out_path/batch_" . $batch_id . ".hapstats.tsv"; import_sql_file($log_fh, $file, "hapstats", $pop_cnt+1); # # Import the Fst files. # my $fst_cnt = 0; my (@keys, $m, $n); @keys = sort keys %pops; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $file = "$out_path/batch_" . $batch_id . ".fst_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($log_fh, $file, "fst", 1); $fst_cnt++; } } } print STDERR "Imported $fst_cnt Fst file(s).\n"; # # Import the Phi_st files. # $fst_cnt = 0; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $file = "$out_path/batch_" . $batch_id . ".phistats_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($log_fh, $file, "phist", 3); $fst_cnt++; } } } print STDERR "Imported $fst_cnt Haplotype Fst file(s).\n"; } if ($sql) { # # Index the radtags database # print STDERR "Indexing the database...\n"; $cmd = $exe_path . 
"index_radtags.pl -D $db -t -c 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); print $log_fh @results; } print $log_fh "denovo_map.pl completed at ", strftime("%Y-%m-%d %H:%M:%S",(localtime(time))), "\n"; close($log_fh); sub parse_population_map { my ($samples, $pop_ids, $pops, $grp_ids, $grps) = @_; my ($fh, @parts, $line, %ids, $file, $path); if (length($popmap_path) == 0) { foreach $path (@{$samples}) { push(@{$pop_ids}, "1"); push(@{$grp_ids}, "1"); $pops->{"1"}++; $grps->{"1"}++; } return; } open($fh, "<$popmap_path") or die("Unable to open population map, '$popmap_path', $!\n"); while ($line = <$fh>) { chomp $line; @parts = split(/\t/, $line); if (scalar(@parts) > 3) { die("Unable to parse population map, '$popmap_path' (map should contain no more than three columns).\n"); } $ids{$parts[0]} = $parts[1]; if (scalar(@parts) > 2) { push(@{$grp_ids}, $parts[2]); $grps->{$parts[2]}++; } } if (scalar(keys %{$grps}) == 0) { $grps->{"1"}++; } foreach $path (@{$samples}) { my ($prefix, $suffix); if ($path =~ /^.+\..+\.gz$/) { ($prefix, $suffix) = ($path =~ /^(.+)\.(.+)\.gz$/); } else { ($prefix, $suffix) = ($path =~ /^(.+)\.(.+)$/); } if ($prefix =~ /^.*\/.+$/) { ($file) = ($prefix =~ /^.*\/(.+)$/); } else { $file = $prefix; } if (!defined($ids{$file})) { die("Unable to find '$file' in the population map, '$popmap_path'.\n"); } push(@{$pop_ids}, $ids{$file}); $pops->{$ids{$file}}++; } print STDERR "Parsed population map: ", scalar(@{$samples}), " files in ", scalar(keys %{$pops}); scalar(keys %{$pops}) == 1 ? print STDERR " population" : print STDERR " populations"; print STDERR " and ", scalar(keys %{$grps}); scalar(keys %{$grps}) == 1 ? print STDERR " group.\n" : print STDERR " groups.\n"; close($fh); } sub check_input_files { my ($parents, $progeny, $samples) = @_; # # Check that no duplicate files were specified. 
# my (%files, $file); foreach $file (@{$parents}, @{$progeny}, @{$samples}) { $files{$file}++; } foreach $file (keys %files) { if ($files{$file} > 1) { print STDERR "A duplicate file was specified which may create undefined results, '$file'\n"; usage(); } } # # Check that all the files exist and are accessible. # foreach $file (@{$parents}) { if (!-e $file) { print STDERR "Unable to locate parental file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$parents}), " parental file(s).\n" if (scalar(@{$parents}) > 0); foreach $file (@{$progeny}) { if (!-e $file) { print STDERR "Unable to locate progeny file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$progeny}), " progeny file(s).\n" if (scalar(@{$progeny}) > 0); foreach $file (@{$samples}) { if (!-e $file) { print STDERR "Unable to locate sample file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$samples}), " sample file(s).\n" if (scalar(@{$samples}) > 0); } sub write_results { my ($results, $log_fh) = @_; my $line; foreach $line (@{$results}) { if ($line =~ /\r/) { $line =~ s/^.+\r(.*\n)$/\1/; } print $log_fh $line; } } sub import_sql_file { my ($log_fh, $file, $table, $skip_lines) = @_; my (@results, $ignore); $ignore = "IGNORE $skip_lines LINES" if ($skip_lines > 0); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table $ignore"` if ($sql == 1 && $dry_run == 0); if ($sql == 1) { print $log_fh "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$file' INTO TABLE $table $ignore\"\n", @results; } } sub import_gzsql_file { my ($log_fh, $file, $table, $skip_lines) = @_; my (@results, $ignore); $ignore = "IGNORE $skip_lines LINES" if ($skip_lines > 0); # # Get a temporary file name and create a named pipe. # my $tmpdir = File::Spec->tmpdir(); my $named_pipe = mktemp($tmpdir . 
"/denovo_map_XXXXXX"); if ($sql == 1 && $dry_run == 0) { mkfifo($named_pipe, 0700) || die("Unable to create named pipe for loading gzipped data: $named_pipe, $!"); print $log_fh "Streaming $file into named pipe $named_pipe.\n"; } # # Dump our gzipped data onto the named pipe. # system("gunzip -c $file > $named_pipe &") if ($sql == 1 && $dry_run == 0); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore"` if ($sql == 1 && $dry_run == 0); if ($sql == 1) { print $log_fh "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore\"\n", @results; } # # Remove the pipe. # unlink($named_pipe) if ($sql == 1 && $dry_run == 0); } sub parse_command_line { my ($arg); while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { push(@parents, shift @ARGV); } elsif ($_ =~ /^-r$/) { push(@progeny, shift @ARGV); } elsif ($_ =~ /^-s$/) { push(@samples, shift @ARGV); } elsif ($_ =~ /^-d$/) { $dry_run++; } elsif ($_ =~ /^-o$/) { $out_path = shift @ARGV; } elsif ($_ =~ /^-D$/) { $desc = shift @ARGV; } elsif ($_ =~ /^-e$/) { $exe_path = shift @ARGV; } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-i$/) { $sample_id = shift @ARGV; } elsif ($_ =~ /^-a$/) { $date = shift @ARGV; } elsif ($_ =~ /^-S$/) { $sql = 0; } elsif ($_ =~ /^-B$/) { $db = shift @ARGV; } elsif ($_ =~ /^-m$/) { $min_cov = shift @ARGV; } elsif ($_ =~ /^-P$/) { $min_rcov = shift @ARGV; } elsif ($_ =~ /^-O$/) { $popmap_path = shift @ARGV; push(@_populations, "-M " . $popmap_path); } elsif ($_ =~ /^-A$/) { $arg = shift @ARGV; push(@_genotypes, "-t " . $arg); $arg = lc($arg); if ($arg ne "gen" && $arg ne "cp" && $arg ne "f2" && $arg ne "bc1" && $arg ne "dh") { print STDERR "Unknown genetic mapping cross specified: '$arg'\n"; usage(); } } elsif ($_ =~ /^-t$/) { push(@_ustacks, "-d -r"); } elsif ($_ =~ /^-T$/) { $arg = shift @ARGV; push(@_ustacks, "-p " . $arg); push(@_cstacks, "-p " . $arg); push(@_sstacks, "-p " . 
$arg); push(@_populations, "-t " . $arg); } elsif ($_ =~ /^-M$/) { push(@_ustacks, "-M " . shift @ARGV); } elsif ($_ =~ /^-N$/) { push(@_ustacks, "-N " . shift @ARGV); } elsif ($_ =~ /^-n$/) { push(@_cstacks, "-n " . shift @ARGV); } elsif ($_ =~ /^-H$/) { push(@_ustacks, "-H "); } elsif ($_ =~ /^--bound_low$/) { push(@_ustacks, "--bound_low " . shift @ARGV); push(@_ustacks, "--model_type bounded"); } elsif ($_ =~ /^--bound_high$/) { push(@_ustacks, "--bound_high " . shift @ARGV); push(@_ustacks, "--model_type bounded"); } elsif ($_ =~ /^--alpha$/) { push(@_ustacks, "--alpha " . shift @ARGV); } elsif ($_ =~ /^-X$/) { # # Pass an arbitrary command-line option to a pipeline program. # # Command line option must be of the form '-X "program:option"' # $arg = shift @ARGV; my ($prog, $opt) = ($arg =~ /^(\w+):(.+)$/); if ($prog eq "ustacks") { push(@_ustacks, $opt); } elsif ($prog eq "cstacks") { push(@_cstacks, $opt); } elsif ($prog eq "sstacks") { push(@_sstacks, $opt); } elsif ($prog eq "genotypes") { push(@_genotypes, $opt); } elsif ($prog eq "populations") { push(@_populations, $opt); } else { print STDERR "Unknown pipeline program, '$arg'\n"; usage(); } } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } $exe_path = $exe_path . "/" if (substr($exe_path, -1) ne "/"); $out_path = substr($out_path, 0, -1) if (substr($out_path, -1) eq "/"); if ($batch_id !~ /^\d+$/ || $batch_id < 0) { print STDERR "You must specify a batch ID and it must be an integer (e.g. 
1, 2, 3).\n"; usage(); } if ($sql > 0 && length($date) == 0) { $date = strftime("%Y-%m-%d", (localtime(time))); } if (scalar(@parents) > 0 && scalar(@samples) > 0) { print STDERR "You must specify either parent or sample files, but not both.\n"; usage(); } if (scalar(@parents) == 0 && scalar(@samples) == 0) { print STDERR "You must specify at least one parent or sample file.\n"; usage(); } if (scalar(@samples) > 0) { $data_type = "population"; } else { $data_type = "map"; } } sub version { print STDERR "denovo_map.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR <: lower bound for epsilon, the error rate, between 0 and 1.0 (default 0). --bound_high : upper bound for epsilon, the error rate, between 0 and 1.0 (default 1). --alpha : chi square significance level required to call a heterozygote or homozygote, either 0.1, 0.05 (default), 0.01, or 0.001. Arbitrary command line options: -X "program:option": pass a command line option to one of the pipeline components, e.g.'-X "ustacks:--max_locus_stacks 4"'. EOQ exit(0); } stacks-1.35/scripts/exec_velvet.pl000644 000765 000024 00000021334 12441417455 020051 0ustar00catchenstaff000000 000000 #!/usr/bin/env perl # # Copyright 2011, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . 
# use strict; use constant stacks_version => "_VERSION_"; my $debug = 0; my $paired = 0; my $amos = 0; my $ins_len_dist = 0; my $single_path = ""; my $paired_path = ""; my $sanger_path = ""; my $out_path = "."; my $white_list = ""; my $hash_len = 27; my $insert_len = 0; my $exp_cov = 0; my $cov_cut = 0.0; my $min_len = 0; my $read_trk = 0; my $clean = 1; my $collate = 0; my $exe_path = ""; my $velveth = "velveth"; my $velvetg = "velvetg"; parse_command_line(); if (length($exe_path) > 0) { $velveth = $exe_path . "/" . $velveth; $velvetg = $exe_path . "/" . $velvetg; } # # Test that we can execute the velvet programs # die ("Unable to find '" . $velveth . "'.\n") if (!-e $velveth || !-x $velveth); die ("Unable to find '" . $velvetg . "'.\n") if (!-e $velvetg || !-x $velvetg); my (@locus_files, $num_files, $file, $input_file, $output_file, $hres_file, $gres_file, $collate_fh); build_file_list(\@locus_files); $num_files = scalar(@locus_files); if ($collate) { open($collate_fh, ">$out_path/collated.fa") or die("Unable to open collate file, $!\n"); } my ($sing_data, $pair_data, $sang_data, $ins, $cov, $afg, $cut, $min, $read, $cln); $ins = $paired > 0 ? "-ins_length $insert_len" : "-ins_length auto"; $cov = $paired > 0 ? "-exp_cov $exp_cov" : "-exp_cov auto"; $cut = $cov_cut > 0 ? "-cov_cutoff $cov_cut" : "-cov_cutoff auto"; $read = $read_trk > 0 ? "-read_trkg yes" : ""; $cln = $clean > 0 ? "-very_clean yes" : ""; #$min = $min_len > 0 ? 
"-min_contig_lgth $min_len" : "" # # Write out the parameters for this assembly # open(PARAM, "> $out_path/velvet_parameters.txt") or die("Unable to open parameter file: $!\n"); print PARAM "Single-end Path: ", $single_path, "\n", "Paired-end Path: ", $paired_path, "\n", "Sanger Path: ", $sanger_path, "\n", "Hash Length: ", $hash_len, "\n", "Insert Length: ", $ins, "\n", "Coverage: ", $cov, "\n", "Coverage Cutoff: ", $cut, "\n", "Miniumum contig length: ", $min, "\n", "Read tracking: ", $read, "\n", "Very Clean: ", $cln, "\n"; close(PARAM); my $i = 1; foreach $file (@locus_files) { ($file) = ($file =~ /(.+)\.fa/); $output_file = $out_path . "/" . $file; $hres_file = $out_path . "/" . $file . "-h.output"; $gres_file = $out_path . "/" . $file . "-g.output"; $sing_data = length($single_path) > 0 ? '-short ' . $single_path . "/" . $file . ".fa" : ""; $pair_data = length($paired_path) > 0 ? '-shortPaired ' . $paired_path . "/" . $file . ".fa" : ""; $sang_data = length($sanger_path) > 0 ? '-long ' . $sanger_path . "/" . $file . ".fa" : ""; print STDERR "Assembling locus '$file'; run $i of $num_files \r"; # Execute velveth to build hash table, then velvetg to assemble print STDERR "$velveth $output_file $hash_len -fasta $sing_data $pair_data $sang_data &> $hres_file\n" if ($debug); `$velveth $output_file $hash_len -fasta $sing_data $pair_data $sang_data &> $hres_file`; print STDERR "$velvetg $output_file $ins $cov $cut $min $read $cln &> $gres_file\n" if ($debug); `$velvetg $output_file $ins $cov $cut $min $read $cln &> $gres_file`; collate_and_clean($out_path, $file, $collate_fh) if ($collate); $i++; } close($collate_fh) if ($collate); sub collate_and_clean { my ($out_path, $file, $collate_fh) = @_; my (@seqs, $seq); parse_fasta("$out_path/$file/contigs.fa", \@seqs); foreach $seq (@seqs) { next if (length($seq->{'seq'}) < $min_len); $seq->{'id'} = $file . "|" . 
$seq->{'id'}; print_fasta($collate_fh, $seq); } `rm $out_path/$file-g.output`; `rm $out_path/$file-h.output`; `rm -r $out_path/$file`; } sub parse_fasta { my ($file, $seqs) = @_; my ($fh, $line, $buf, $id, $seq); open($fh, "<$file") or die("Unable to open Velvet output file: $file, $!\n"); while ($line = <$fh>) { chomp $line; if (substr($line, 0, 1) eq ">") { if (length($buf) > 0) { $seq = {}; $seq->{'id'} = $id; $seq->{'seq'} = $buf; push(@{$seqs}, $seq); $buf = ""; } $id = substr($line, 1); } else { $buf .= $line; } } if (length($buf) > 0 && length($id) > 0) { $seq = {}; $seq->{'id'} = $id; $seq->{'seq'} = $buf; push(@{$seqs}, $seq); } close($fh); } sub print_fasta { my ($fh, $seq) = @_; my ($s); print $fh ">", $seq->{'id'}, "\n"; $s = $seq->{'seq'}; while (length($s) > 60) { print $fh substr($s, 0, 60), "\n"; $s = substr($s, 60); } print $fh $s, "\n" if (length($s) > 0); } sub build_file_list { my ($files) = @_; my (@ls, $line, $file, $path); # Load a white list of files to process if it is supplied. my @wl; if (length($white_list) > 0) { load_white_list(\@wl); } $path = length($paired_path) > 0 ? $paired_path : $single_path; @ls = `ls -1 $path/`; foreach $line (@ls) { chomp $line; next if (length($line) == 0); next if ($line !~ /.+\.fa$/ && $line !~ /.+\.fasta$/); ($file) = ($line =~ /^(.+\.fas?t?a?)/); if (scalar(@wl) > 0) { next if (!grep(/^$file$/, @wl)); } push(@{$files}, $file); } } sub load_white_list { my ($wl) = @_; open(WHITE, "<" . 
$white_list) or die("Unable to open white list file '$white_list': $!\n"); my $line = ""; while ($line = ) { chomp $line; next if (length($line) == 0); push(@{$wl}, $line); } close(WHITE); } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-s$/) { $single_path = shift @ARGV; } elsif ($_ =~ /^-p$/) { $paired_path = shift @ARGV; } elsif ($_ =~ /^-l$/) { $sanger_path = shift @ARGV; } elsif ($_ =~ /^-o$/) { $out_path = shift @ARGV; } elsif ($_ =~ /^-c$/) { $collate++; } elsif ($_ =~ /^-W$/) { $white_list = shift @ARGV; } elsif ($_ =~ /^-I$/) { $insert_len = shift @ARGV; } elsif ($_ =~ /^-C$/) { $exp_cov = shift @ARGV; } elsif ($_ =~ /^-T$/) { $cov_cut = shift @ARGV; } elsif ($_ =~ /^-R$/) { $read_trk = shift @ARGV; } elsif ($_ =~ /^-M$/) { $min_len = shift @ARGV; } elsif ($_ =~ /^-H$/) { $hash_len = shift @ARGV; } elsif ($_ =~ /^-P$/) { $paired++; } elsif ($_ =~ /^-L$/) { $clean = 0; } elsif ($_ =~ /^-e$/) { $exe_path = shift @ARGV; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option '$_'\n"; usage(); } } $single_path = substr($single_path, 0, -1) if (substr($single_path, -1) eq "/"); $paired_path = substr($paired_path, 0, -1) if (substr($paired_path, -1) eq "/"); $out_path = substr($out_path, 0, -1) if (substr($out_path, -1) eq "/"); $exe_path = substr($exe_path, 0, -1) if (substr($exe_path, -1) eq "/"); } sub version { print STDERR "exec_velvet.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR < # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Read in a set of filtering paramters, query a Stacks pipeline database based on # those filters and write the results into a compact tab-separated values or excel file. # use strict; use DBI; #use Excel::Writer::XLSX; use Spreadsheet::WriteExcel; use constant stacks_version => "_VERSION_"; my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf"; my $out_file = ""; my $type = "tsv"; my $batch_id = 0; my $data_type = "haplo"; my $map_type = "gen"; my $all_depth = 0; my $allele_depth_lim = 1; my $locus_depth_lim = 0; my $locus_lnl_lim = -10000.0; my $man_cor = 0; my $db = ""; my $translate_genotypes = {'dh' => \&trans_dh_map, 'cp' => \&trans_cp_map, 'bc1' => \&trans_bc1_map, 'f2' => \&trans_f2_map, 'gen' => \&trans_gen_map}; my @valid_filters = ("cata", "alle_l", "alle_u", "snps_l", "snps_u", "pare_l", "pare_u", "prog", "vprog", "mark", "est", "pe", "blast", "gcnt", "chisq_l", "chisq_u", "lnl_l", "lnl_u", "ref", "loc"); parse_command_line(); my (%sth, %loci, %samples, %depths, %filters); prepare_sql_handles(\%sth, \%filters); populate(\%sth, \%loci, \%samples, \%depths, \%filters); apply_corrected_genotypes(\%sth, \%loci) if ($man_cor > 0); if ($data_type eq "haplo") { write_observed_haplotypes(\%loci, \%samples); } elsif ($data_type eq "geno") { write_genotypes(\%loci, \%samples, \%depths); } print "Success\n"; sub populate { my ($sth, $loci, $samples, $depths, $filters) = @_; my (%delev); my ($row, $snp_row, $all_row, $gen_row, $locus); my @params; # # Cache all the stacks that were deleveraged. # $sth->{'delev'}->execute($batch_id); while ($row = $sth->{'delev'}->fetchrow_hashref()) { $delev{$row->{'sample_id'} . "_" . 
$row->{'tag_id'}}++; } # # Pull list of samples for this batch # $sth->{'samp'}->execute($batch_id); while ($row = $sth->{'samp'}->fetchrow_hashref()) { $samples->{$row->{'file'}} = $row->{'id'}; } prepare_filter_parameters(\@params, $filters); # # Fetch the results and populate the array of groups. # $sth->{'tag'}->execute(@params); while ($row = $sth->{'tag'}->fetchrow_hashref()) { $locus = {}; $locus->{'id'} = $row->{'tag_id'}; $locus->{'annotation'} = defined($row->{'external_id'}) ? $row->{'external_id'} : ""; $locus->{'chr'} = $row->{'chr'}; $locus->{'bp'} = $row->{'bp'}; $locus->{'delev'} = 0; $locus->{'marker'} = $row->{'marker'}; $locus->{'seq'} = $row->{'seq'}; $locus->{'alleles'} = ""; $locus->{'snps'} = ""; $locus->{'gtypes'} = {}; $locus->{'num_alleles'} = $row->{'alleles'}; $locus->{'num_snps'} = $row->{'snps'}; $locus->{'num_parents'} = $row->{'parents'}; $locus->{'num_progeny'} = $row->{'progeny'}; $locus->{'valid_progeny'} = $row->{'valid_progeny'}; $locus->{'num_ests'} = $row->{'ests'}; $locus->{'num_pe_tags'} = $row->{'pe_radtags'}; $locus->{'num_blast'} = $row->{'blast_hits'}; $locus->{'gcnt'} = $row->{'geno_cnt'}; $loci->{$row->{'tag_id'}} = $locus; } if ($data_type eq "haplo") { # # Add observed haplotypes # $sth->{'mat'}->execute($batch_id, $allele_depth_lim); while ($gen_row = $sth->{'mat'}->fetchrow_hashref()) { next if (!defined($loci->{$gen_row->{'catalog_id'}})); $locus = $loci->{$gen_row->{'catalog_id'}}; if (!defined($locus->{'gtypes'}->{$gen_row->{'file'}})) { $locus->{'gtypes'}->{$gen_row->{'file'}} = []; } push(@{$locus->{'gtypes'}->{$gen_row->{'file'}}}, {'file' => $gen_row->{'file'}, 'allele' => $gen_row->{'allele'}, 'tag_id' => $gen_row->{'tag_id'}, 'depth' => $gen_row->{'depth'}, 'lnl' => $gen_row->{'lnl'}}); # # Check if this particular sample was deleveraged # if (defined($delev{$gen_row->{'id'} . "_" . $gen_row->{'tag_id'}}) && $delev{$gen_row->{'id'} . "_" . 
$gen_row->{'tag_id'}} >= 1) { $locus->{'delev'}++; } } } elsif ($data_type eq "geno") { # # Add genotypes # $sth->{'gtypes'}->execute($batch_id); while ($gen_row = $sth->{'gtypes'}->fetchrow_hashref()) { next if (!defined($loci->{$gen_row->{'catalog_id'}})); $locus = $loci->{$gen_row->{'catalog_id'}}; if (!defined($locus->{'gtypes'}->{$gen_row->{'file'}})) { $locus->{'gtypes'}->{$gen_row->{'file'}} = []; } push(@{$locus->{'gtypes'}->{$gen_row->{'file'}}}, {'file' => $gen_row->{'file'}, 'gtype' => $gen_row->{'genotype'}}); } } # # Fetch SNPs and Alleles # $sth->{'snp'}->execute($batch_id); while ($snp_row = $sth->{'snp'}->fetchrow_hashref()) { next if (!defined($loci->{$snp_row->{'tag_id'}})); $loci->{$snp_row->{'tag_id'}}->{'snps'} .= $snp_row->{'col'} . "," . $snp_row->{'rank_1'} . ">" . $snp_row->{'rank_2'} . ";"; } $sth->{'allele'}->execute($batch_id); while ($all_row = $sth->{'allele'}->fetchrow_hashref()) { next if (!defined($loci->{$all_row->{'tag_id'}})); $loci->{$all_row->{'tag_id'}}->{'alleles'} .= $all_row->{'allele'} . ";"; } # # If exporting genotypes and a locus depth limit was specified, fetch locus depths. # if ($data_type eq "geno" && $locus_depth_lim > 0) { $sth->{'depths'}->execute($batch_id); while ($row = $sth->{'depths'}->fetchrow_hashref()) { next if (!defined($loci->{$row->{'catalog_id'}})); if (!defined($depths->{$row->{'catalog_id'}})) { $depths->{$row->{'catalog_id'}} = {}; } $depths->{$row->{'catalog_id'}}->{$row->{'file'}} += $row->{'depth'}; } } } sub apply_corrected_genotypes { my ($sth, $loci) = @_; my (%corrections, $locus, $key, $row, $sample); #print STDERR "Applying manually corrected genotypes to export data...\n"; # # Fetch the manual corrections from the database. 
# $sth->{'corr'}->execute($batch_id) or die("Unable to select results from $db.\n"); while ($row = $sth->{'corr'}->fetchrow_hashref()) { if (!defined($corrections{$row->{'catalog_id'}})) { $corrections{$row->{'catalog_id'}} = {}; } $corrections{$row->{'catalog_id'}}->{$row->{'file'}} = $row->{'genotype'}; } foreach $key (keys %{$loci}) { $locus = $loci->{$key}; next if (!defined($corrections{$locus->{'id'}})); foreach $sample (keys %{$corrections{$locus->{'id'}}}) { @{$locus->{'gtypes'}->{$sample}} = (); push(@{$locus->{'gtypes'}->{$sample}}, {'file' => $sample, 'gtype' => $corrections{$locus->{'id'}}->{$sample}}); } } } sub prepare_filter_parameters { my ($params, $filters) = @_; my ($filter); push(@{$params}, $batch_id); foreach $filter (keys %{$filters}) { if ($filter eq "snps") { push(@{$params}, $filters->{'snps_l'}); push(@{$params}, $filters->{'snps_u'}); } elsif ($filter eq "alle") { push(@{$params}, $filters->{'alle_l'}); push(@{$params}, $filters->{'alle_u'}); } elsif ($filter eq "pare") { push(@{$params}, $filters->{'pare_l'}); push(@{$params}, $filters->{'pare_u'}); } elsif ($filter eq "lnl") { push(@{$params}, $filters->{'lnl_l'}); push(@{$params}, $filters->{'lnl_u'}); } elsif ($filter eq "chisq") { push(@{$params}, $filters->{'chisq_l'}); push(@{$params}, $filters->{'chisq_u'}); } elsif ($filter eq "prog") { push(@{$params}, $filters->{'prog'}); } elsif ($filter eq "vprog") { push(@{$params}, $filters->{'vprog'}); } elsif ($filter eq "cata") { push(@{$params}, $filters->{'cata'}); } elsif ($filter eq "gcnt") { push(@{$params}, $filters->{'gcnt'}); } elsif ($filter eq "est") { push(@{$params}, 0); } elsif ($filter eq "pe") { push(@{$params}, 0); } elsif ($filter eq "blast") { push(@{$params}, 0); } elsif ($filter eq "ref") { push(@{$params}, $filters->{'ref'}); } elsif ($filter eq "loc") { push(@{$params}, $filters->{'chr'}); push(@{$params}, $filters->{'sbp'} * 1000000); push(@{$params}, $filters->{'ebp'} * 1000000); } elsif ($filter eq "mark") { if 
($filters->{'mark'} eq "Any") { push(@{$params}, "%/%"); } else { push(@{$params}, $filters->{'mark'}); } } } } sub apply_query_filters { my ($filters) = @_; my ($query, $filter) = ""; my %sql_filters = ("cata" => "(catalog_index.tag_id = ?)", "alle" => "(alleles >= ? AND alleles <= ?)", "snps" => "(snps >= ? AND snps <= ?)", "pare" => "(parents >= ? AND parents <= ?)", "prog" => "(progeny >= ?)", "vprog" => "(valid_progeny >= ?)", "lnl" => "(lnl >= ? AND lnl <= ?)", "mark" => "(marker LIKE ?)", "est" => "(ests > ?)", "pe" => "(pe_radtags > ?)", "blast" => "(blast_hits > ?)", "gcnt" => "(geno_cnt >= ?)", "chisq" => "(chisq_pval >= ? AND chisq_pval <= ?)", "ref" => "(catalog_index.type = ?)", "loc" => "(catalog_index.chr = ? && catalog_index.bp >= ? && catalog_index.bp <= ?)"); if (scalar(keys %{$filters}) > 0) { foreach $filter (keys %{$filters}) { next if (!defined($sql_filters{$filter})); $query .= " AND "; $query .= $sql_filters{$filter}; } } return $query; } sub write_observed_haplotypes { my ($loci, $samples, $filters) = @_; my ($workbook, $worksheet); my ($out_fh, $str, $cat_id, $id, $locus, $gtypes, $types, $tot_depth); if ($type eq "xls") { $workbook = Spreadsheet::WriteExcel->new($out_file) or die("Unable to initiate excel spreadsheet.\n"); $worksheet = $workbook->add_worksheet() or die("Unable to add a worksheet to our excel spreadsheet.\n"); } else { open($out_fh, ">$out_file") or die("Unable to open output file '$out_file'\n"); } # # Order the samples by sample ID # my @ordered_sam = sort {$samples->{$a} <=> $samples->{$b}} keys %{$samples}; # # Print the heading out for the spreadsheet # my $i = 0; $str = "# " if ($type ne "xls"); $str .= "Catalog ID\t" . "Annotation\t" . "Chr\t" . "BP\t" . "Consensus Sequence\t" . "Num Parents\t" . "Num Progeny\t" . "Num SNPs\t" . "SNPs\t" . "Num Alleles\t" . "Alleles\t" . "Deleveraged\t"; foreach $id (@ordered_sam) { $str .= $id . "\t"; } $str = substr($str, 0, -1); $str .= "\n"; $type eq "xls" ? 
write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; foreach $cat_id (keys %{$loci}) { $locus = $loci->{$cat_id}; $locus->{'snps'} = substr($locus->{'snps'}, 0, -1) if (length($locus->{'snps'}) > 0); $locus->{'alleles'} = substr($locus->{'alleles'}, 0, -1) if (length($locus->{'alleles'}) > 0); $str = $cat_id . "\t" . $locus->{'annotation'} . "\t" . $locus->{'chr'} . "\t" . $locus->{'bp'} . "\t" . $locus->{'seq'} . "\t" . $locus->{'num_parents'} . "\t" . $locus->{'num_progeny'} . "\t" . $locus->{'num_snps'} . "\t" . $locus->{'snps'} . "\t" . $locus->{'num_alleles'} . "\t" . $locus->{'alleles'} . "\t" . $locus->{'delev'} . "\t"; foreach $id (@ordered_sam) { $types = $locus->{'gtypes'}->{$id}; if (!defined($types)) { $str .= "\t"; next; } # # Check total locus depth. # $tot_depth = 0; foreach $type (@{$types}) { $tot_depth += $type->{'depth'}; } if ($tot_depth < $locus_depth_lim) { $str .= "\t"; next; } if ($types->[0]->{'lnl'} < $locus_lnl_lim) { $str .= "\t"; next; } foreach $type (@{$types}) { $str .= $all_depth ? $type->{'depth'} : $type->{'allele'}; $str .= "/"; } $str = substr($str, 0, -1); $str .= "\t"; } $str = substr($str, 0, -1); $str .= "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; } $str = "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; foreach $id (@ordered_sam) { $str = "\t" . $samples->{$id} . "\t" . $id . "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; } $type eq "xls" ? 
$workbook->close() : close($out_fh); } sub write_genotypes { my ($loci, $samples, $depths) = @_; my ($workbook, $worksheet); my ($out_fh, $str, $cat_id, $id, $locus, $gtypes, $types); if ($type eq "xls") { $workbook = Spreadsheet::WriteExcel->new($out_file) or die("Unable to initiate excel spreadsheet.\n"); $worksheet = $workbook->add_worksheet() or die("Unable to add a worksheet to our excel spreadsheet.\n"); } else { open($out_fh, ">$out_file") or die("Unable to open output file '$out_file'\n"); } # # Order the samples by sample ID # my @ordered_sam = sort {$samples->{$a} <=> $samples->{$b}} keys %{$samples}; # # Print the heading out for the spreadsheet # my $i = 0; $str = "Catalog ID\t" . "Annotation\t" . "Chr\t" . "BP\t" . "Marker\t"; foreach $id (@ordered_sam) { $str .= $id . "\t"; } $str = substr($str, 0, -1); $str .= "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; my ($trans_marker); foreach $cat_id (keys %{$loci}) { $locus = $loci->{$cat_id}; $trans_marker = translate_marker($map_type, $locus->{'marker'}); $str = $cat_id . "\t" . $locus->{'annotation'} . "\t" . $locus->{'chr'} . "\t" . $locus->{'bp'} . "\t" . $trans_marker . "\t"; foreach $id (@ordered_sam) { $types = $locus->{'gtypes'}->{$id}; # # Check that there is a genotype for this sample. # if (!defined($types)) { $str .= "\t"; next; } # # Check the depth of coverage for this locus, if requested. # if ($locus_depth_lim > 0) { if (!defined($depths->{$cat_id}->{$id}) || $depths->{$cat_id}->{$id} < $locus_depth_lim) { $str .= "\t"; next; } } my $trans_gtype = $translate_genotypes->{$map_type}->($trans_marker, $types->[0]->{'gtype'}); $str .= $trans_gtype . "\t"; } $str = substr($str, 0, -1); $str .= "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; } $str = "\n"; $type eq "xls" ? write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; foreach $id (@ordered_sam) { $str = $samples->{$id} . "\t" . $id . "\n"; $type eq "xls" ? 
write_excel($worksheet, $i, $str) : print $out_fh $str; $i++; } $type eq "xls" ? $workbook->close() : close($out_fh); } sub write_excel { my ($worksheet, $i, $str) = @_; chomp $str; my @row = split(/\t/, $str); my $j = 0; foreach my $r (@row) { $worksheet->write($i, $j, $r); $j++; } } sub translate_marker { my ($map_type, $in_marker) = @_; my %dictionary; return $in_marker if ($map_type eq "gen"); $dictionary{"dh"} = {}; $dictionary{"cp"} = {}; $dictionary{"f2"} = {}; $dictionary{"bc1"} = {}; $dictionary{"dh"}->{"ab/--"} = "abx--"; $dictionary{"dh"}->{"--/ab"} = "--xab"; $dictionary{"cp"}->{"ab/--"} = "lmx--"; $dictionary{"cp"}->{"--/ab"} = "--xnp"; $dictionary{"cp"}->{"ab/aa"} = "lmxll"; $dictionary{"cp"}->{"aa/ab"} = "nnxnp"; $dictionary{"cp"}->{"ab/ab"} = "hkxhk"; $dictionary{"cp"}->{"ab/ac"} = "efxeg"; $dictionary{"cp"}->{"ab/cd"} = "abxcd"; $dictionary{"f2"}->{"aa/bb"} = "aaxbb"; $dictionary{"f2"}->{"ab/cd"} = "abxcd"; $dictionary{"f2"}->{"ab/aa"} = "abxaa"; $dictionary{"f2"}->{"aa/ab"} = "aaxab"; $dictionary{"f2"}->{"ab/cc"} = "abxcc"; $dictionary{"f2"}->{"cc/ab"} = "ccxab"; $dictionary{"bc1"}->{"aa/bb"} = "aaxbb"; $dictionary{"bc1"}->{"bb/aa"} = "bbxaa"; $dictionary{"bc1"}->{"ab/cc"} = "abxcc"; $dictionary{"bc1"}->{"cc/ab"} = "ccxab"; return defined($dictionary{$map_type}->{$in_marker}) ? 
$dictionary{$map_type}->{$in_marker} : ""; } sub trans_bc1_map { my ($marker, $in_gtype) = @_; my (%types, %dictionary); $dictionary{"aaxbb"} = {}; $dictionary{"bbxaa"} = {}; $dictionary{"abxcc"} = {}; $dictionary{"ccxab"} = {}; $dictionary{"aaxbb"}->{"--"} = "-"; $dictionary{"aaxbb"}->{"aa"} = "b"; $dictionary{"aaxbb"}->{"ab"} = "h"; $dictionary{"aaxbb"}->{"bb"} = "h"; $dictionary{"bbxaa"}->{"--"} = "-"; $dictionary{"bbxaa"}->{"aa"} = "h"; $dictionary{"bbxaa"}->{"ab"} = "h"; $dictionary{"bbxaa"}->{"bb"} = "a"; $dictionary{"abxcc"}->{"--"} = "-"; $dictionary{"abxcc"}->{"ac"} = "h"; $dictionary{"abxcc"}->{"bc"} = "h"; $dictionary{"abxcc"}->{"ab"} = "b"; $dictionary{"abxcc"}->{"aa"} = "b"; $dictionary{"abxcc"}->{"bb"} = "b"; $dictionary{"ccxab"}->{"--"} = "-"; $dictionary{"ccxab"}->{"ac"} = "h"; $dictionary{"ccxab"}->{"bc"} = "h"; $dictionary{"ccxab"}->{"ab"} = "a"; $dictionary{"ccxab"}->{"aa"} = "a"; $dictionary{"ccxab"}->{"bb"} = "a"; my $out_gtype = defined($dictionary{$marker}->{lc($in_gtype)}) ? $dictionary{$marker}->{lc($in_gtype)} : "-"; if (lc($in_gtype) ne $in_gtype) { return uc($out_gtype); } else { return $out_gtype; } } sub trans_dh_map { my ($marker, $in_gtype) = @_; my (%types, %dictionary); $dictionary{"abx--"} = {}; $dictionary{"--xab"} = {}; $dictionary{"abx--"}->{"aa"} = "a"; $dictionary{"abx--"}->{"bb"} = "b"; $dictionary{"abx--"}->{"--"} = "-"; $dictionary{"--xab"}->{"aa"} = "a"; $dictionary{"--xab"}->{"bb"} = "b"; $dictionary{"--xab"}->{"--"} = "-"; my $out_gtype = defined($dictionary{$marker}->{lc($in_gtype)}) ? 
$dictionary{$marker}->{lc($in_gtype)} : "-"; if (lc($in_gtype) ne $in_gtype) { return uc($out_gtype); } else { return $out_gtype; } } sub trans_f2_map { my ($marker, $in_gtype) = @_; my (%types, %dictionary); $dictionary{"aaxbb"} = {}; $dictionary{"abxcd"} = {}; $dictionary{"abxaa"} = {}; $dictionary{"aaxab"} = {}; $dictionary{"abxcc"} = {}; $dictionary{"ccxab"} = {}; $dictionary{"aaxbb"}->{"aa"} = "a"; $dictionary{"aaxbb"}->{"ab"} = "h"; $dictionary{"aaxbb"}->{"bb"} = "b"; $dictionary{"aaxbb"}->{"--"} = "-"; $dictionary{"abxcd"}->{"aa"} = "a"; $dictionary{"abxcd"}->{"ab"} = "a"; $dictionary{"abxcd"}->{"bb"} = "a"; $dictionary{"abxcd"}->{"cc"} = "b"; $dictionary{"abxcd"}->{"cd"} = "b"; $dictionary{"abxcd"}->{"dd"} = "b"; $dictionary{"abxcd"}->{"ac"} = "h"; $dictionary{"abxcd"}->{"ad"} = "h"; $dictionary{"abxcd"}->{"bc"} = "h"; $dictionary{"abxcd"}->{"bd"} = "h"; $dictionary{"abxcd"}->{"--"} = "-"; $dictionary{"abxaa"}->{"aa"} = "-"; $dictionary{"abxaa"}->{"ab"} = "-"; $dictionary{"abxaa"}->{"bb"} = "a"; $dictionary{"abxaa"}->{"--"} = "-"; $dictionary{"aaxab"}->{"aa"} = "-"; $dictionary{"aaxab"}->{"ab"} = "-"; $dictionary{"aaxab"}->{"bb"} = "b"; $dictionary{"aaxab"}->{"--"} = "-"; $dictionary{"abxcc"}->{"a"} = "a"; $dictionary{"abxcc"}->{"ab"} = "a"; $dictionary{"abxcc"}->{"bb"} = "a"; $dictionary{"abxcc"}->{"cc"} = "b"; $dictionary{"abxcc"}->{"ac"} = "-"; $dictionary{"abxcc"}->{"bc"} = "-"; $dictionary{"abxcc"}->{"--"} = "-"; $dictionary{"ccxab"}->{"aa"} = "b"; $dictionary{"ccxab"}->{"ab"} = "b"; $dictionary{"ccxab"}->{"bb"} = "b"; $dictionary{"ccxab"}->{"cc"} = "a"; $dictionary{"ccxab"}->{"ac"} = "-"; $dictionary{"ccxab"}->{"bc"} = "-"; $dictionary{"ccxab"}->{"--"} = "-"; my $out_gtype = defined($dictionary{$marker}->{lc($in_gtype)}) ? 
$dictionary{$marker}->{lc($in_gtype)} : "-"; if (lc($in_gtype) ne $in_gtype) { return uc($out_gtype); } else { return $out_gtype; } } sub trans_cp_map { my ($marker, $in_gtype) = @_; my (%types, %dictionary); $dictionary{"lmx--"} = {}; $dictionary{"--xnp"} = {}; $dictionary{"lmxll"} = {}; $dictionary{"nnxnp"} = {}; $dictionary{"hkxhk"} = {}; $dictionary{"efxeg"} = {}; $dictionary{"abxcd"} = {}; $dictionary{"lmx--"}->{"--"} = "--"; $dictionary{"lmx--"}->{"aa"} = "ll"; $dictionary{"lmx--"}->{"bb"} = "lm"; $dictionary{"lmx--"}->{"ab"} = "lm"; $dictionary{"--xnp"}->{"--"} = "--"; $dictionary{"--xnp"}->{"aa"} = "nn"; $dictionary{"--xnp"}->{"bb"} = "np"; $dictionary{"--xnp"}->{"ab"} = "np"; $dictionary{"lmxll"}->{"--"} = "--"; $dictionary{"lmxll"}->{"aa"} = "ll"; $dictionary{"lmxll"}->{"ab"} = "lm"; $dictionary{"nnxnp"}->{"--"} = "--"; $dictionary{"nnxnp"}->{"aa"} = "nn"; $dictionary{"nnxnp"}->{"ab"} = "np"; $dictionary{"hkxhk"}->{"--"} = "--"; $dictionary{"hkxhk"}->{"ab"} = "hk"; $dictionary{"hkxhk"}->{"aa"} = "hh"; $dictionary{"hkxhk"}->{"bb"} = "kk"; $dictionary{"efxeg"}->{"--"} = "--"; $dictionary{"efxeg"}->{"ab"} = "ef"; $dictionary{"efxeg"}->{"ac"} = "eg"; $dictionary{"efxeg"}->{"bc"} = "fg"; $dictionary{"efxeg"}->{"aa"} = "ee"; $dictionary{"abxcd"}->{"--"} = "--"; $dictionary{"abxcd"}->{"ac"} = "ac"; $dictionary{"abxcd"}->{"ad"} = "ad"; $dictionary{"abxcd"}->{"bc"} = "bc"; $dictionary{"abxcd"}->{"bd"} = "bd"; my $out_gtype = defined($dictionary{$marker}->{lc($in_gtype)}) ? $dictionary{$marker}->{lc($in_gtype)} : "-"; if (lc($in_gtype) ne $in_gtype) { return uc($out_gtype); } else { return $out_gtype; } } sub trans_gen_map { my ($marker, $in_gtype) = @_; return $in_gtype; } sub prepare_sql_handles { my ($sth, $filters) = @_; # # Connect to the database, check for the existence of a MySQL config file in the home # directory first, otherwise use the stacks-distributed one. # my $cnf = (defined($ENV{"HOME"}) && -e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . 
"/.my.cnf" : $mysql_config; $sth->{'dbh'} = DBI->connect("DBI:mysql:$db:mysql_read_default_file=$cnf") or die("Unable to connect to the $db MySQL Database!\n" . $DBI::errstr); my $query; $query = "SELECT sample_id, tag_id FROM tag_index " . "WHERE batch_id=? AND deleveraged=true"; $sth->{'delev'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT id, file FROM samples " . "WHERE batch_id=? ORDER BY id"; $sth->{'samp'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT tag_id, allele FROM catalog_alleles " . "WHERE batch_id=?"; $sth->{'allele'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT tag_id, col, rank_1, rank_2 FROM catalog_snps " . "WHERE batch_id=?"; $sth->{'snp'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT catalog_id, samples.id as id, samples.sample_id, samples.type, file, tag_id, allele, depth, lnl " . "FROM matches " . "JOIN samples ON (matches.sample_id=samples.id) " . "WHERE matches.batch_id=? AND matches.depth>?"; $sth->{'mat'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT catalog_id, file, depth " . "FROM matches " . "JOIN samples ON (matches.sample_id=samples.id) " . "WHERE matches.batch_id=?"; $sth->{'depths'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT catalog_id, samples.id as id, samples.sample_id, samples.type, file, genotype " . "FROM catalog_genotypes " . "JOIN samples ON (catalog_genotypes.sample_id=samples.id) " . "WHERE catalog_genotypes.batch_id=?"; $sth->{'gtypes'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT gc.catalog_id, gc.sample_id, gc.genotype, file " . "FROM genotype_corrections as gc " . "JOIN samples ON (gc.sample_id=samples.id) " . 
"WHERE gc.batch_id=?"; $sth->{'corr'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT catalog_index.tag_id as tag_id, catalog_index.chr, catalog_index.bp, " . "snps, alleles, parents, progeny, valid_progeny, " . "seq, marker, chisq_pval, lnl, ratio, ests, pe_radtags, blast_hits, geno_cnt, external_id " . "FROM catalog_index " . "JOIN catalog_tags ON (catalog_index.cat_id=catalog_tags.id) " . "LEFT JOIN catalog_annotations ON " . "(" . "catalog_index.batch_id=catalog_annotations.batch_id AND " . "catalog_index.tag_id=catalog_annotations.catalog_id" . ") " . "WHERE catalog_index.batch_id=?"; $query .= apply_query_filters($filters); $sth->{'tag'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); } sub close_sql_handles { my ($sth) = @_; my $key; foreach $key (keys %{$sth}) { next if ($key =~ /dbh/); $sth->{$key}->finish(); } foreach $key (keys %{$sth}) { next if ($key !~ /dbh/); $sth->{$key}->disconnect(); } } sub parse_command_line { my ($filter, $name, $value); while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-f$/) { $out_file = shift @ARGV; } elsif ($_ =~ /^-o$/) { $type = shift @ARGV; } elsif ($_ =~ /^-a$/) { $data_type = lc(shift @ARGV); } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-D$/) { $db = shift @ARGV; } elsif ($_ =~ /^-m$/) { $map_type = lc(shift @ARGV); } elsif ($_ =~ /^-A$/) { $allele_depth_lim = shift @ARGV; } elsif ($_ =~ /^-L$/) { $locus_depth_lim = shift @ARGV; } elsif ($_ =~ /^-I$/) { $locus_lnl_lim = shift @ARGV; } elsif ($_ =~ /^-d$/) { $all_depth++; } elsif ($_ =~ /^-c$/) { $man_cor++; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } elsif ($_ =~ /^-F$/) { $filter = shift @ARGV; ($name, $value) = split(/=/, $filter); if (length($name) == 0 || length($value) == 0) { print STDERR "Error parsing filter '$filter'\n"; usage(); } $filters{$name} = $value; } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } if 
(defined($filters{'chr'})) { $filters{'loc'} = 1; $filters{'sbp'} = 0 if (!defined($filters{'sbp'})); $filters{'ebp'} = 500000000 if (!defined($filters{'ebp'})); } if (defined($filters{'snps_l'}) || defined($filters{'snps_u'})) { $filters{'snps'} = 1; $filters{'snps_l'} = 1 if (!defined($filters{'snps_l'})); $filters{'snps_u'} = 100 if (!defined($filters{'snps_u'})); } if (defined($filters{'alle_l'}) || defined($filters{'alle_u'})) { $filters{'alle'} = 1; $filters{'alle_l'} = 1 if (!defined($filters{'alle_l'})); $filters{'alle_u'} = 100 if (!defined($filters{'alle_u'})); } if (defined($filters{'pare_l'}) || defined($filters{'pare_u'})) { $filters{'pare'} = 1; $filters{'pare_l'} = 1 if (!defined($filters{'pare_l'})); $filters{'pare_u'} = 1000 if (!defined($filters{'pare_u'})); } if (defined($filters{'lnl_l'}) || defined($filters{'lnl_u'})) { $filters{'lnl'} = 1; $filters{'lnl_l'} = -500 if (!defined($filters{'lnl_l'})); $filters{'lnl_u'} = 0 if (!defined($filters{'lnl_u'})); } if ($out_file eq "") { print STDERR "You must specify the file to write data to!\n"; usage(); } if ($type ne "tsv" && $type ne "xls") { print STDERR "Unknown output file type specified '$type'.\n"; usage(); } if ($data_type ne "haplo" && $data_type ne "geno") { print STDERR "Unknown data type specified, 'haplo' and 'geno' are currently accepted.\n"; usage(); } if ($data_type eq "geno" && $map_type ne "bc1" && $map_type ne "dh" && $map_type ne "f2" && $map_type ne "cp" && $map_type ne "gen") { print STDERR "Unknown map type specified, 'bc1', 'dh', 'f2', 'cp', and 'gen' are currently accepted.\n"; usage(); } if ($data_type ne "geno" && $man_cor > 0) { print STDERR "You can only specify manual corrections when exporting genotypes.\n"; usage(); } if ($data_type ne "haplo" && $all_depth > 0) { print STDERR "You must use a data type of 'haplo' to export allele depths.\n"; usage(); } } sub version { print STDERR "export_sql.pl ", stacks_version, "\n"; } sub usage { version(); my $filt; my $i = 1; 
foreach my $f (@valid_filters) { $filt .= $f . ", "; $filt .= "\n " if ($i % 10 == 0); $i++; } $filt = substr($filt, 0, -11); print STDERR "export_sql.pl -D db -b batch_id -a type -f file -o tsv|xls [-m type -c] [-F filter=value ...] [-L lim] [-d] [-h]\n", " D: database to export from.\n", " b: batch ID of the dataset to export.\n", " a: type of data to export, either 'geno' or 'haplo', for genotypes or observed haplotypes.\n", " f: file to output data.\n", " o: type of data to export: 'tsv' or 'xls'.\n", " d: output depths of alleles instead of the allele values (must use 'haplo' data type).\n", " m: map type. If genotypes are to be exported, specify the map type.\n", " c: include manual corrections if exporting genotypes.\n", " Filters that are applied to select among the catalog loci:\n", " F: one or more filters in the format name=value.\n", " Supported filters: \n", " $filt\n\n", " Filters to be applied to individual sets of haplotype calls (for those selected catalog loci):\n", " A: specify an minimum allele depth limit.\n", " L: specify a minimum locus depth limit.\n", " I: specify a minimum locus log likelihood limit.\n", " h: display this help message.\n\n"; exit(0); } stacks-1.35/scripts/extract_interpop_chars.pl000644 000765 000024 00000017251 12441417455 022315 0ustar00catchenstaff000000 000000 #!/usr/bin/env perl # # Copyright 2010-2013, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. 
If not, see . # use strict; use Bio::SeqIO; use constant true => 1; use constant false => 0; my $debug = 0; my $in_file = ""; my $barcode_list = ""; my $out_file = ""; my $pop_limit = 10; parse_command_line(); my (@barcodes, %barcode_key, %ids, %characters, %polymorph_level, %site_dist); load_barcode_list(\@barcodes, \%barcode_key); my ($id, $barcode); # # Initialize the characters hash. # foreach $barcode (@barcodes) { $characters{$barcode} = ""; $polymorph_level{$barcode} = {}; $polymorph_level{$barcode}->{'poly_loci'} = 0; $polymorph_level{$barcode}->{'loci'} = 0; $polymorph_level{$barcode}->{'poly'} = 0; $polymorph_level{$barcode}->{'sites'} = 0; } # # Obtain a list of unique tags to process # parse_interspecies_tags($in_file, \%ids); my $i = 1; my $num_ids = keys %ids; foreach $id (keys %ids) { print STDERR "Processing tag $i of $num_ids. \r" if ($i % 1000 == 0); process_tag($ids{$id}, \@barcodes, \%characters, \%polymorph_level, \%site_dist); $i++; } print_results(\%characters, \%barcode_key, \%polymorph_level, \%site_dist); print STDERR "\n"; sub process_tag { my ($tag, $barcodes, $characters, $poly_level, $dist) = @_; my (%substack, $bc, $len, $col); return if (check_tag_counts($tag) == false); # if (scalar(keys %{$tag->{'count'}}) >= 10) { # print STDERR "Keeping tag ", $tag->{'id'}, " with tags from ", scalar(keys %{$tag->{'count'}}), " populations.\n"; # } # # Make sure enough populations have this marker before recording it. # return if (scalar(keys %{$tag->{'seqs'}}) < $pop_limit); foreach $bc (@{$barcodes}) { $substack{$bc} = []; } foreach $bc (keys %{$tag->{'seqs'}}) { my $aref; # # Store the individual reads as a two-dimensional array. # @{$aref} = split(//, $tag->{'seqs'}->{$bc}); $substack{$bc} = $aref; # # Tally the number of non-homozygous sites and the total # number of sites at each locus. 
# $len = scalar(@{$aref}); $poly_level->{$bc}->{'loci'}++; $poly_level->{$bc}->{'sites'} += scalar(@{$aref}); my $poly = 0; foreach $col (0..$len - 1) { if ($aref->[$col] eq "N") { $poly_level->{$bc}->{'poly'}++; $poly++; } } if ($poly > 0) { $poly_level->{$bc}->{'poly_loci'}++; } } $len = 0; foreach $bc (@{$barcodes}) { $len = scalar(@{$substack{$bc}}); last if ($len > 0); } foreach $col (0..$len - 1) { if (homozygous(\%substack, $col) == false) { record_character(\%substack, $col, $characters, $dist); } } } sub check_tag_counts { my ($tag) = @_; # # Check to make sure that there is only a single tag from # each individual sample. # my ($bc); foreach $bc (keys %{$tag->{'count'}}) { return false if ($tag->{'count'}->{$bc} > 1); } return true; } sub homozygous { my ($substack, $col) = @_; my ($bc, %nuc); foreach $bc (keys %{$substack}) { if (scalar(@{$substack->{$bc}}) > 0) { $nuc{$substack->{$bc}->[$col]}++; } } my @keys = sort {$nuc{$b} <=> $nuc{$a}} keys(%nuc); return true if (scalar(@keys) == 1); return true if (scalar(@keys) == 2 && ($keys[0] eq "N" || $keys[1] eq "N")); return false; } sub record_character { my ($stack, $col, $characters, $dist) = @_; my ($bc, $cnt); $cnt = 0; foreach $bc (keys %{$stack}) { if (scalar(@{$stack->{$bc}}) == 0) { $characters->{$bc} .= "N"; } else { $characters->{$bc} .= $stack->{$bc}->[$col]; $cnt++ if ($stack->{$bc}->[$col] ne "N"); } } $dist->{$cnt}++; } sub print_results { my ($characters, $key, $poly_level, $dist) = @_; my ($out, $seq, $log_fh, $barcode); $out = Bio::SeqIO->new(-file => ">$out_file", -format => "fasta"); foreach $barcode (sort keys %{$characters}) { $seq = Bio::Seq->new('-seq' => $characters->{$barcode}, '-display_id' => $key->{$barcode}, '-alphabet' => 'dna'); print STDERR "Writing sequence for population '", $key->{$barcode}, " / ", $barcode, "' with a length ", $seq->length(), "\n"; $out->write_seq($seq); } print STDERR "Pop\tTotal Loci\tNon-hom Loci\tTotal Sites\tNon-homozygous sites\n"; foreach $barcode 
(sort keys %{$poly_level}) { print STDERR $key->{$barcode}, "\t", $poly_level->{$barcode}->{'loci'}, "\t", $poly_level->{$barcode}->{'poly_loci'}, "\t", $poly_level->{$barcode}->{'sites'}, "\t", $poly_level->{$barcode}->{'poly'}, "\n"; } print STDERR "Number of Populations\tSites\n"; foreach $barcode (sort {$b <=> $a} keys %{$dist}) { print STDERR $barcode, "\t", $dist->{$barcode}, "\n"; } } sub parse_interspecies_tags { my ($in_path, $ids) = @_; my (@parts, $line, $id, $barcode, $catalog_id); open(IN, "<$in_path") or die("Unable to open input file '$in_path'; $!\n"); while ($line = ) { chomp $line; @parts = split(/\t/, $line); next if ($parts[5] eq "consensus"); $id = $parts[2]; ($barcode, $catalog_id) = ($parts[7] =~ /^(\d+)\_(\d+)/); #($barcode, $catalog_id) = ($parts[4] =~ /^(\d+)\_(\d+)/); #print STDERR "Barcode_key: $barcode, $barcode_key{$barcode}\n"; #next if ($barcode_key{$barcode} eq "CB2"); if (!defined($ids->{$id})) { $ids->{$id} = {}; $ids->{$id}->{'id'} = $parts[2]; $ids->{$id}->{'seqs'} = {}; $ids->{$id}->{'count'} = {}; $ids->{$id}->{'batch_id'} = $parts[1]; } $ids->{$id}->{'seqs'}->{$barcode} = $parts[8]; #$ids->{$id}->{'seqs'}->{$barcode} = $parts[5]; $ids->{$id}->{'count'}->{$barcode}++; #print STDERR "Adding '$parts[8]' with barcode $barcode to $id\n"; } } sub load_barcode_list { my ($bl, $bk) = @_; open(BC, "<" . 
$barcode_list) or die("Unable to open barcodes file '$barcode_list': $!\n"); my ($line, $pop_id, $sample_id); while ($line = ) { chomp $line; next if (length($line) == 0 || substr($line, 0, 1) eq "#"); ($pop_id, $sample_id) = ($line =~ /^(\w+)\t(\d+)$/); $bk->{$sample_id} = $pop_id; push(@{$bl}, $sample_id); } close(BC); if (scalar(@{$bl}) == 0) { print STDERR "Unable to load any barcodes from '$barcode_list'\n"; usage(); } } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-f$/) { $in_file = shift @ARGV; } elsif ($_ =~ /^-b$/) { $barcode_list = shift @ARGV; } elsif ($_ =~ /^-o$/) { $out_file = shift @ARGV; } elsif ($_ =~ /^-p$/) { $pop_limit = shift @ARGV; } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line options received: $_\n"; usage(); } } } sub usage { print << "EOQ"; extract-interpop-chars.pl -f in_file -b barcodes [-o path] [-p limit] [-d] [-h] f: input file containing interspecies tags. b: list of barcodes to process (one barcode per population). o: output file. p: minimum number of populations required before recording a marker. h: display this help message. d: turn on debug output. EOQ exit(0); } stacks-1.35/scripts/index_radtags.pl000644 000765 000024 00000047636 12533677757 020410 0ustar00catchenstaff000000 000000 #!/usr/bin/env perl # # Copyright 2010, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. 
If not, see . # # # Written by Julian Catchen # use strict; use File::Temp qw/tempfile/; use DBI; use constant stacks_version => "_VERSION_"; my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf"; my $sql_path = "_PKGDATADIR_" . "sql/"; my $sql_tag_table = ""; my $sql_cat_table = ""; my $sql_chr_table = ""; my $db = ""; my $debug = 0; my $catalog_index = 0; my $tag_index = 0; my $radome_index = 0; parse_command_line(); my $cnf = (-e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . "/.my.cnf" : $mysql_config; # # Make sure the SQL definition files are available # if ($catalog_index && !-e $sql_cat_table) { print STDERR "Unable to locate catalog SQL definition.\n"; usage(); } if ($tag_index && !-e $sql_tag_table) { print STDERR "Unable to locate tag_index SQL definition.\n"; usage(); } # # Connect to the database and prepare our queries. # my (%sth); prepare_sql_handles(\%sth); if ($catalog_index) { gen_cat_index(\%sth); } if ($tag_index) { gen_tag_index(\%sth); } close_sql_handles(\%sth); sub gen_cat_index { my ($sth) = @_; die ("Unable to find SQL file: '$sql_cat_table'\n") if (!-e $sql_tag_table); print STDERR "Generating catalog tag index\n"; my ($fh, $catalog_file) = tempfile("catalog_index_XXXXXXXX", UNLINK => 1, TMPDIR => 1); my ($row, $tag, $count, $par_cnt, $pro_cnt, $allele_cnt, $marker, $uncor_marker, $valid_pro, $chisq_pval, $lnl, $ratio, $ests, $pe_radtags, $blast_hits, $geno_cnt, $ref_type, $ref_id, $bp); my (%snps, %markers, %genotypes, %seqs, %hits, %parents, %progeny, %alleles, %chrs, %radome); print STDERR " Fetching catalog SNPs..."; fetch_catalog_snps(\%sth, \%snps); print STDERR "done.\n"; print STDERR " Fetching markers..."; fetch_markers(\%sth, \%markers); print STDERR "done.\n"; print STDERR " Fetching genotypes..."; fetch_genotypes(\%sth, \%genotypes); print STDERR "done.\n"; print STDERR " Fetching associated sequences..."; sequence_matches(\%sth, \%seqs, \%hits); print STDERR "done.\n"; print STDERR " Fetching catalog matches..."; 
catalog_matches(\%sth, \%parents, \%progeny, \%alleles); print STDERR "done.\n"; $ref_type = ""; $ref_id = 0; if ($radome_index) { print STDERR " Fetching reference RAD data..."; radome_ref(\%sth, \%radome); print STDERR "done.\n"; } print STDERR " Assembling catalog tags at the database..."; $sth->{'cat_tags'}->execute() or die("Unable to select results from $db.\n"); print STDERR "done.\n"; #my $num_rows = $sth->{'cat_tags'}->rows(); #my $i = 1; print STDERR " Processing catalog tags\n"; while ($row = $sth->{'cat_tags'}->fetchrow_hashref()) { # # Determine the number of SNPs contained within this RAD-Tag # if (defined($snps{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $count = $snps{$row->{'batch_id'}}->{$row->{'tag_id'}}; } else { $count = 0; } # # Determine the number of parental samples that match this catalog RAD-Tag # if (defined($parents{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $par_cnt = scalar(keys %{$parents{$row->{'batch_id'}}->{$row->{'tag_id'}}}); } else { $par_cnt = 0; } # # Determine the number of progeny samples that match this catalog RAD-Tag # if (defined($progeny{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $pro_cnt = scalar(keys %{$progeny{$row->{'batch_id'}}->{$row->{'tag_id'}}}); } else { $pro_cnt = 0; } # # Determine the number of genotypes associated with this RAD-Tag # if (defined($genotypes{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $geno_cnt = $genotypes{$row->{'batch_id'}}->{$row->{'tag_id'}}; } else { $geno_cnt = 0; } # # Determine the number of alleles for this catalog RAD-Tag # if (defined($alleles{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $allele_cnt = scalar(keys %{$alleles{$row->{'batch_id'}}->{$row->{'tag_id'}}}); } else { $allele_cnt = 0; } # # Annotate the RAD site # if ($radome_index) { my $key = $row->{'chr'} . "|" . $row->{'bp'} . "|" . $row->{'strand'}; if (defined($radome{$key})) { $ref_type = $radome{$key}->{'type'}; $ref_id = $radome{$key}->{'id'}; } else { # # Check for a read aligned on the other strand. 
# $bp = $row->{'bp'} - 4; $key = $row->{'chr'} . "|" . $bp . "|" . $row->{'strand'}; if (defined($radome{$key})) { $ref_type = $radome{$key}->{'type'}; $ref_id = $radome{$key}->{'id'} } else { $ref_type = "genomic"; $ref_id = 0; } } } # # Determine if there are any sequences associated with this marker # $ests = 0; $pe_radtags = 0; $blast_hits = 0; if (defined($seqs{$row->{'batch_id'}}->{$row->{'tag_id'}}->{'est'})) { $ests = $seqs{$row->{'batch_id'}}->{$row->{'tag_id'}}->{'est'}; } if (defined($seqs{$row->{'batch_id'}}->{$row->{'tag_id'}}->{'pe_radtag'})) { $pe_radtags = $seqs{$row->{'batch_id'}}->{$row->{'tag_id'}}->{'pe_radtag'}; } if (defined($hits{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $blast_hits = $hits{$row->{'batch_id'}}->{$row->{'tag_id'}}; } # # Does this RAD-Tag have a mappable marker? # if (defined($markers{$row->{'batch_id'}}->{$row->{'tag_id'}})) { $tag = $markers{$row->{'batch_id'}}->{$row->{'tag_id'}}; $marker = $tag->{'marker'}; $uncor_marker = $tag->{'uncor_marker'}; $chisq_pval = $tag->{'chisq_pval'}; $valid_pro = $tag->{'valid_pro'}; $ratio = $tag->{'ratio'}; $lnl = $tag->{'lnl'}; } else { $marker = ""; $uncor_marker = ""; $valid_pro = 0; $chisq_pval = 1.0; $ratio = ""; $lnl = 0.0; } # # Record the chromosomes present in this dataset (if any). # if (length($row->{'chr'}) > 0) { $chrs{$row->{'batch_id'}}->{$row->{'chr'}} = $row->{'bp'} > $chrs{$row->{'batch_id'}}->{$row->{'chr'}} ? 
$row->{'bp'} : $chrs{$row->{'batch_id'}}->{$row->{'chr'}}; } print $fh "0\t", $row->{'batch_id'}, "\t", $row->{'id'}, "\t", $row->{'tag_id'}, "\t", $count, "\t", $par_cnt, "\t", $pro_cnt, "\t", $allele_cnt, "\t", $marker, "\t", $uncor_marker, "\t", $valid_pro, "\t", $chisq_pval, "\t", $lnl, "\t", $ratio, "\t", $ests, "\t", $pe_radtags, "\t", $blast_hits, "\t", $geno_cnt, "\t", $row->{'chr'}, "\t", $row->{'bp'}, "\t", $ref_type, "\t", $ref_id, "\n"; } close($fh); `mysql --defaults-file=$cnf $db -e "DROP TABLE IF EXISTS catalog_index"`; `mysql --defaults-file=$cnf $db < $sql_cat_table`; import_sql_file($catalog_file, 'catalog_index'); if (scalar(keys %chrs) > 0) { my ($batch_id, $chr, $max); my ($fh, $chr_file) = tempfile("chr_index_XXXXXXXX", UNLINK => 1, TMPDIR => 1); foreach $batch_id (sort keys %chrs) { foreach $chr (sort keys %{$chrs{$batch_id}}) { # # Round up the maximum chromosome length to the nearest megabase. # $max = int($chrs{$batch_id}->{$chr} / 1000000); $max += $chrs{$batch_id}->{$chr} % 1000000 > 0 ? 
1 : 0; print $fh "0\t", $batch_id, "\t", $chr, "\t", $max, "\n"; } } `mysql --defaults-file=$cnf $db -e "DROP TABLE IF EXISTS chr_index"`; `mysql --defaults-file=$cnf $db < $sql_chr_table`; import_sql_file($chr_file, 'chr_index'); close($fh); } } sub fetch_catalog_snps { my ($sth, $snps) = @_; my ($row); $sth->{'cat_snps'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'cat_snps'}->fetchrow_hashref()) { if (!defined($snps->{$row->{'batch_id'}})) { $snps->{$row->{'batch_id'}} = {}; } $snps->{$row->{'batch_id'}}->{$row->{'tag_id'}}++; } } sub fetch_genotypes { my ($sth, $genotypes) = @_; my ($row); $sth->{'cat_geno'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'cat_geno'}->fetchrow_hashref()) { if (!defined($genotypes->{$row->{'batch_id'}})) { $genotypes->{$row->{'batch_id'}} = {}; } if ($row->{'genotype'} ne "-" && $row->{'genotype'} ne "--") { $genotypes->{$row->{'batch_id'}}->{$row->{'tag_id'}}++; } } } sub fetch_markers { my ($sth, $markers) = @_; my ($row, $tag); $sth->{'marker'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'marker'}->fetchrow_hashref()) { $tag = {}; $tag->{'marker'} = $row->{'type'}; $tag->{'uncor_marker'} = $row->{'uncor_type'}; $tag->{'chisq_pval'} = $row->{'chisq_pval'}; $tag->{'valid_pro'} = $row->{'progeny'}; $tag->{'ratio'} = $row->{'ratio'}; $tag->{'lnl'} = $row->{'lnl'}; if (!defined($markers->{$row->{'batch_id'}})) { $markers->{$row->{'batch_id'}} = {}; } $markers->{$row->{'batch_id'}}->{$row->{'catalog_id'}} = $tag; } } sub radome_ref { my ($sth, $radome) = @_; my ($row, $key, $strand); $sth->{'radome'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'radome'}->fetchrow_hashref()) { $strand = $row->{'strand'} == 1 ? "+" : "-"; $key = $row->{'chr'} . "|" . $row->{'bp'} . "|" . 
$strand; if (!defined($radome->{$key}) || $row->{'type'} eq "exon") { $radome->{$key} = {'type' => $row->{'type'}, 'id' => $row->{'id'}}; } } } sub catalog_matches { my ($sth, $parents, $progeny, $alleles) = @_; my ($row, $key); $sth->{'cat_matches'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'cat_matches'}->fetchrow_hashref()) { $key = $row->{'sample_id'} . "_" . $row->{'tag_id'}; if ($row->{'type'} eq "parent" || $row->{'type'} eq "sample") { if (!defined($parents->{$row->{'batch_id'}})) { $parents->{$row->{'batch_id'}} = {}; } if (!defined($parents->{$row->{'batch_id'}}->{$row->{'catalog_id'}})) { $parents->{$row->{'batch_id'}}->{$row->{'catalog_id'}} = {}; } $parents->{$row->{'batch_id'}}->{$row->{'catalog_id'}}->{$key}++; } elsif ($row->{'type'} eq "progeny") { if (!defined($progeny->{$row->{'batch_id'}})) { $progeny->{$row->{'batch_id'}} = {}; } if (!defined($progeny->{$row->{'batch_id'}}->{$row->{'catalog_id'}})) { $progeny->{$row->{'batch_id'}}->{$row->{'catalog_id'}} = {}; } $progeny->{$row->{'batch_id'}}->{$row->{'catalog_id'}}->{$key}++; } if (!defined($alleles->{$row->{'batch_id'}})) { $alleles->{$row->{'batch_id'}} = {}; } if (!defined($alleles->{$row->{'batch_id'}}->{$row->{'catalog_id'}})) { $alleles->{$row->{'batch_id'}}->{$row->{'catalog_id'}} = {}; } $alleles->{$row->{'batch_id'}}->{$row->{'catalog_id'}}->{$row->{'allele'}}++; } } sub sequence_matches { my ($sth, $seqs, $hits) = @_; my ($row); $sth->{'cat_seqs'}->execute() or die("Unable to select results from $db.\n"); while ($row = $sth->{'cat_seqs'}->fetchrow_hashref()) { if (!defined($seqs->{$row->{'batch_id'}})) { $seqs->{$row->{'batch_id'}} = {}; } if (!defined($seqs->{$row->{'batch_id'}}->{$row->{'catalog_id'}})) { $seqs->{$row->{'batch_id'}}->{$row->{'catalog_id'}} = {}; } $seqs->{$row->{'batch_id'}}->{$row->{'catalog_id'}}->{$row->{'type'}}++; } $sth->{'cat_hits'}->execute() or die("Unable to select results from $db.\n"); while ($row = 
$sth->{'cat_hits'}->fetchrow_hashref()) { if (!defined($hits->{$row->{'batch_id'}})) { $hits->{$row->{'batch_id'}} = {}; } $hits->{$row->{'batch_id'}}->{$row->{'catalog_id'}}++; } } sub gen_tag_index { my ($sth) = @_; die ("Unable to find SQL file: '$sql_tag_table'\n") if (!-e $sql_tag_table); my ($fh, $tag_file) = tempfile("tag_index_XXXXXXXX", UNLINK => 1, TMPDIR => 1); print STDERR "Generating unique tag index...\n"; my ($sample_row, $tags_row, $row, $catalog_id, $i, $num_samples); $sth->{'sample'}->execute() or die("Unable to select results from $db.\n"); $num_samples = $sth->{'sample'}->rows(); $i = 1; while ($sample_row = $sth->{'sample'}->fetchrow_hashref()) { print STDERR "Processing sample $i of $num_samples \r"; my (%depth, %snps, %cats); $sth->{'tags'}->execute($sample_row->{'batch_id'}, $sample_row->{'sample_id'}) or die("Unable to select results from $db.\n"); fetch_depth_counts($sth, \%depth, $sample_row->{'sample_id'}); fetch_snp_counts($sth, \%snps, $sample_row->{'sample_id'}); fetch_catalog_ids($sth, \%cats, $sample_row->{'batch_id'}, $sample_row->{'sample_id'}); while ($tags_row = $sth->{'tags'}->fetchrow_hashref()) { print $fh "0\t", $sample_row->{'batch_id'}, "\t", $sample_row->{'sample_id'}, "\t", $tags_row->{'tag_id'}, "\t", $tags_row->{'id'}, "\t", $depth{$tags_row->{'tag_id'}}, "\t", defined($snps{$tags_row->{'tag_id'}}) ? 
$snps{$tags_row->{'tag_id'}} : 0, "\t", $cats{$tags_row->{'tag_id'}}, "\t", $tags_row->{'deleveraged'}, "\t", $tags_row->{'blacklisted'}, "\t", $tags_row->{'removed'}, "\n"; } $i++; } print STDERR "\n"; close($fh); `mysql --defaults-file=$cnf $db -e "DROP TABLE IF EXISTS tag_index"`; `mysql --defaults-file=$cnf $db < $sql_tag_table`; import_sql_file($tag_file, 'tag_index'); } sub fetch_depth_counts { my ($sth, $depths, $sample_id) = @_; my ($row); # # Determine the depth of coverage for the RAD-Tags in this sample # $sth->{'depth'}->execute($sample_id) or die("Unable to select results from $db.\n"); while ($row = $sth->{'depth'}->fetchrow_hashref()) { $depths->{$row->{'tag_id'}}++; } } sub fetch_snp_counts { my ($sth, $snps, $sample_id) = @_; my ($row); # # Determine the number of SNPs contained within each RAD-Tag in this sample. # $sth->{'snps'}->execute($sample_id) or die("Unable to select results from $db.\n"); while ($row = $sth->{'snps'}->fetchrow_hashref()) { $snps->{$row->{'tag_id'}}++; } } sub fetch_catalog_ids { my ($sth, $cats, $batch_id, $sample_id) = @_; my ($row); # # Determine the catalog ID that corresponds to the RAD-Tags in this sample # $sth->{'match'}->execute($batch_id, $sample_id) or die("Unable to select results from $db.\n"); while ($row = $sth->{'match'}->fetchrow_hashref()) { $cats->{$row->{'tag_id'}} = $row->{'catalog_id'}; } } sub import_sql_file { my ($file, $table) = @_; my (@results); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table"`; print STDERR "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$file' INTO TABLE $table\"\n", @results; } sub prepare_sql_handles { my ($sth, $outg) = @_; # # Connect to the database, check for the existence of a MySQL config file in the home # directory first, otherwise use the stacks-distributed one. # $sth->{'dbh'} = DBI->connect("DBI:mysql:$db:mysql_read_default_file=$cnf") or die("Unable to connect to the $db MySQL Database!\n" . 
$DBI::errstr); my $query; $query = "SELECT batch_id, id as sample_id, type FROM samples"; $sth->{'sample'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT unique_tags.id as id, tag_id, seq, deleveraged, blacklisted, removed FROM unique_tags " . "JOIN samples ON (unique_tags.sample_id=samples.id) " . "WHERE relationship='consensus' AND samples.batch_id=? AND unique_tags.sample_id=?"; $sth->{'tags'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT tag_id FROM unique_tags " . "WHERE relationship!='consensus' AND relationship != 'model' AND unique_tags.sample_id=?"; $sth->{'depth'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT tag_id FROM snps " . "JOIN samples ON (snps.sample_id=samples.id) " . "WHERE snps.type='E' AND samples.id=?"; $sth->{'snps'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT tag_id, catalog_id FROM matches " . "WHERE batch_id=? AND sample_id=?"; $sth->{'match'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT catalog_tags.id as id, tag_id, batch_id, chr, bp, strand, seq FROM catalog_tags " . "JOIN batches ON (catalog_tags.batch_id=batches.id) WHERE relationship='consensus'"; $sth->{'cat_tags'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT batch_id, tag_id FROM catalog_snps WHERE type='E'"; $sth->{'cat_snps'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT batch_id, catalog_id as tag_id, sample_id, genotype FROM catalog_genotypes"; $sth->{'cat_geno'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT batch_id, catalog_id, type, uncor_type, progeny, chisq_pval, ratio, lnl FROM markers"; $sth->{'marker'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT samples.batch_id, catalog_id, tag_id, matches.sample_id, allele, type FROM matches " . 
"JOIN samples ON (samples.id=matches.sample_id)"; $sth->{'cat_matches'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); # "JOIN samples ON (samples.sample_id=matches.sample_id AND samples.batch_id=matches.batch_id)"; $query = "SELECT batch_id, catalog_id, type FROM sequence"; $sth->{'cat_seqs'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT batch_id, catalog_id FROM sequence_blast"; $sth->{'cat_hits'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); $query = "SELECT id, chr, bp, strand, type FROM ref_radome"; $sth->{'radome'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); } sub close_sql_handles { my ($sth) = @_; my $key; foreach $key (keys %{$sth}) { next if ($key =~ /dbh/); $sth->{$key}->finish(); } foreach $key (keys %{$sth}) { next if ($key !~ /dbh/); $sth->{$key}->disconnect(); } } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-D$/) { $db = shift @ARGV; } elsif ($_ =~ /^-s$/) { $sql_path = shift @ARGV; } elsif ($_ =~ /^-c$/) { $catalog_index++; } elsif ($_ =~ /^-t$/) { $tag_index++; } elsif ($_ =~ /^-r$/) { $radome_index++; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line options received: $_\n"; usage(); } } if (length($db) == 0) { print STDERR "You must specify a database to index.\n"; usage(); } $sql_path .= "/" if (substr($sql_path, -1, 1) ne "/"); $sql_tag_table = $sql_path . "tag_index.sql"; $sql_cat_table = $sql_path . "catalog_index.sql"; $sql_chr_table = $sql_path . "chr_index.sql"; } sub version { print STDERR "index_radtags.pl ", stacks_version, "\n"; } sub usage { version(); print << "EOQ"; index_radtags.pl -D db [-c] [-t] [-s path] [-d] [-h] D: radtag database to examine. c: generate a catalog index. t: generate a unique tags index. 
s: path to SQL definition files for catalog/tag index tables (if not in default, installed location). h: display this help message. d: turn on debug output. EOQ exit(0); } stacks-1.35/scripts/load_radtags.pl000644 000765 000024 00000035362 12533677757 020211 0ustar00catchenstaff000000 000000 #!/usr/bin/env perl # # Copyright 2011-2014, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Load a set of output files from the Stacks pipeline into a Stacks MySQL database. # # By Julian Catchen # use strict; use POSIX; use File::Temp qw/ mktemp /; use File::Spec; use constant stacks_version => "_VERSION_"; my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf"; my $dry_run = 0; my $db = ""; my $in_path = "."; my $sample_id = 0; my $desc = ""; my $date = ""; my $batch_id = 0; my $batch = 0; my $catalog = 0; my $stacks_type = ""; my $popmap_path = ""; my $ignore_tags = 0; my $white_list = ""; parse_command_line(); my $cnf = (-e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . 
"/.my.cnf" : $mysql_config; if (length($date) == 0) { $date = strftime("%Y-%m-%d", (localtime(time))); } my (@results, @files, @catalog, @parent_ids, @pop_ids, %pops, %sample_ids); build_file_list(\@files, \@catalog); extract_parental_ids(scalar(@files), \@catalog, \@parent_ids); extract_sample_ids(\@files, \%sample_ids); parse_population_map(\@files, \%sample_ids, \@pop_ids, \%pops); print STDERR "Stacks pipeline type: '", $stacks_type, "'\n", scalar(@files), " files to process: ", join(", ", @files), "\n", scalar(@catalog), " catalog files to process: ", join(", ", @catalog), "\n"; if ($stacks_type eq "map") { print STDERR scalar(@parent_ids), " parent IDs identified: ", join(", ", @parent_ids), "\n"; } if ($batch) { if (!$dry_run) { @results = `mysql --defaults-file=$cnf $db -e "INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$stacks_type'"`; } print STDERR "mysql --defaults-file=$cnf $db ", "-e \"INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$stacks_type'\"\n", @results; } my ($file, $f, $i, $cnt, $type, $pop_id); # # Import the catalog # if ($catalog) { foreach $file (@catalog) { $f = $in_path . "/$file" . ".catalog.tags.tsv"; if (-e $f) { import_sql_file($f, "catalog_tags", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".catalog.tags.tsv.gz"; import_gzsql_file($f, "catalog_tags", 1); } $f = $in_path . "/$file" . ".catalog.snps.tsv"; if (-e $f) { import_sql_file($f, "catalog_snps", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".catalog.snps.tsv.gz"; import_gzsql_file($f, "catalog_snps", 1); } $f = $in_path . "/$file" . ".catalog.alleles.tsv"; if (-e $f) { import_sql_file($f, "catalog_alleles", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".catalog.alleles.tsv.gz"; import_gzsql_file($f, "catalog_alleles", 1); } } } if ($stacks_type eq "map") { $f = "$in_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($f, "markers", 1); $f = "$in_path/batch_" . 
$batch_id . ".genotypes_1.txt"; import_sql_file($f, "catalog_genotypes", 1); } elsif ($stacks_type eq "population") { $f = "$in_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($f, "markers", 1); $f = "$in_path/batch_" . $batch_id . ".sumstats.tsv"; import_sql_file($f, "sumstats", scalar(keys %pops) + 1); $f = "$in_path/batch_" . $batch_id . ".hapstats.tsv"; import_sql_file($f, "hapstats", scalar(keys %pops) + 1); # # Import the Fst files. # my $fst_cnt = 0; my (@keys, $m, $n); @keys = sort keys %pops; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $f = "$in_path/batch_" . $batch_id . ".fst_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($f, "fst", 1); $fst_cnt++; } } } print STDERR "Imported $fst_cnt SNP Fst file(s).\n"; # # Import the Phi_st files. # $fst_cnt = 0; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $f = "$in_path/batch_" . $batch_id . ".phistats_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($f, "phist", 3); $fst_cnt++; } } } print STDERR "Imported $fst_cnt Haplotype Fst file(s).\n"; } $i = 1; $cnt = scalar(@files); foreach $file (sort {$sample_ids{$a} <=> $sample_ids{$b}} @files) { print STDERR "Processing sample $i of $cnt\n"; $f = $in_path . "/$file" . ".matches.tsv"; if (-e $f) { import_sql_file($f, "matches", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".matches.tsv.gz"; import_gzsql_file($f, "matches", 1); } $i++; } $i = 1; foreach $file (sort {$sample_ids{$a} <=> $sample_ids{$b}} @files) { print STDERR "Processing sample $i of $cnt\n"; # # Pull out the sample ID and insert it into the database # $sample_id = $sample_ids{$file}; if ($stacks_type eq "map") { $type = (grep(/^$sample_id$/, @parent_ids) > 0) ? 
'parent' : 'progeny'; } else { $type = "sample"; } $pop_id = shift(@pop_ids); if (!$dry_run) { @results = `mysql --defaults-file=$cnf $db -e "INSERT INTO samples SET id=$sample_id, sample_id=$sample_id, batch_id=$batch_id, type='$type', file='$file', pop_id='$pop_id'"`; } print STDERR "mysql --defaults-file=$cnf $db ", "-e \"INSERT INTO samples SET id=$sample_id, sample_id=$sample_id, batch_id=$batch_id, type='$type', file='$file', pop_id='$pop_id'\"\n", @results; $f = $in_path . "/$file" . ".tags.tsv"; if (-e $f) { import_sql_file($f, "unique_tags", 1) if ($ignore_tags == 0); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".tags.tsv.gz"; import_gzsql_file($f, "unique_tags", 1) if ($ignore_tags == 0); } $f = $in_path . "/$file" . ".snps.tsv"; if (-e $f) { import_sql_file($f, "snps", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".snps.tsv.gz"; import_gzsql_file($f, "snps", 1); } $f = $in_path . "/$file" . ".alleles.tsv"; if (-e $f) { import_sql_file($f, "alleles", 1); } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . 
".alleles.tsv.gz"; import_gzsql_file($f, "alleles", 1); } $i++; } print STDERR "\nDon't forget to index your Stacks database -- run index_radtags.pl\n\n"; sub parse_population_map { my ($samples, $sample_ids, $pop_ids, $pops) = @_; my ($fh, @parts, $line, %ids, $file, $path); if (length($popmap_path) == 0) { foreach $path (@{$samples}) { push(@{$pop_ids}, 1); $pops->{1}++; } return; } open($fh, "<$popmap_path") or die("Unable to open population map, '$popmap_path', $!\n"); while ($line = <$fh>) { chomp $line; @parts = split(/\t/, $line); if (scalar(@parts) > 3) { die("Unable to parse population map, '$popmap_path' (map should contain no more than three columns).\n"); } $ids{$parts[0]} = $parts[1]; } foreach $file (sort {$sample_ids->{$a} <=> $sample_ids->{$b}} @{$samples}) { if (!defined($ids{$file})) { die("Unable to find '$file' in the population map, '$popmap_path'.\n"); } push(@{$pop_ids}, $ids{$file}); $pops->{$ids{$file}}++; } print STDERR "Parsed population map: ", scalar(@{$samples}), " files in ", scalar(keys %pops), " populations.\n"; close($fh); } sub extract_parental_ids { my ($sample_cnt, $catalog, $parental_ids) = @_; my ($fh, $prefix, $path, $line, @parts, $tag_id, @tag_ids, $id, $tag, %ids); print STDERR "Scanning catalog for sample IDs..."; foreach $prefix (@catalog) { $path = $in_path . "/" . $prefix . ".catalog.tags.tsv"; if (-e $path) { open($fh, "<$path") or die("Unable to open catalog file: '$path', $!\n"); } elsif (-e $path . ".gz") { open($fh, "gunzip -c " . $path . ".gz |") or die("Unable to open catalog file: '$path', $!\n"); } while ($line = <$fh>) { chomp $line; @parts = split(/\t/, $line); @tag_ids = split(/,/, $parts[8]); foreach $tag_id (@tag_ids) { ($id, $tag) = split(/_/, $tag_id); $ids{$id}++; } } close($fh); } @{$parental_ids} = keys %ids; # # Determine the type of pipeline run: either a 'map' or a 'population' type. # If all samples are parental, i.e. in the catalog, then this is a population type # otherwise, it is a map type. 
# if (length($stacks_type) == 0) { $stacks_type = (scalar(@{$parental_ids}) == $sample_cnt) ? "population" : "map"; } print STDERR "done.\n"; } sub extract_sample_ids { my ($files, $sample_ids) = @_; my ($file, $f, $line, @results, @parts); print STDERR "Collecting sample IDs from Stacks output files..."; foreach $file (@{$files}) { $f = $in_path . "/$file" . ".tags.tsv"; if (-e $f) { @results = `head -n 2 $f | tail -n 1`; } elsif (-e $f . ".gz") { $f = $in_path . "/$file" . ".tags.tsv.gz"; @results = `gunzip -c $f | head -n 2 | tail -n 1`; } else { die("Unable to find file $f\n"); } chomp $results[0]; @parts = split(/\t/, $results[0]); # # Sample ID is expected to be the first column in the *.tags.tsv file. # $sample_ids->{$file} = $parts[1]; } print STDERR "done.\n"; } sub import_sql_file { my ($file, $table, $skip_lines) = @_; my (@results, $ignore); if (!-e $file) { print STDERR "File '$file' does not exist.\n"; return; } $ignore = " IGNORE $skip_lines LINES" if ($skip_lines > 0); if (!$dry_run) { @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table$ignore"`; } print STDERR "mysql --defaults-file=$cnf $db ", "-e \"LOAD DATA LOCAL INFILE '$file' INTO TABLE $table$ignore\"\n", @results; } sub import_gzsql_file { my ($file, $table, $skip_lines) = @_; my (@results, $ignore); if (!-e $file) { print STDERR "File '$file' does not exist.\n"; return; } $ignore = "IGNORE $skip_lines LINES" if ($skip_lines > 0); # # Get a temporary file name and create a named pipe. # my $tmpdir = File::Spec->tmpdir(); my $named_pipe = mktemp($tmpdir . "/denovo_map_XXXXXX"); if ($dry_run == 0) { mkfifo($named_pipe, 0700) || die("Unable to create named pipe for loading gzipped data: $named_pipe, $!"); } print STDERR "Streaming $file into named pipe $named_pipe.\n"; # # Dump our gzipped data onto the named pipe. 
# system("gunzip -c $file > $named_pipe &") if ($dry_run == 0); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore"` if ($dry_run == 0); print STDERR "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore\"\n", @results; # # Remove the pipe. # unlink($named_pipe) if ($dry_run == 0); } sub build_file_list { my ($files, $catalog_files) = @_; my (@wl, @ls, $line, $prefix); # Load a white list of files to process if it is supplied. if (length($white_list) > 0) { load_white_list(\@wl); } @ls = `ls -1 $in_path/*.tags.tsv* 2> /dev/null`; if (scalar(@ls) == 0) { print STDERR "Unable to locate any input files to process within '$in_path'\n"; usage(); } foreach $line (@ls) { chomp $line; if ($line =~ /\.tags\.tsv\.gz$/) { ($prefix) = ($line =~ /$in_path\/(.+)\.tags\.tsv\.gz/); } else { ($prefix) = ($line =~ /$in_path\/(.+)\.tags\.tsv/); } next if ($prefix =~ /catalog/); if (scalar(@wl) > 0) { next if (!grep(/^$prefix$/, @wl)); } push(@{$files}, $prefix); } if ($catalog > 0) { @ls = `ls -1 $in_path/*.catalog.tags.tsv* 2> /dev/null`; if (scalar(@ls) == 0) { print STDERR "Unable to locate any catalog input files to process within '$in_path'\n"; usage(); } foreach $line (@ls) { chomp $line; if ($line =~ /\.catalog\.tags\.tsv\.gz$/) { ($prefix) = ($line =~ /$in_path\/(.+)\.catalog\.tags\.tsv\.gz/); } else { ($prefix) = ($line =~ /$in_path\/(.+)\.catalog\.tags\.tsv/); } if (scalar(@wl) > 0) { next if (!grep(/^$prefix$/, @wl)); } push(@{$catalog_files}, $prefix); } } } sub load_white_list { my ($wl) = @_; open(WHITE, "<" . 
$white_list) or die("Unable to open white list file '$white_list': $!\n"); my $line = ""; while ($line = ) { chomp $line; next if (length($line) == 0); push(@{$wl}, $line); } close(WHITE); } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { $in_path = shift @ARGV; } elsif ($_ =~ /^-D$/) { $db = shift @ARGV; } elsif ($_ =~ /^-c$/) { $catalog++; } elsif ($_ =~ /^-B$/) { $batch++; } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-e$/) { $desc = shift @ARGV; } elsif ($_ =~ /^-a$/) { $date = shift @ARGV; } elsif ($_ =~ /^-W$/) { $white_list = shift @ARGV; } elsif ($_ =~ /^-t$/) { $stacks_type = shift @ARGV; } elsif ($_ =~ /^-M$/) { $popmap_path = shift @ARGV; } elsif ($_ =~ /^-U$/) { $ignore_tags++; } elsif ($_ =~ /^-d$/) { $dry_run++; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } $in_path = substr($in_path, 0, -1) if (substr($in_path, -1) eq "/"); if (length($db) == 0) { print STDERR "You must specify a database.\n"; usage(); } } sub version { print STDERR "load_radtags.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR < # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # use strict; use DBI; use constant stacks_version => "_VERSION_"; my $debug = 0; my $mysql_config = "_PKGDATADIR_" . 
"sql/mysql.cnf"; my $db = ""; my $batch_id = 0; my $in_path = ""; my $seq_path = ""; my $seq_type = ""; parse_command_line(); my (%sth); prepare_sql_handles(\%sth); if ($seq_type eq "est") { load_ests(\%sth); } elsif ($seq_type eq "pe_radtag") { load_pe_radtags(\%sth); } else { print STDERR "Unknown sequence type specified '$seq_type'\n"; } sub load_pe_radtags { my ($sth) = @_; # # Open the file containing sequences and load them into the database. # my ($fh, $line, $id, $buf, $catalog_id, $seq_id, $count); open($fh, "<$seq_path") or die("Unable to open FASTA input file: '$seq_path', $!\n"); $count = 0; while ($line = <$fh>) { chomp $line; if (substr($line, 0, 1) eq ">") { if (length($buf) > 0) { ($catalog_id, $seq_id) = split(/\|/, $id); $sth->{'load'}->execute($batch_id, $catalog_id, $seq_type, $seq_id, $buf) or die("Unable to insert results into $db.\n"); $count++; $buf = ""; } $id = substr($line, 1); } else { $buf .= $line; } } if (length($buf) > 0 && length($id) > 0) { ($catalog_id, $seq_id) = split(/\|/, $id); $sth->{'load'}->execute($batch_id, $catalog_id, $seq_type, $seq_id, $buf) or die("Unable to insert results into $db.\n"); $count++; } print STDERR "Loaded $count RAD-Tag paired-end contigs into the database.\n"; close($fh); close_sql_handles(\%sth); } sub load_ests { my ($sth) = @_; # # Load a TSV file of BLAST hits linking sequences to markers in the catalog. # my %hits; load_blast_hits(\%hits); my ($fh, $line, $id, $buf, $catalog_id, $count); # # Open the file containing sequences. For each sequence that has a BLAST # hit to a marker in the catalog, load the sequence into the database. 
# open($fh, "<$seq_path") or die("Unable to open FASTA input file: '$seq_path', $!\n"); $count = 0; while ($line = <$fh>) { chomp $line; if (substr($line, 0, 1) eq ">") { if (length($buf) > 0) { if (defined($hits{$id})) { foreach $catalog_id (@{$hits{$id}}) { $sth->{'load'}->execute($batch_id, $catalog_id, $seq_type, $id, $buf) or die("Unable to insert results into $db.\n"); $count++; } } $buf = ""; } $id = substr($line, 1); } else { $buf .= $line; } } if (length($buf) > 0 && length($id) > 0) { if (defined($hits{$id})) { foreach $catalog_id (@{$hits{$id}}) { $sth->{'load'}->execute($batch_id, $catalog_id, $seq_type, $id, $buf) or die("Unable to insert results into $db.\n"); $count++; } } } print STDERR "Loaded $count EST sequences into the database.\n"; close($fh); close_sql_handles(\%sth); } sub load_blast_hits { my ($hits) = @_; my (@parts, $line); open(HITS, "<" . $in_path) or die("Unable to open BLAST hits file '$in_path' $!\n"); # # First line is a header. # $line = ; while ($line = ) { chomp $line; @parts = split(/\t/, $line); if (!defined($hits->{$parts[7]})) { $hits->{$parts[7]} = []; } if (!grep(/^$parts[2]$/, @{$hits->{$parts[7]}})) { push(@{$hits->{$parts[7]}}, $parts[2]); } } print STDERR "Loaded ", scalar(keys %{$hits}), " distinct BLAST hits.\n"; close(HITS); } sub prepare_sql_handles { my ($sth) = @_; my $cnf = (defined($ENV{"HOME"}) && -e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . "/.my.cnf" : $mysql_config; $sth->{'dbh'} = DBI->connect("DBI:mysql:$db:mysql_read_default_file=$cnf") or die("Unable to connect to the $db MySQL Database!\n" . 
$DBI::errstr); my $query; $query = "INSERT INTO sequence SET batch_id=?, catalog_id=?, type=?, seq_id=?, seq=?"; $sth->{'load'} = $sth->{'dbh'}->prepare($query) or die($sth->{'dbh'}->errstr()); } sub close_sql_handles { my ($sth) = @_; my $key; foreach $key (keys %{$sth}) { next if ($key =~ /dbh/); $sth->{$key}->finish(); } foreach $key (keys %{$sth}) { next if ($key !~ /dbh/); $sth->{$key}->disconnect(); } } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { $in_path = shift @ARGV; } elsif ($_ =~ /^-f$/) { $seq_path = shift @ARGV; } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-D$/) { $db = shift @ARGV; } elsif ($_ =~ /^-t$/) { $seq_type = shift @ARGV; } elsif ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } if (length($seq_type) == 0) { print STDERR "You must specify the sequence type.\n"; usage(); } if ($batch_id == 0) { print STDERR "You must specify the batch ID.\n"; usage(); } } sub version { print STDERR "load_sequences.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR < # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Process the data for a genetic map: build stacks in parents and progeny, # create a catalog from the parents, and match progeny against the catatlog. # Call genotypes, and load all data into an MySQL database along the way. 
# # For the database interactions to work, the 'mysql' program is expected to be # on the path and sufficient permissions set to access the specified database. # use strict; use POSIX; use File::Temp qw/ mktemp /; use File::Spec; use constant stacks_version => "_VERSION_"; my $dry_run = 0; my $sql = 1; my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf"; my $exe_path = "_BINDIR_"; my $out_path = ""; my $popmap_path = ""; my $db = ""; my $data_type = "map"; my $batch_id = -1; my $sample_id = 1; my $desc = ""; # Database description of this dataset my $date = ""; # Date relevent to this data, formatted for SQL: 2009-05-31 my @parents; my @progeny; my @samples; my (@_pstacks, @_cstacks, @_sstacks, @_genotypes, @_populations); my $cmd_str = $0 . " " . join(" ", @ARGV); parse_command_line(); check_input_files(\@parents, \@progeny, \@samples); my $cnf = (-e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . "/.my.cnf" : $mysql_config; # # Check for the existence of the necessary pipeline programs # die ("Unable to find '" . $exe_path . "pstacks'.\n") if (!-e $exe_path . "pstacks" || !-x $exe_path . "pstacks"); die ("Unable to find '" . $exe_path . "cstacks'.\n") if (!-e $exe_path . "cstacks" || !-x $exe_path . "cstacks"); die ("Unable to find '" . $exe_path . "sstacks'.\n") if (!-e $exe_path . "sstacks" || !-x $exe_path . "sstacks"); die ("Unable to find '" . $exe_path . "genotypes'.\n") if (!-e $exe_path . "genotypes" || !-x $exe_path . "genotypes"); die ("Unable to find '" . $exe_path . "populations'.\n") if (!-e $exe_path . "populations" || !-x $exe_path . "populations"); die ("Unable to find '" . $exe_path . "index_radtags.pl'.\n") if (!-e $exe_path . "index_radtags.pl" || !-x $exe_path . 
"index_radtags.pl"); my ($i, $log, $log_fh, $pipe_fh, $pfile, $file, $num_files, $parent, $sample, %map); $i = 1; $num_files = scalar(@parents) + scalar(@progeny) + scalar(@samples); my (@types, $type, @pop_ids, $pop, %pops, @grp_ids, $grp, %grps); parse_population_map(\@samples, \@pop_ids, \%pops, \@grp_ids, \%grps) if ($data_type eq "population"); foreach $parent (@parents) { push(@types, "parent"); push(@pop_ids, "1"); push(@grp_ids, "1"); } foreach $parent (@progeny) { push(@types, "progeny"); push(@pop_ids, "1"); push(@grp_ids, "1"); } foreach $parent (@samples) { push(@types, "sample"); } my (@results, $cmd, $pop_cnt); $pop_cnt = scalar(keys %pops); # # Open the log file # $log = "$out_path/ref_map.log"; open($log_fh, ">$log") or die("Unable to open log file '$log'; $!\n"); print $log_fh "ref_map.pl version ", stacks_version, " started at ", strftime("%Y-%m-%d %H:%M:%S",(localtime(time))), "\n", $cmd_str, "\n"; if ($sql == 1) { # # SQL Batch ID for this set of Radtags, along with description and date of # sequencing. Insert this batch data into the database. 
# `mysql --defaults-file=$cnf $db -e "INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$data_type'"` if ($dry_run == 0); print $log_fh "mysql --defaults-file=$cnf $db -e \"INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$data_type'\"\n"; } my $gzip = 0; foreach $sample (@parents, @progeny, @samples) { my ($ftype, $pfile) = ""; my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } if ($suffix =~ /^bowtie$/) { $ftype = "bowtie"; } elsif ($suffix =~ /^sam$/) { $ftype = "sam"; } elsif ($suffix =~ /^bam$/) { $ftype = "bam"; $gzip = 1; } elsif ($suffix =~ /^map$/) { $ftype = "tsv"; } else { die("Unknown input file type.\n"); } $type = shift @types; $pop = shift @pop_ids; $grp = shift @grp_ids; printf("Identifying unique stacks; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); printf($log_fh "Identifying unique stacks; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); if ($sql == 1) { if ($dry_run == 0) { `mysql --defaults-file=$cnf $db -e "INSERT INTO samples SET sample_id=$i, batch_id=$batch_id, type='$type', file='$pfile', pop_id='$pop', group_id='$grp'"`; @results = `mysql --defaults-file=$cnf $db -N -B -e "SELECT id FROM samples WHERE sample_id=$i AND batch_id=$batch_id AND type='$type' AND file='$pfile'"`; chomp $results[0]; $sample_id = $results[0]; } print $log_fh "mysql --defaults-file=$cnf $db -e \"INSERT INTO samples SET sample_id=$i, batch_id=$batch_id, type='$type', file='$pfile', pop_id='$pop', group_id='$grp'\"\n"; } $map{$pfile} = $sample_id; $cmd = $exe_path . "pstacks -t $ftype -f $sample -o $out_path -i $sample_id " . join(" ", @_pstacks) . " 2>&1"; print STDERR " $cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); write_results(\@results, $log_fh); print STDERR " Loading pstacks output to $db..." if ($sql == 1); if ($gzip == 1) { $file = "$out_path/$pfile" . 
".tags.tsv.gz"; import_gzsql_file($log_fh, $file, "unique_tags", 1); $file = "$out_path/$pfile" . ".snps.tsv.gz"; import_gzsql_file($log_fh, $file, "snps", 1); $file = "$out_path/$pfile" . ".alleles.tsv.gz"; import_gzsql_file($log_fh, $file, "alleles", 1); } else { $file = "$out_path/$pfile" . ".tags.tsv"; import_sql_file($log_fh, $file, "unique_tags", 1); $file = "$out_path/$pfile" . ".snps.tsv"; import_sql_file($log_fh, $file, "snps", 1); $file = "$out_path/$pfile" . ".alleles.tsv"; import_sql_file($log_fh, $file, "alleles", 1); } print STDERR "done.\n" if ($sql == 1); $i++; $sample_id++ if ($sql == 0); } my ($rid, $pfile, $parents, $cat_file); # # Generate catalog of RAD-Tags # print STDERR "Generating catalog...\n"; foreach $sample (@parents, @samples) { my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } $parents .= "-s $out_path/$pfile "; } $cat_file = "batch_" . $batch_id; $cmd = $exe_path . "cstacks -g -b $batch_id -o $out_path $parents " . join(" ", @_cstacks) . " 2>&1"; print STDERR " $cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; if ($_ =~ /failed/i) { print STDERR "Catalog construction failed.\n"; exit(1); } } close($pipe_fh); } print STDERR " Importing catalog to MySQL database..." if ($sql == 1); if ($gzip == 1) { $file = "$out_path/$cat_file" . ".catalog.tags.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_tags", 1); $file = "$out_path/$cat_file" . ".catalog.snps.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_snps", 1); $file = "$out_path/$cat_file" . ".catalog.alleles.tsv.gz"; import_gzsql_file($log_fh, $file, "catalog_alleles", 1); } else { $file = "$out_path/$cat_file" . ".catalog.tags.tsv"; import_sql_file($log_fh, $file, "catalog_tags", 1); $file = "$out_path/$cat_file" . 
".catalog.snps.tsv"; import_sql_file($log_fh, $file, "catalog_snps", 1); $file = "$out_path/$cat_file" . ".catalog.alleles.tsv"; import_sql_file($log_fh, $file, "catalog_alleles", 1); } print STDERR "done.\n" if ($sql == 1); # # Match parents and progeny to the catalog # $i = 1; $num_files = scalar(@parents) + scalar(@progeny) + scalar(@samples); foreach $sample (@parents, @progeny, @samples) { my ($prefix, $suffix) = ($sample =~ /^(.+)\.(.+)$/); if ($prefix =~ /^.*\/.+$/) { ($pfile) = ($prefix =~ /^.*\/(.+)$/); } else { $pfile = $prefix; } printf(STDERR "Matching samples to catalog; file % 3s of % 3s [%s]\n", $i, $num_files, $pfile); $rid = $map{$pfile}; $cmd = $exe_path . "sstacks -g -b $batch_id -c $out_path/$cat_file -s $out_path/$pfile -o $out_path " . join(" ", @_sstacks) . " 2>&1"; print STDERR " $cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); print $log_fh @results; print STDERR " Loading sstacks output to $db..." if ($sql == 1); if ($gzip == 1) { $file = "$out_path/" . $pfile . ".matches.tsv.gz"; import_gzsql_file($log_fh, $file, "matches", 1); } else { $file = "$out_path/" . $pfile . ".matches.tsv"; import_sql_file($log_fh, $file, "matches", 1); } print STDERR "done.\n" if ($sql == 1); $i++; } if ($data_type eq "map") { # # Generate a set of observed haplotypes and a set of markers and generic genotypes # printf(STDERR "Generating genotypes...\n"); $cmd = $exe_path . "genotypes -b $batch_id -P $out_path -r 1 -c -s " . join(" ", @_genotypes) . " 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; } close($pipe_fh); } $file = "$out_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($log_fh, $file, "markers", 1); $file = "$out_path/batch_" . $batch_id . ".genotypes_1.txt"; import_sql_file($log_fh, $file, "catalog_genotypes", 1); } else { printf(STDERR "Calculating population-level summary statistics\n"); $cmd = $exe_path . 
"populations -b $batch_id -P $out_path -s " . join(" ", @_populations) . " 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; if ($dry_run == 0) { open($pipe_fh, "$cmd |"); while (<$pipe_fh>) { print $log_fh $_; } close($pipe_fh); } $file = "$out_path/batch_" . $batch_id . ".markers.tsv"; import_sql_file($log_fh, $file, "markers", 1); $file = "$out_path/batch_" . $batch_id . ".sumstats.tsv"; import_sql_file($log_fh, $file, "sumstats", $pop_cnt+1); $file = "$out_path/batch_" . $batch_id . ".hapstats.tsv"; import_sql_file($log_fh, $file, "hapstats", $pop_cnt+1); # # Import the Fst files. # my $fst_cnt = 0; my (@keys, $m, $n); @keys = sort keys %pops; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $file = "$out_path/batch_" . $batch_id . ".fst_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($log_fh, $file, "fst", 1); $fst_cnt++; } } } print STDERR "Imported $fst_cnt SNP Fst file(s).\n"; # # Import the Phi_st files. # $fst_cnt = 0; for ($m = 0; $m < scalar(@keys); $m++) { for ($n = 0; $n < scalar(@keys); $n++) { $file = "$out_path/batch_" . $batch_id . ".phistats_" . $keys[$m] . "-" . $keys[$n] . ".tsv"; if (-e $file) { import_sql_file($log_fh, $file, "phist", 3); $fst_cnt++; } } } print STDERR "Imported $fst_cnt Haplotype Fst file(s).\n"; } if ($sql) { # # Index the radtags database # print STDERR "Indexing the database...\n"; $cmd = $exe_path . 
"index_radtags.pl -D $db -t -c 2>&1"; print STDERR "$cmd\n"; print $log_fh "$cmd\n"; @results = `$cmd` if ($dry_run == 0); print $log_fh @results; } print $log_fh "refmap_map.pl completed at ", strftime("%Y-%m-%d %H:%M:%S",(localtime(time))), "\n"; close($log_fh); sub parse_population_map { my ($samples, $pop_ids, $pops, $grp_ids, $grps) = @_; my ($fh, @parts, $line, %ids, $file, $path); if (length($popmap_path) == 0) { foreach $path (@{$samples}) { push(@{$pop_ids}, "1"); push(@{$grp_ids}, "1"); $pops->{"1"}++; $grps->{"1"}++; } return; } open($fh, "<$popmap_path") or die("Unable to open population map, '$popmap_path', $!\n"); while ($line = <$fh>) { chomp $line; @parts = split(/\t/, $line); if (scalar(@parts) > 3) { die("Unable to parse population map, '$popmap_path' (map should contain no more than three columns).\n"); } $ids{$parts[0]} = $parts[1]; if (scalar(@parts) > 2) { push(@{$grp_ids}, $parts[2]); $grps->{$parts[2]}++; } } if (scalar(keys %{$grps}) == 0) { $grps->{"1"}++; } foreach $path (@{$samples}) { my ($prefix, $suffix); if ($path =~ /^.+\..+\.gz$/) { ($prefix, $suffix) = ($path =~ /^(.+)\.(.+)\.gz$/); } else { ($prefix, $suffix) = ($path =~ /^(.+)\.(.+)$/); } if ($prefix =~ /^.*\/.+$/) { ($file) = ($prefix =~ /^.*\/(.+)$/); } else { $file = $prefix; } if (!defined($ids{$file})) { die("Unable to find '$file' in the population map, '$popmap_path'.\n"); } push(@{$pop_ids}, $ids{$file}); $pops->{$ids{$file}}++; } print STDERR "Parsed population map: ", scalar(@{$samples}), " files in ", scalar(keys %{$pops}); scalar(keys %{$pops}) == 1 ? print STDERR " population" : print STDERR " populations"; print STDERR " and ", scalar(keys %{$grps}); scalar(keys %{$grps}) == 1 ? print STDERR " group.\n" : print STDERR " groups.\n"; close($fh); } sub check_input_files { my ($parents, $progeny, $samples) = @_; # # Check that no duplicate files were specified. 
# my (%files, $file); foreach $file (@{$parents}, @{$progeny}, @{$samples}) { $files{$file}++; } foreach $file (keys %files) { if ($files{$file} > 1) { print STDERR "A duplicate file was specified which may create undefined results, '$file'\n"; usage(); } } # # Check that all the files exist and are accessible. # foreach $file (@{$parents}) { if (!-e $file) { print STDERR "Unable to locate parental file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$parents}), " parental file(s).\n" if (scalar(@{$parents}) > 0); foreach $file (@{$progeny}) { if (!-e $file) { print STDERR "Unable to locate progeny file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$progeny}), " progeny file(s).\n" if (scalar(@{$progeny}) > 0); foreach $file (@{$samples}) { if (!-e $file) { print STDERR "Unable to locate sample file '$file'\n"; usage(); } } print STDERR "Found ", scalar(@{$samples}), " sample file(s).\n" if (scalar(@{$samples}) > 0); } sub write_results { my ($results, $log_fh) = @_; my $line; foreach $line (@{$results}) { if ($line =~ /\r/) { $line =~ s/^.+\r(.*\n)$/\1/; } print $log_fh $line; } } sub import_sql_file { my ($log_fh, $file, $table, $skip_lines) = @_; my (@results, $ignore); $ignore = "IGNORE $skip_lines LINES" if ($skip_lines > 0); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table $ignore"` if ($sql == 1 && $dry_run == 0); if ($sql == 1) { print $log_fh "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$file' INTO TABLE $table $ignore\"\n", @results; } } sub import_gzsql_file { my ($log_fh, $file, $table, $skip_lines) = @_; my (@results, $ignore); $ignore = "IGNORE $skip_lines LINES" if ($skip_lines > 0); # # Get a temporary file name and create a named pipe. # my $tmpdir = File::Spec->tmpdir(); my $named_pipe = mktemp($tmpdir . 
"/denovo_map_XXXXXX"); if ($sql == 1 && $dry_run == 0) { mkfifo($named_pipe, 0700) || die("Unable to create named pipe for loading gzipped data: $named_pipe, $!"); print $log_fh "Streaming $file into named pipe $named_pipe.\n"; } # # Dump our gzipped data onto the named pipe. # system("gunzip -c $file > $named_pipe &") if ($sql == 1 && $dry_run == 0); @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore"` if ($sql == 1 && $dry_run == 0); if ($sql == 1) { print $log_fh "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore\"\n", @results; } # # Remove the pipe. # unlink($named_pipe) if ($sql == 1 && $dry_run == 0); } sub parse_command_line { my $arg; while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { push(@parents, shift @ARGV); } elsif ($_ =~ /^-r$/) { push(@progeny, shift @ARGV); } elsif ($_ =~ /^-s$/) { push(@samples, shift @ARGV); } elsif ($_ =~ /^-o$/) { $out_path = shift @ARGV; } elsif ($_ =~ /^-D$/) { $desc = shift @ARGV; } elsif ($_ =~ /^-e$/) { $exe_path = shift @ARGV; } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-i$/) { $sample_id = shift @ARGV; } elsif ($_ =~ /^-a$/) { $date = shift @ARGV; } elsif ($_ =~ /^-S$/) { $sql = 0; } elsif ($_ =~ /^-B$/) { $db = shift @ARGV; } elsif ($_ =~ /^-d$/) { $dry_run++; } elsif ($_ =~ /^-O$/) { $popmap_path = shift @ARGV; push(@_populations, "-M " . $popmap_path); } elsif ($_ =~ /^-A$/) { $arg = shift @ARGV; push(@_genotypes, "-t " . $arg); $arg = lc($arg); if ($arg ne "gen" && $arg ne "cp" && $arg ne "f2" && $arg ne "bc1" && $arg ne "dh") { print STDERR "Unknown genetic mapping cross specified: '$arg'\n"; usage(); } } elsif ($_ =~ /^-T$/) { $arg = shift @ARGV; push(@_pstacks, "-p " . $arg); push(@_cstacks, "-p " . $arg); push(@_sstacks, "-p " . $arg); push(@_populations, "-t " . $arg); } elsif ($_ =~ /^-m$/) { push(@_pstacks, "-m " . shift @ARGV); } elsif ($_ =~ /^-n$/) { push(@_cstacks, "-n " . 
shift @ARGV); } elsif ($_ =~ /^--bound_low$/) { push(@_pstacks, "--bound_low " . shift @ARGV); push(@_pstacks, "--model_type bounded"); } elsif ($_ =~ /^--bound_high$/) { push(@_pstacks, "--bound_high " . shift @ARGV); push(@_pstacks, "--model_type bounded"); } elsif ($_ =~ /^--alpha$/) { push(@_pstacks, "--alpha " . shift @ARGV); } elsif ($_ =~ /^-X$/) { # # Pass an arbitrary command-line option to a pipeline program. # # Command line option must be of the form '-X "program:option"' # $arg = shift @ARGV; my ($prog, $opt) = ($arg =~ /^(\w+):(.+)$/); if ($prog eq "pstacks") { push(@_pstacks, $opt); } elsif ($prog eq "cstacks") { push(@_cstacks, $opt); } elsif ($prog eq "sstacks") { push(@_sstacks, $opt); } elsif ($prog eq "genotypes") { push(@_genotypes, $opt); } elsif ($prog eq "populations") { push(@_populations, $opt); } else { print STDERR "Unknown pipeline program, '$arg'\n"; usage(); } } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } $exe_path = $exe_path . "/" if (substr($exe_path, -1) ne "/"); $out_path = substr($out_path, 0, -1) if (substr($out_path, -1) eq "/"); if ($batch_id !~ /^\d+$/ || $batch_id < 0) { print STDERR "You must specify a batch ID and it must be an integer (e.g. 1, 2, 3).\n"; usage(); } if ($sql > 0 && length($date) == 0) { $date = strftime("%Y-%m-%d", (localtime(time))); } if (scalar(@parents) > 0 && scalar(@samples) > 0) { print STDERR "You must specify either parent or sample files, but not both.\n"; usage(); } if (scalar(@parents) == 0 && scalar(@samples) == 0) { print STDERR "You must specify at least one parent or sample file.\n"; usage(); } if (scalar(@samples) > 0) { $data_type = "population"; } else { $data_type = "map"; } } sub version { print STDERR "ref_map.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR <: lower bound for epsilon, the error rate, between 0 and 1.0 (default 0). 
--bound_high : upper bound for epsilon, the error rate, between 0 and 1.0 (default 1). --alpha : chi square significance level required to call a heterozygote or homozygote, either 0.1, 0.05 (default), 0.01, or 0.001. Arbitrary command line options: -X "program:option": pass a command line option to one of the pipeline components, e.g.'-X "sstacks:-x"'. EOQ exit(0); } stacks-1.35/scripts/sort_read_pairs.pl000644 000765 000024 00000035110 12533677757 020734 0ustar00catchenstaff000000 000000 #!/usr/bin/env perl # # Copyright 2011-2015, Julian Catchen # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Sort paired-end sequences according to the stacks the non-paired-end # was found in. 
# # By Julian Catchen # use strict; use constant stacks_version => "_VERSION_"; use constant true => 1; use constant false => 0; my $debug = 0; my $white_list = ""; my $cat_white_list = ""; my $in_path = ""; my $out_path = ""; my $samp_path = ""; my $out_type = "fasta"; my $in_type = "fastq"; my $gzipped = false; parse_command_line(); my (@files, %matches, %stacks, %reads, %marker_wl); build_file_list(\@files); my ($file, $num_files, $i, $key); if (length($cat_white_list) > 0) { load_white_list($cat_white_list, \%marker_wl); print STDERR "Loaded ", scalar(keys %marker_wl), " catalog IDs from '$cat_white_list'\n"; } $num_files = scalar(@files); $i = 1; foreach $file (@files) { printf(STDERR "Loading catalog matches, file % 2s of % 2s [%s]\n", $i, $num_files, $file->{'prefix'}); # # Load the sstacks file, listing matches between the stacks and the catalog # load_matches($in_path, $file, \%matches, \%marker_wl); $i++; } # # Determine which catalog loci have more than a single match from each sample and blacklist them. # print STDERR "Identifying catalog loci that have more than one match from a single sample..."; my %multiple_matches; check_mult_catalog_matches(\%matches, \%multiple_matches); print STDERR "done\n", " These loci will be excluded when collating paired-end reads;\n", " A list of them has been recorded: $out_path/sort_read_pairs.log\n", scalar(keys %matches), " total catalog loci; ", scalar(keys %multiple_matches), " will be excluded, processing ", scalar(keys %matches) - scalar(keys %multiple_matches), " loci.\n"; # # Check if files already exist, if so, exit to prevent adding data to existing files. # my ($cat_id, $path); foreach $cat_id (keys %matches) { # # Check that this catalog ID only has a single match from each sample. # next if (defined($multiple_matches{$cat_id})); $path = $out_path . "/" . $cat_id; $path .= $out_type eq "fasta" ? ".fa" : ".fq"; if (-e $path) { die("Error: output files already exist. 
This program will append data to files if\n" . "they already exist. Please delete these files and re-execute sort_read_pairs.pl.\n"); } } $i = 1; foreach $file (@files) { printf(STDERR "Processing file % 2s of % 2s [%s]\n", $i, $num_files, $file->{'prefix'}); # # Load the ustacks tag file for each sample, containing the read-to-stack mappings # $stacks{$file->{'prefix'}} = {}; print STDERR " Loading tag file..."; load_stacks($in_path, $file, $stacks{$file->{'prefix'}}); print STDERR "done.\n"; # # Map the read-pairs to the stack/catalog match they correspond to. # $reads{$file->{'prefix'}} = {}; print STDERR " Loading sample file..."; $in_type eq "fastq" ? process_fastq_read_pairs($samp_path, $file, \%stacks, $reads{$file->{'prefix'}}) : process_fasta_read_pairs($samp_path, $file, \%stacks, $reads{$file->{'prefix'}}); print STDERR "done.\n"; print STDERR " Printing results..."; print_results($out_path, \%matches, \%stacks, \%reads, \%multiple_matches); print STDERR "done.\n"; # # Clean up memory usage. # print STDERR " Clearing memory..."; undef(%{$stacks{$file->{'prefix'}}}); undef(%{$reads{$file->{'prefix'}}}); print STDERR " done.\n"; $i++; } sub load_matches { my ($in_path, $in_file, $matches, $marker_wl) = @_; my ($file, $in_fh, $line, @parts, $key); if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . $in_file->{'suffix'} . ".matches.tsv.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open catalog matches file '$file', $!\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . $in_file->{'suffix'} . ".matches.tsv"; open($in_fh, "<$file") or die("Unable to open catalog matches file '$file', $!\n"); } while ($line = <$in_fh>) { chomp $line; @parts = split(/\t/, $line); if (length($cat_white_list) > 0) { next if (!defined($marker_wl->{$parts[2]})); } if (!defined($matches->{$parts[2]})) { $matches->{$parts[2]} = {}; } # # Index by catalog_ID -> sample_ID|stack_ID # $key = $in_file->{'prefix'} . "|" . 
$parts[4]; $matches->{$parts[2]}->{$key}++; } close($in_fh); } sub load_stacks { my ($in_path, $in_file, $stacks) = @_; my ($file, $in_fh, $line, @parts); if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . $in_file->{'suffix'} . ".tags.tsv.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open '$file', $!\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . $in_file->{'suffix'} . ".tags.tsv"; open($in_fh, "<$file") or die("Unable to open '$file', $!\n"); } while ($line = <$in_fh>) { chomp $line; @parts = split(/\t/, $line); next if ($parts[6] eq "consensus" || $parts[6] eq "model"); # # Index by sequence ID -> stack ID # $stacks->{substr($parts[8], 0, -2)} = $parts[2]; } close($in_fh); } sub process_fastq_read_pairs { my ($in_path, $in_file, $stacks, $reads) = @_; my ($file, $in_fh, $line, $seq, $qual, $key, $read_id); if ($in_file->{'suffix'} eq ".1") { if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . ".2.fq.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open paired-end input file '$file'\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . ".2.fq"; open($in_fh, "<$file") or die("Unable to open paired-end input file '$file'\n"); } } else { if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . ".fq_2.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open paired-end input file '$file'\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . ".fq_2"; open($in_fh, "<$file") or die("Unable to open paired-end input file '$file'\n"); } } while ($line = <$in_fh>) { next if (substr($line, 0, 1) ne "@"); chomp $line; $read_id = substr($line, 1, -2); $seq = <$in_fh>; chomp $seq; # # Read the repeated ID and the quality scores. 
# <$in_fh>; $qual = <$in_fh>; chomp $qual; $key = $stacks->{$in_file->{'prefix'}}->{$read_id}; next if (!defined($key)); if (!defined($reads->{$key})) { $reads->{$key} = []; } push(@{$reads->{$key}}, {'id' => $read_id, 'seq' => $seq, 'qual' => $qual}); } } sub process_fasta_read_pairs { my ($in_path, $in_file, $stacks, $reads) = @_; my ($file, $in_fh, $line, $seq, $qual, $key, $read_id); if ($in_file->{'suffix'} eq ".1") { if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . ".2.fa.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open paired-end input file '$file'\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . ".2.fa"; open($in_fh, "<$file") or die("Unable to open paired-end input file '$file'\n"); } } else { if ($gzipped == true) { $file = $in_path . "/" . $in_file->{'prefix'} . ".fa_2.gz"; open($in_fh, "gunzip -c $file |") or die("Unable to open paired-end input file '$file'\n"); } else { $file = $in_path . "/" . $in_file->{'prefix'} . ".fa_2"; open($in_fh, "<$file") or die("Unable to open paired-end input file '$file'\n"); } } while ($line = <$in_fh>) { next if (substr($line, 0, 1) ne ">"); chomp $line; $read_id = substr($line, 1, -2); $seq = <$in_fh>; chomp $seq; $key = $stacks->{$in_file->{'prefix'}}->{$read_id}; next if (!defined($key)); if (!defined($reads->{$key})) { $reads->{$key} = []; } push(@{$reads->{$key}}, {'id' => $read_id, 'seq' => $seq, 'qual' => ""}); } } sub print_results { my ($out_path, $matches, $stacks, $reads, $multiple_matches) = @_; my ($path, $cat_id, $sample, $stack_id, $read, $out_fh, $i, @keys, $count, $key, $mult_hits); # # If a catalog ID matches stacks from multiple samples, print them out together. # foreach $cat_id (keys %{$matches}) { # # Check that this catalog ID only has a single match from each sample. # next if (defined($multiple_matches->{$cat_id})); $path = $out_path . "/" . $cat_id; $path .= $out_type eq "fasta" ? 
".fa" : ".fq"; open($out_fh, ">>$path") or die("Unable to open $path; '$!'\n"); foreach $key (keys %{$matches->{$cat_id}}) { ($sample, $stack_id) = split(/\|/, $key); foreach $read (@{$reads->{$sample}->{$stack_id}}) { if ($out_type eq "fasta") { print $out_fh ">", $cat_id, "|", $sample, "|", $stack_id, "|", $read->{'id'}, "\n", $read->{'seq'}, "\n"; } else { print $out_fh "@", $cat_id, "|", $sample, "|", $stack_id, "|", $read->{'id'}, "\n", $read->{'seq'}, "\n", "+\n", $read->{'qual'}, "\n"; } } } close($out_fh); } } sub check_mult_catalog_matches { my ($matches, $multiple_matches) = @_; my ($fh, $key, $sample, $stack_id); # # Find catalog loci that have more than a single match from one or more samples # and log those loci that will be excluded. # open($fh, ">$out_path/sort_read_pairs.log") or die("Unable to open log file, $!\n"); print $fh "# The catalog loci listed below have more than a single match from one or more individuals, indicating undermerged or repetitive loci.\n", "# CatalogLocus Sample1:Locus1,Locus2;Sample2:Locus1,Locus2\n"; foreach $cat_id (keys %{$matches}) { my %samples; foreach $key (keys %{$matches->{$cat_id}}) { ($sample, $stack_id) = split(/\|/, $key); push(@{$samples{$sample}}, $stack_id); } my $mult_hits = 0; my $str = ""; foreach $sample (keys %samples) { if (scalar(@{$samples{$sample}}) > 1) { $mult_hits++; $str .= $sample . ":" . join(",", @{$samples{$sample}}) . "; "; } } if ($mult_hits > 0) { print $fh $cat_id, "\t", substr($str, 0, -1), "\n"; $multiple_matches{$cat_id}++; } } close($fh); } sub count_reads { my ($catalog, $reads) = @_; my ($count, $key, $sample, $stack_id); $count = 0; foreach $key (keys %{$catalog}) { ($sample, $stack_id) = split(/\|/, $key); if (defined($reads->{$sample}->{$stack_id})) { $count += scalar(@{$reads->{$sample}->{$stack_id}}); } } return $count; } sub build_file_list { my ($files) = @_; my (@ls, $line, $file, $prefix, $suffix); # Load a white list of files to process if it is supplied. 
my %wl; if (length($white_list) > 0) { load_white_list($white_list, \%wl); print STDERR "Loaded ", scalar(keys %wl), " filenames from '$white_list'\n"; } @ls = glob("$in_path/*.tags.tsv"); if (scalar @ls == 0) { @ls = glob("$in_path/*.tags.tsv.gz"); $gzipped = true if (scalar @ls > 0); } foreach $line (@ls) { chomp $line; next if (length($line) == 0); next if ($line =~ /batch_\d+\.catalog/); ($file) = ($line =~ /$in_path\/(.+)\.tags\.tsv\.?g?z?$/); if (length($white_list) > 0) { next if (!defined($wl{$file})); } if ($file =~ /\.1$/) { ($prefix, $suffix) = ($file =~ /^(.+)(\.1)$/); } else { $prefix = $file; $suffix = ""; } push(@{$files}, {'prefix' => $prefix, 'suffix' => $suffix}); } } sub load_white_list { my ($list, $wl) = @_; open(WHITE, "<" . $list) or die("Unable to open white list file '$white_list': $!\n"); my $line = ""; while ($line = ) { chomp $line; next if (length($line) == 0); next if ($line =~ /^\s*#/); $wl->{$line}++; } close(WHITE); } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-p$/) { $in_path = shift @ARGV; } elsif ($_ =~ /^-o$/) { $out_path = shift @ARGV; } elsif ($_ =~ /^-s$/) { $samp_path = shift @ARGV; } elsif ($_ =~ /^-t$/) { $out_type = shift @ARGV; } elsif ($_ =~ /^-i$/) { $in_type = shift @ARGV; } elsif ($_ =~ /^-W$/) { $white_list = shift @ARGV; } elsif ($_ =~ /^-w$/) { $cat_white_list = shift @ARGV; } elsif ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line option: '$_'\n"; usage(); } } if ($out_type ne "fasta" && $out_type ne "fastq") { print STDERR "Output type must be either 'fasta' or 'fastq'.\n"; usage(); } if ($in_type ne "fasta" && $in_type ne "fastq") { print STDERR "Input type must be either 'fasta' or 'fastq'.\n"; usage(); } if (length($in_path) == 0) { print STDERR "You must specify a path to the Stacks output files.\n"; usage(); } if (length($out_path) == 0) { print STDERR "You must specify a path to 
write the collated output files.\n"; usage(); } if (length($samp_path) == 0) { print STDERR "You must specify a path to the paired-end reads.\n"; usage(); } $in_path = substr($in_path, 0, -1) if (substr($in_path, -1) eq "/"); $out_path = substr($out_path, 0, -1) if (substr($out_path, -1) eq "/"); $samp_path = substr($samp_path, 0, -1) if (substr($samp_path, -1) eq "/"); } sub version { print STDERR "sort_read_pairs.pl ", stacks_version, "\n"; } sub usage { version(); print STDERR < # # This file is part of Stacks. # # Stacks is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stacks is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Stacks. If not, see . # # # Execute the Stacks Pipeline exporter script, wait for it to finish, send # a notification email to the person who submitted the export. # # Written by Julian Catchen # use strict; use File::Temp qw/tempfile/; use Net::SMTP; use constant stacks_version => "_VERSION_"; # # Configuration: # exe_path: Path to the export executable. # output_path: Path to web-accessible directory to output the export data # url: URL to reach the directory specified by output_path # local_host: Name of localhost to present to SMTP server # smtp_host: Name of SMTP server through which to send mail # from: email address to use in the 'From' field of the message # my $exe_path = "_BINDIR_" . "export_sql.pl"; my $output_path = "_PKGDATADIR_" . 
"php/export/"; my $url = "http://stackshost.edu/stacks/export/"; my $local_host = "localhost"; my $smtp_host = "localhost"; my $from = "stacks\@stackshost.edu"; my $debug = 0; my $db = ""; my $batch_id = 0; my $out_file = ""; # Generated file name for output data my $out_type = "tsv"; # Output format my $email = ""; # Email address to send message to my $data_type = "haplo"; # Type of export: observed haplotypes or a genetic map. my $map_type = "geno"; # If exporting a genetic map, specify map type. my $depth_lim = 1; # Minimum stack depth limit my $man_cor = 0; # Include manual corrections in the export my $filter_str = ""; # Comma separated list of filters parse_command_line(); my (@filters, @cmd_opts, $filter); # # Generate file name # my $template = "stacks_export_XXXXXXXX"; my $suffix = $out_type eq "xls" ? ".xls" : ".tsv"; my (undef, $out_file) = tempfile($template, OPEN => 0, DIR => $output_path, SUFFIX => $suffix); # # Prepare the command line parameters # push(@cmd_opts, "-D $db", "-b $batch_id", "-f $out_file", "-o $out_type", "-a $data_type"); if ($data_type eq "geno") { push(@cmd_opts, "-m $map_type"); } if ($data_type eq "geno" && $man_cor > 0) { push(@cmd_opts, "-c"); } if ($data_type eq "haplo" && $depth_lim > 1) { push(@cmd_opts, "-L $depth_lim"); } if (length($filter_str) > 0) { @filters = split(/,/, $filter_str); foreach $filter (@filters) { push(@cmd_opts, "-F $filter"); } } my $cmd = join(" ", @cmd_opts); $cmd = $exe_path . " " . 
$cmd; print STDERR "CMD: $cmd\n" if ($debug); # # Execute the exporter program # my @results = `$cmd`; #my @results = `echo Success`; # # Check the results, we expext a one line result: either 'Success' or 'Failure' # chomp $results[0]; if ($results[0] eq "Success") { send_email('success', $out_file); } else { send_email('failure', $out_file); } sub send_email { my ($result) = @_; my $smtp = Net::SMTP->new($smtp_host, 'Hello' => $local_host, 'Timeout' => 60); $smtp->mail($from); $smtp->recipient($email); my $msg .= "From: $from\r\n" . "To: $email\r\n" . "Subject: Stacks pipeline export complete\r\n" . "\r\n"; if ($result eq "success") { # # Trim the path off the output file # my ($f) = ($out_file =~ /.*\/(stacks_export_\w{8}\.\w{3})$/); $msg .= "Your data has been exported and can be downloaded from: " . $url . $f . "\r\n"; } else { $msg .= "There has been an error exporting your data, please contact the system administrator.\r\n"; } $smtp->data($msg); $smtp->quit(); } sub parse_command_line { while (@ARGV) { $_ = shift @ARGV; if ($_ =~ /^-d$/) { $debug++; } elsif ($_ =~ /^-D$/) { $db = shift @ARGV; } elsif ($_ =~ /^-b$/) { $batch_id = shift @ARGV; } elsif ($_ =~ /^-e$/) { $email = shift @ARGV; } elsif ($_ =~ /^-a$/) { $data_type = lc(shift @ARGV); } elsif ($_ =~ /^-m$/) { $map_type = lc(shift @ARGV); } elsif ($_ =~ /^-L$/) { $depth_lim = shift @ARGV; } elsif ($_ =~ /^-c$/) { $man_cor++; } elsif ($_ =~ /^-t$/) { $out_type = shift @ARGV; } elsif ($_ =~ /^-F$/) { $filter_str = shift @ARGV; } elsif ($_ =~ /^-v$/) { version(); exit(); } elsif ($_ =~ /^-h$/) { usage(); } else { print STDERR "Unknown command line options received: $_\n"; usage(); } } if (length($db) == 0) { print STDERR "You must specify a database to index.\n"; usage(); } if (length($email) == 0) { print STDERR "You must specify an email for notification.\n"; usage(); } if ($out_type ne "tsv" && $out_type ne "xls") { print STDERR "Output type can only be 'tsv' or 'xls'.\n"; usage(); } if ($batch_id !~ 
/^\d{1,4}$/) { print STDERR "Batch ID must be a numeric value.\n"; usage(); } } sub version { print STDERR "stacks_export_notify.pl ", stacks_version, "\n"; } sub usage { version(); print << "EOQ"; stacks_export_notify.pl -e email -D db -b batch_id [-t type] [-F filters] [-d] [-h] e: email to use for notification. D: radtag database to examine. b: batch_id of data set to export. a: type of data to export, either 'gen' or 'haplo', for genotypes or observed haplotypes. t: output type, either 'tsv' or 'xls'. F: comma separated list of filters to apply to the data. L: if exporting observed haplotypes, specify a stack depth limit. m: map type. If genotypes are to be exported, specify the map type. c: include manual corrections if exporting genotypes. h: display this help message. d: turn on debug output. EOQ exit(0); } stacks-1.35/php/ajax.js000644 000765 000024 00000001341 12533677757 015577 0ustar00catchenstaff000000 000000 var req; function loadXMLDoc(url) { req = false; // branch for native XMLHttpRequest object if(window.XMLHttpRequest && !(window.ActiveXObject)) { try { req = new XMLHttpRequest(); } catch(e) { req = false; } // branch for IE/Windows ActiveX version } else if(window.ActiveXObject) { try { req = new ActiveXObject("Msxml2.XMLHTTP"); } catch(e) { try { req = new ActiveXObject("Microsoft.XMLHTTP"); } catch(e) { req = false; } } } if(req) { req.onreadystatechange = processReqChange; req.open("GET", url, true); req.send(""); } } stacks-1.35/php/annotate.js000644 000765 000024 00000016374 12335173442 016457 0ustar00catchenstaff000000 000000 // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // var http_req; function toggle_annotation(id) { var div_obj = document.getElementById(id + "_div"); var a_obj = document.getElementById(id + "_ann"); var frm_obj = document.getElementById(id + "_frm"); // Turn ON the anchor, turn OFF the form if (a_obj.style.display == "none") { a_obj.style.display = ""; div_obj.style.display = "none"; if (a_obj.innerHTML.length > 0) a_obj.innerHTML = "annotate"; } // Turn OFF the anchor, turn ON the form else { a_obj.style.display = "none"; div_obj.style.display = ""; if (a_obj.innerHTML != "annotate") frm_obj.ext_id.value = a_obj.innerHTML; } } function annotate_marker(id) { // // Fetch the marker annotation // var form_obj = document.getElementById(id + "_frm"); var url = form_obj.url.value + "&" + "ext_id=" + form_obj.ext_id.value; // // Prepare and send XMLHttpRequest Object. 
// http_req = false; try { http_req = new XMLHttpRequest(); } catch(e) { http_req = false; } if (http_req) { http_req.onreadystatechange = process_annotation; http_req.open("GET", url, true); http_req.send(""); } toggle_annotation(id); } function process_annotation() { // // Possible readyState values: // 0 = uninitialized // 1 = loading // 2 = loaded // 3 = interactive // 4 = complete // if (http_req.readyState == 4) { // Check that the status is "OK" if (http_req.status == 200) { var xml_doc = http_req.responseXML; var tag_obj = xml_doc.getElementsByTagName("marker_id"); var txt_obj = xml_doc.getElementsByTagName("text"); var tag_id = tag_obj[0].childNodes[0].nodeValue; var txt; if (txt_obj[0].childNodes.length > 0) txt = txt_obj[0].childNodes[0].nodeValue; else txt = "annotate"; var a_obj = document.getElementById(tag_id + "_ann"); a_obj.innerHTML = txt; } else { alert("There was a problem retrieving the XML data:\n" + http_req.statusText); } } } function toggle_correction(id) { var div_obj = document.getElementById(id + "_div"); var sel_obj = document.getElementById(id + "_sel"); var s_obj = document.getElementById(id); if (div_obj.style.display == "none") { div_obj.style.display = ""; sel_obj.style.display = "none"; } else { div_obj.style.display = "none"; sel_obj.style.display = ""; s_obj.focus(); } } function cancel_correction(id) { var div_obj = document.getElementById(id + "_div"); var sel_obj = document.getElementById(id + "_sel"); div_obj.style.display = ""; sel_obj.style.display = "none"; } function correct_genotype(id, url) { // // Fetch the marker annotation // var sel_obj = document.getElementById(id); url = url + "&" + "gtype=" + sel_obj.options[sel_obj.selectedIndex].text; // // Prepare and send XMLHttpRequest Object. 
// http_req = false; try { http_req = new XMLHttpRequest(); } catch(e) { http_req = false; } if (http_req) { http_req.onreadystatechange = process_correction; http_req.open("GET", url, true); http_req.send(""); } toggle_correction(id); } function process_correction() { // // Possible readyState values: // 0 = uninitialized // 1 = loading // 2 = loaded // 3 = interactive // 4 = complete // if (http_req.readyState == 4) { // Check that the status is "OK" if (http_req.status == 200) { var xml_doc = http_req.responseXML; var tag_obj = xml_doc.getElementsByTagName("div_id"); var div_id = tag_obj[0].childNodes[0].nodeValue; var cor_obj = xml_doc.getElementsByTagName("corrected"); var cor = cor_obj[0].childNodes[0].nodeValue; var txt_obj = xml_doc.getElementsByTagName("gtype"); var gtype = txt_obj[0].childNodes[0].nodeValue; var div_obj = document.getElementById(div_id + "_div"); var txt; if (cor == "true") txt = "" + gtype + ""; else txt = gtype; div_obj.innerHTML = "" + txt + ""; } else { alert("There was a problem retrieving the XML data:\n" + http_req.statusText); } } } function toggle_population(id) { var div_obj = document.getElementById(id + "_div"); var a_obj = document.getElementById(id + "_pop"); var frm_obj = document.getElementById(id + "_frm"); // Turn ON the anchor, turn OFF the form if (a_obj.style.display == "none") { a_obj.style.display = ""; div_obj.style.display = "none"; } // Turn OFF the anchor, turn ON the form else { a_obj.style.display = "none"; div_obj.style.display = ""; frm_obj.pop_name.value = a_obj.innerHTML; } } function annotate_population(id) { // // Fetch the marker annotation // var form_obj = document.getElementById(id + "_frm"); var url = form_obj.url.value + "&" + "pop_name=" + escape(form_obj.pop_name.value); // // Prepare and send XMLHttpRequest Object. 
// http_req = false; try { http_req = new XMLHttpRequest(); } catch(e) { http_req = false; } if (http_req) { http_req.onreadystatechange = process_pop_annotation; http_req.open("GET", url, true); http_req.send(""); } toggle_population(id); } function process_pop_annotation() { // // Possible readyState values: // 0 = uninitialized // 1 = loading // 2 = loaded // 3 = interactive // 4 = complete // if (http_req.readyState == 4) { // Check that the status is "OK" if (http_req.status == 200) { var xml_doc = http_req.responseXML; var id_obj = xml_doc.getElementsByTagName("pop_id"); var name_obj = xml_doc.getElementsByTagName("text"); var pop_id = id_obj[0].childNodes[0].nodeValue; var txt; if (name_obj[0].childNodes.length > 0) txt = name_obj[0].childNodes[0].nodeValue; else txt = "Population " + pop_id; var a_obj = document.getElementById(pop_id + "_pop"); a_obj.innerHTML = txt; } else { alert("There was a problem retrieving the XML data:\n" + http_req.statusText); } } } stacks-1.35/php/annotate_marker.php000644 000765 000024 00000006572 12335173442 020172 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $ext_id = isset($_GET['ext_id']) ? 
$_GET['ext_id'] : ""; // Connect to the database if (!isset($db)) $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; $display['ext_id'] = $ext_id; // // Prepare some SQL queries // $query = "SELECT id, external_id " . "FROM catalog_annotations " . "WHERE batch_id=? and catalog_id=?"; $db['sel_sth'] = $db['dbh']->prepare($query); check_db_error($db['sel_sth'], __FILE__, __LINE__); $query = "UPDATE catalog_annotations SET external_id=? WHERE id=?"; $db['upd_sth'] = $db['dbh']->prepare($query); check_db_error($db['upd_sth'], __FILE__, __LINE__); $query = "INSERT INTO catalog_annotations SET batch_id=?, catalog_id=?, external_id=?"; $db['ins_sth'] = $db['dbh']->prepare($query); check_db_error($db['ins_sth'], __FILE__, __LINE__); $query = "DELETE FROM catalog_annotations WHERE id=?"; $db['del_sth'] = $db['dbh']->prepare($query); check_db_error($db['del_sth'], __FILE__, __LINE__); // // Fetch any existing annotation for this marker // $result = $db['sel_sth']->execute(array($display['batch_id'], $display['tag_id'])); check_db_error($result, __FILE__, __LINE__); $external_id = ""; $sql_id = 0; if ($row = $result->fetchRow()) { $external_id = $row['external_id']; $sql_id = $row['id']; } if ($external_id != $ext_id) { // // Is this annotation being reset to the original value? If so, delete the corrected record. // if (strlen($external_id) > 0 && strlen($ext_id) == 0) { $result = $db['del_sth']->execute($sql_id); check_db_error($result, __FILE__, __LINE__); // // Are we changing an existing annotation? // } else if (strlen($external_id) > 0 && strlen($ext_id) > 0) { $result = $db['upd_sth']->execute(array($ext_id, $sql_id)); check_db_error($result, __FILE__, __LINE__); // // Otherwise, add a new annotation. 
// } else if (strlen($ext_id) > 0) { $result = $db['ins_sth']->execute(array($display['batch_id'], $display['tag_id'], $ext_id)); check_db_error($result, __FILE__, __LINE__); } } header("Content-type: text/xml"); $xml_output = "\n" . "\n" . "$ext_id\n" . "$tag_id\n" . "\n"; echo $xml_output; ?> stacks-1.35/php/catalog.php000644 000765 000024 00000132216 12540304631 016417 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $batch_id = isset($_GET['id']) ? $_GET['id'] : 0; $database = isset($_GET['db']) ? $_GET['db'] : ""; $page = isset($_GET['p']) ? $_GET['p'] : 1; $per_page = isset($_GET['pp']) ? $_GET['pp'] : 10; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['id'] = $batch_id; $display['db'] = $database; $display['p'] = $page; $display['pp'] = $per_page; $display['filter_type'] = array(); // // Process the filtering parameters // $param = array($batch_id); process_filter($display); prepare_filter_parameters($display, $param); // // Prepare some SQL queries // $query = "SELECT batches.id as id, date, description, type FROM batches " . "WHERE batches.id=?"; $db['batch_sth'] = $db['dbh']->prepare($query); check_db_error($db['batch_sth'], __FILE__, __LINE__); $query = "SELECT COUNT(tag_id) as count FROM catalog_index " . 
"WHERE batch_id=?"; $query .= apply_query_filters($display); $db['count_sth'] = $db['dbh']->prepare($query); check_db_error($db['count_sth'], __FILE__, __LINE__); $query = "SELECT alleles as count FROM catalog_index " . "WHERE batch_id=? AND tag_id=?"; $db['allele_sth'] = $db['dbh']->prepare($query); check_db_error($db['allele_sth'], __FILE__, __LINE__); $query = "SELECT col, rank_2 FROM catalog_snps " . "JOIN batches ON (catalog_snps.batch_id=batches.id) " . "WHERE batch_id=? AND tag_id=? ORDER BY col"; $db['snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['snp_sth'], __FILE__, __LINE__); $query = "SELECT chr, max_len FROM chr_index " . "WHERE batch_id=?"; $db['chrs_sth'] = $db['dbh']->prepare($query); check_db_error($db['chrs_sth'], __FILE__, __LINE__); $query = "SELECT max(ests) as ests, max(pe_radtags) as pe_radtags, max(blast_hits) as blast_hits " . "FROM catalog_index WHERE batch_id=?"; $db['seq_sth'] = $db['dbh']->prepare($query); check_db_error($db['seq_sth'], __FILE__, __LINE__); $result = $db['seq_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $cols = array(); if ($row['ests'] == 0 && $row['pe_radtags'] == 0 && $row['blast_hits'] == 0) $cols['seq'] = false; else $cols['seq'] = true; $query = "SELECT count(id) as cnt FROM catalog_genotypes WHERE batch_id=?"; $db['gcnt_sth'] = $db['dbh']->prepare($query); check_db_error($db['gcnt_sth'], __FILE__, __LINE__); $result = $db['gcnt_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); if ($row['cnt'] > 0) $cols['gcnt'] = true; else $cols['gcnt'] = false; // // Pull information about this batch // $result = $db['batch_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $batch = array(); $batch['id'] = $row['id']; $batch['desc'] = $row['description']; $batch['date'] = $row['date']; $batch['type'] = $row['type']; $page_title = "RAD-Tag Catalog Viewer"; 
write_header($page_title, $batch); echo <<< EOQ

Batch #$batch[id] [$batch[date]; $batch[desc]]

EOQ; if ($batch['type'] == "population") write_pop_filter($cols); else write_map_filter($cols); // // How many columns will we print // $num_cols = 9; foreach ($cols as $col) if ($col == false) $num_cols--; // // Generate Excel export URL // $excel_export = generate_url("export_batch.php", false); echo <<< EOQ
EOQ; } else { echo <<< EOQ EOQ; } if ($cols['seq'] == true) print " \n"; print "\n"; $db['dbh']->setLimit($display['pp'], $start_group - 1); check_db_error($db['dbh'], __FILE__, __LINE__); $query = "SELECT catalog_index.tag_id as tag_id, alleles, parents, progeny, valid_progeny, " . "seq, marker, uncor_marker, chisq_pval, lnl, ratio, ests, pe_radtags, blast_hits, external_id, geno_cnt, " . "catalog_index.chr, catalog_index.bp, catalog_tags.strand, catalog_index.type, gene, ext_id, ex_start, ex_end, ex_index " . "FROM catalog_index " . "JOIN catalog_tags ON (catalog_index.cat_id=catalog_tags.id) " . "LEFT JOIN catalog_annotations ON (catalog_index.batch_id=catalog_annotations.batch_id AND catalog_index.tag_id=catalog_annotations.catalog_id) " . "LEFT JOIN ref_radome ON (catalog_index.ref_id=ref_radome.id) " . "WHERE catalog_index.batch_id=?"; $query .= apply_query_filters($display); $db['tag_sth'] = $db['dbh']->prepare($query); check_db_error($db['tag_sth'], __FILE__, __LINE__); $result = $db['tag_sth']->execute($param); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { // Query the database to find how many SNPs were found in this sample. $snps = array(); $snp_res = $db['snp_sth']->execute(array($batch_id, $row['tag_id'])); check_db_error($snp_res, __FILE__, __LINE__); while ($snp_row = $snp_res->fetchRow()) { array_push($snps, array('col' => $snp_row['col'], 'rank' => $snp_row['rank_2'])); } $url = "$root_path/pop_view.php?db=$database&batch_id=$batch_id&type=$batch[type]&tag_id=$row[tag_id]"; $annotation = strlen($row['external_id']) > 0 ? 
$row['external_id'] : "annotate"; echo <<< EOQ EOQ; if (count($snps) == 0) print " \n"; else print " \n"; $s = print_snps($row['tag_id'], $row['seq'], $row['seq'], $snps, true); $ratio = explode(";", $row['ratio']); $ratio_parsed = ""; $i = 0; foreach ($ratio as $r) { if (strlen($r) == 0) continue; preg_match("/([a-z]+):(\d+)\((\d+\.?\d*%)\)/", $r, $matches); for ($j = 0; $j < strlen($matches[1]); $j++) { $color = $color_map[$matches[1][$j]]; $ratio_parsed .= "" . $matches[1][$j] . ""; } $ratio_parsed .= ": $matches[2]"; if ($matches[3] > 0) $ratio_parsed .= " ($matches[3])"; $ratio_parsed .= "
"; $i++; } $url = "$root_path/sequence_blast.php?db=$database&batch_id=$batch_id&tag_id=$row[tag_id]"; if ($row['blast_hits'] > 0 || $row['pe_radtags'] > 0 || $row['ests'] > 0) { $blast_hits_str = "
" . "" . "blast hits: $row[blast_hits]"; } else { $blast_hits_str = "blast hits: $row[blast_hits]"; } if (strlen($row['chr']) > 0) { print "
\n"; } else { print "\n"; } if ($batch['type'] == "map") { print " \n"; if (strlen($row['uncor_marker']) > 0 && $row['marker'] != $row['uncor_marker']) print " \n"; else print " \n"; } else { print " \n"; } echo <<< EOQ EOQ; if ($batch['type'] == "map") print "\n"; if ($cols['seq'] == true) echo <<< EOQ EOQ; echo <<< EOQ EOQ; } print "\n" . "
EOQ; // // Figure out how many results there are (including filtering) // and write out the proper pagination links // $result = $db['count_sth']->execute($param); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $pagination_count = $row['count']; $start_group = 0; $end_group = 0; write_pagination($pagination_count, $start_group, $end_group, "catalog.php"); if ($batch['type'] == "map") { echo <<< EOQ
Id SNP Consensus Matches Marker Ratio ChiSq P-value
Id SNP Consensus Matches RatioSequence
NoYes [" . count($snps) . "nuc]\n" . "
$s
\n" . "
Chr: $row[chr], " . print_bp($row['bp']) . ", $row[strand]\n"; if ($row['type'] == "exon") { if (strlen($row['ext_id']) == 0) $gene = $row['gene']; else $gene = $row['ext_id']; print ", Gene: " . $gene . "\n"; } print ", LnL: $row[lnl]
$s
\n" . "
LnL: $row[lnl]
\n" . "
$row[parents]" . " / " . "$row[progeny]" . " / " . "$row[valid_progeny]" . " / " . "$row[geno_cnt]$row[marker]*$row[marker]$row[parents] $ratio_parsed $row[chisq_pval]
ests: $row[ests]pe: $row[pe_radtags]
$blast_hits_str
\n"; write_pagination($pagination_count, $start_group, $end_group, "catalog.php"); echo <<< EOQ
EOQ; write_footer(); function generate_hidden_form_vars($var) { global $root_path, $display; $vars = ""; foreach ($display as $key => $d) { if (strstr($key, $var)) continue; if (is_array($d)) { foreach ($d as $e) { $vars .= " \n"; } } else { $vars .= " \n"; } } return $vars; } function generate_per_page_select($name, $per_page) { $pages = array("10", "50", "100", "all"); $ctl = " \n"; return $ctl; } function generate_url($destination, $prefix) { global $root_path, $display; if ($prefix) $url = "href=\"" . $root_path . "/" . $destination . "?"; else $url = $root_path . "/" . $destination . "?"; foreach ($display as $key => $d) { if (is_array($d)) { foreach ($d as $e) $url .= "{$key}[]=$e&"; } else { $url .= "$key=$d&"; } } // Remove the hanging '&' $url = substr($url, 0, -1); if ($prefix) $url .= "\""; return $url; } function generate_page_list($page, $num_pages, $destination) { global $display; $page_list = ""; if ($page <= 4) { for ($i = 1; $i < $page; $i++) { $display['p'] = $i; $p = generate_url($destination, true); $page_list .= "$i\n"; } } else { $display['p'] = 1; $p = generate_url($destination, true); $page_list .= "1 ...\n"; foreach (array($page - 3, $page - 2, $page - 1) as $i) { $display['p'] = $i; $p = generate_url($destination, true); $page_list .= "$i\n"; } } $page_list .= " $page\n"; if ($page <= $num_pages - 4) { for ($i = $page + 1; $i <= $page + 3; $i++) { $display['p'] = $i; $p = generate_url($destination, true); $page_list .= "$i\n"; } $display['p'] = $num_pages; $p = generate_url($destination, true); $page_list .= "... $num_pages\n"; } else { for ($i = $page + 1; $i <= $num_pages; $i++) { $display['p'] = $i; $p = generate_url($destination, true); $page_list .= "$i\n"; } } $display['p'] = $page; $page_list = "\n" . $page_list . 
"\n"; return $page_list; } function write_pagination($num_tags, &$start_gene, &$end_gene, $destination) { global $img_path, $root_path, $display; $cur_page = $display['p']; $page = $display['p']; $per_page = $display['pp']; if ($per_page == "all") $per_page = $num_tags; // // First figure out the total number of pages. If there are // additional genes left over, add an extra page. // $num_pages = floor($num_tags / $per_page); $num_pages += $num_tags % $per_page >= 1 ? 1 : 0; if ($page > $num_pages) { $page = $num_pages; $cur_page = $num_pages; } // Determine the start and end gene numbers $start_gene = 1 + (($page - 1) * $per_page); $end_gene = $start_gene + $per_page > $num_tags ? $num_tags : ($start_gene + $per_page - 1); // Generate the URLs for our links $display['p'] -= 1; $prev_page = generate_url($destination, true); $display['p'] += 2; $next_page = generate_url($destination, true); $display['p'] = $cur_page; print "\n" . "\n" . "\n"; $page_list = ""; if ($num_pages > 1) $page_list = generate_page_list($page, $num_pages, $destination); print $page_list; $hidden_vars = generate_hidden_form_vars("pp"); $per_page_ctl = generate_per_page_select("pp", $display['pp']); echo <<< EOQ
\n"; if ($page == 1) { if ($num_pages == 1) { echo <<< EOQ No Previous Page $page No Next Page EOQ; } else { echo <<< EOQ No Previous Page $page View Next Page EOQ; } } else if ($page == $num_pages) { echo <<< EOQ View Previous Page $page No Next Page EOQ; } else { echo <<< EOQ View Previous Page $page View Next Page EOQ; } print " ($num_tags tags)\n" . "
$hidden_vars tags per page   $per_page_ctl
EOQ; } function write_map_filter($cols) { global $img_path, $root_path, $display; $max_chr_len = 0; $hidden_vars = generate_hidden_form_vars("filter"); $chrs = fetch_chrs($max_chr_len); $filters = array("cata" => array(), "alle" => array(), "snps" => array(), "pare" => array(), "prog" => array(), "vprog" => array(), "mark" => array(), "gcnt" => array(), "chisq" => array(), "loc" => array(), "ref" => array(), "est" => array(), "pe" => array(), "blast" => array()); $fall = isset($display['filter_alle_l']) ? $display['filter_alle_l'] : ""; $falu = isset($display['filter_alle_u']) ? $display['filter_alle_u'] : "100"; $fsnl = isset($display['filter_snps_l']) ? $display['filter_snps_l'] : ""; $fsnu = isset($display['filter_snps_u']) ? $display['filter_snps_u'] : "100"; $fpal = isset($display['filter_pare_l']) ? $display['filter_pare_l'] : ""; $fpau = isset($display['filter_pare_u']) ? $display['filter_pare_u'] : "2"; $fpr = isset($display['filter_prog']) ? $display['filter_prog'] : ""; $fvp = isset($display['filter_vprog']) ? $display['filter_vprog'] : ""; $fgc = isset($display['filter_gcnt']) ? $display['filter_gcnt'] : ""; $csql = isset($display['filter_chisq_l']) ? $display['filter_chisq_l'] : "0.0"; $csqu = isset($display['filter_chisq_u']) ? $display['filter_chisq_u'] : "1.0"; $fma = isset($display['filter_mark']) ? $display['filter_mark'] : ""; $ref = isset($display['filter_ref']) ? $display['filter_ref'] : ""; $fch = isset($display['filter_chr']) ? $display['filter_chr'] : ""; $fsb = isset($display['filter_sbp']) ? $display['filter_sbp'] : 0; $feb = isset($display['filter_ebp']) ? 
$display['filter_ebp'] : $max_chr_len; $r = range(1, 9); $r = array_merge($r, range(10, 100, 5)); array_push($r, 1000); $alle_l_ctl = generate_element_select("filter_alle_l", $r, $fall, ""); $alle_u_ctl = generate_element_select("filter_alle_u", $r, $falu, ""); $snps_l_ctl = generate_element_select("filter_snps_l", $r, $fsnl, ""); $snps_u_ctl = generate_element_select("filter_snps_u", $r, $fsnu, ""); $r = range(1, 9); $r = array_merge($r, range(10, 500, 10)); array_push($r, 1000, 2000, 10000); $pare_l_ctl = generate_element_select("filter_pare_l", $r, $fpal, ""); $pare_u_ctl = generate_element_select("filter_pare_u", $r, $fpau, ""); $prog_ctl = generate_element_select("filter_prog", $r, $fpr, ""); $vprog_ctl = generate_element_select("filter_vprog", $r, $fvp, ""); $gcnt_ctl = generate_element_select("filter_gcnt", $r, $fgc, ""); $csql_ctl = generate_element_select("filter_chisq_l", array(0.0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 1.0), $csql, ""); $csqu_ctl = generate_element_select("filter_chisq_u", array(0.0, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 1.0), $csqu, ""); $chr_ctl = generate_element_select("filter_chr", $chrs, $fch, ""); $sbp_ctl = generate_element_select("filter_sbp", range(0, $max_chr_len), $fsb, ""); $ebp_ctl = generate_element_select("filter_ebp", range(0, $max_chr_len), $feb, ""); $ref_ctl = generate_element_select("filter_ref", array("exon", "intron", "genomic"), $ref, ""); $mark_ctl = generate_element_select("filter_mark", array('Any', 'aa/bb', 'ab/--', '--/ab', 'aa/ab', 'ab/aa', 'ab/ab', 'ab/ac', 'ab/cd', 'ab/cc', 'cc/ab'), $fma, ""); if (isset($display['filter_type'])) { foreach ($filters as $key => $f) if (in_array($key, $display['filter_type'])) { $filters[$key]['sel'] = "checked=\"checked\""; $filters[$key]['tr'] = "class=\"active_filter\""; } else { $filters[$key]['sel'] = ""; $filters[$key]['tr'] = ""; } } else { $filters['none']['sel'] = "checked=\"checked\""; } echo <<< EOQ

Filter Results By

$hidden_vars EOQ; if (count($chrs) > 0) { echo <<< EOQ EOQ; } $cat_id_filter = isset($display['filter_cata']) ? $display['filter_cata'] : ""; echo <<< EOQ
Location: $chr_ctl
Start: $sbp_ctl Mb
End: $ebp_ctl Mb
Type: $ref_ctl
EOQ; if ($cols['seq'] == true) echo <<< EOQ EOQ; echo <<< EOQ
Catalog ID:
Alleles: $alle_l_ctl $alle_u_ctl
SNPs: $snps_l_ctl $snps_u_ctl
   
Parental matches: $pare_l_ctl $pare_u_ctl
Progeny matches: $prog_ctl
Segregation distortion: $csql_ctl $csqu_ctl
Mappable progeny: $vprog_ctl
Mappable markers: $mark_ctl
Genotypes: $gcnt_ctl
Contains ESTs Contains Paired-end RAD-Tags Contains BLAST Hits
EOQ; } function write_pop_filter($cols) { global $img_path, $root_path, $display; $max_chr_len = 0; $hidden_vars = generate_hidden_form_vars("filter"); $chrs = fetch_chrs($max_chr_len); $filters = array("cata" => array(), "alle" => array(), "snps" => array(), "pare" => array(), "gcnt" => array(), "lnl" => array(), "loc" => array(), "ref" => array(), "est" => array(), "pe" => array(), "blast" => array()); $fall = isset($display['filter_alle_l']) ? $display['filter_alle_l'] : ""; $falu = isset($display['filter_alle_u']) ? $display['filter_alle_u'] : "100"; $fsnl = isset($display['filter_snps_l']) ? $display['filter_snps_l'] : ""; $fsnu = isset($display['filter_snps_u']) ? $display['filter_snps_u'] : "100"; $fpal = isset($display['filter_pare_l']) ? $display['filter_pare_l'] : ""; $fpau = isset($display['filter_pare_u']) ? $display['filter_pare_u'] : "1000"; $ref = isset($display['filter_ref']) ? $display['filter_ref'] : ""; $fch = isset($display['filter_chr']) ? $display['filter_chr'] : ""; $fsb = isset($display['filter_sbp']) ? $display['filter_sbp'] : 0; $feb = isset($display['filter_ebp']) ? $display['filter_ebp'] : $max_chr_len; $flnl = isset($display['filter_lnl_l']) ? $display['filter_lnl_l'] : 0; $flnu = isset($display['filter_lnl_u']) ? 
$display['filter_lnl_u'] : -500; $r = range(1, 9); $r = array_merge($r, range(10, 100, 5)); array_push($r, 1000); $alle_l_ctl = generate_element_select("filter_alle_l", $r, $fall, ""); $alle_u_ctl = generate_element_select("filter_alle_u", $r, $falu, ""); $snps_l_ctl = generate_element_select("filter_snps_l", $r, $fsnl, ""); $snps_u_ctl = generate_element_select("filter_snps_u", $r, $fsnu, ""); $r = range(1, 9); $r = array_merge($r, range(10, 500, 10)); array_push($r, 1000, 2000, 10000); $pare_l_ctl = generate_element_select("filter_pare_l", $r, $fpal, ""); $pare_u_ctl = generate_element_select("filter_pare_u", $r, $fpau, ""); $chr_ctl = generate_element_select("filter_chr", $chrs, $fch, ""); $sbp_ctl = generate_element_select("filter_sbp", range(0, $max_chr_len), $fsb, ""); $ebp_ctl = generate_element_select("filter_ebp", range(0, $max_chr_len), $feb, ""); $ref_ctl = generate_element_select("filter_ref", array("exon", "intron", "genomic"), $ref, ""); $r = range(0, 9); $r = array_merge($r, range(10, 100, 5)); $r = array_merge($r, range(200, 500, 100)); for ($i = 0; $i < count($r); $i++) $r[$i] = $r[$i] * -1; $lnl_l_ctl = generate_element_select("filter_lnl_l", $r, $flnl, ""); $lnl_u_ctl = generate_element_select("filter_lnl_u", $r, $flnu, ""); if (isset($display['filter_type'])) { foreach ($filters as $key => $f) if (in_array($key, $display['filter_type'])) { $filters[$key]['sel'] = "checked=\"checked\""; $filters[$key]['tr'] = "class=\"active_filter\""; } else { $filters[$key]['sel'] = ""; $filters[$key]['tr'] = ""; } } else { $filters['none']['sel'] = "checked=\"checked\""; } echo <<< EOQ

Filter Results By

$hidden_vars EOQ; if (count($chrs) > 0) { echo <<< EOQ EOQ; } $cat_id_filter = isset($display['filter_cata']) ? $display['filter_cata'] : ""; echo <<< EOQ
Location: $chr_ctl
Start: $sbp_ctl Mb
End: $ebp_ctl Mb
Type: $ref_ctl
EOQ; if ($cols['seq'] == true) echo <<< EOQ EOQ; echo <<< EOQ
Catalog ID:
Alleles: $alle_l_ctl $alle_u_ctl
SNPs: $snps_l_ctl $snps_u_ctl
   
Matching samples: $pare_l_ctl $pare_u_ctl
LnL: $lnl_l_ctl $lnl_u_ctl
   
EOQ; if ($cols['gcnt'] == true) { echo <<< EOQ EOQ; } else { echo <<< EOQ EOQ; } echo <<< EOQ
   
   
$gcnt_ctl
   
Contains ESTs Contains Paired-end RAD-Tags Contains BLAST Hits
EOQ; } function process_filter(&$display_params) { if (!isset($_GET['filter_type'])) return; foreach ($_GET['filter_type'] as $filter) { array_push($display_params['filter_type'], $filter); if ($filter == "alle") { $display_params['filter_alle_l'] = $_GET['filter_alle_l']; $display_params['filter_alle_u'] = $_GET['filter_alle_u']; } else if ($filter == "snps") { $display_params['filter_snps_l'] = $_GET['filter_snps_l']; $display_params['filter_snps_u'] = $_GET['filter_snps_u']; } else if ($filter == "pare") { $display_params['filter_pare_l'] = $_GET['filter_pare_l']; $display_params['filter_pare_u'] = $_GET['filter_pare_u']; } else if ($filter == "lnl") { $display_params['filter_lnl_l'] = $_GET['filter_lnl_l']; $display_params['filter_lnl_u'] = $_GET['filter_lnl_u']; } else if ($filter == "prog") { $display_params['filter_prog'] = $_GET['filter_prog']; } else if ($filter == "vprog") { $display_params['filter_vprog'] = $_GET['filter_vprog']; } else if ($filter == "cata") { $display_params['filter_cata'] = $_GET['filter_cata']; } else if ($filter == "mark") { $display_params['filter_mark'] = $_GET['filter_mark']; } else if ($filter == "gcnt") { $display_params['filter_gcnt'] = $_GET['filter_gcnt']; } else if ($filter == "chisq") { $display_params['filter_chisq_l'] = $_GET['filter_chisq_l']; $display_params['filter_chisq_u'] = $_GET['filter_chisq_u']; } else if ($filter == "ref") { $display_params['filter_ref'] = $_GET['filter_ref']; } else if ($filter == "loc") { $display_params['filter_chr'] = $_GET['filter_chr']; $display_params['filter_sbp'] = $_GET['filter_sbp']; $display_params['filter_ebp'] = $_GET['filter_ebp']; } } } function prepare_filter_parameters($display_params, &$param) { $filters = $display_params['filter_type']; if (!isset($filters)) return; foreach ($filters as $filter) { if ($filter == "snps") { array_push($param, $display_params['filter_snps_l']); array_push($param, $display_params['filter_snps_u']); } else if ($filter == "alle") { 
array_push($param, $display_params['filter_alle_l']); array_push($param, $display_params['filter_alle_u']); } else if ($filter == "pare") { array_push($param, $display_params['filter_pare_l']); array_push($param, $display_params['filter_pare_u']); } else if ($filter == "lnl") { array_push($param, $display_params['filter_lnl_l']); array_push($param, $display_params['filter_lnl_u']); } else if ($filter == "prog") { array_push($param, $display_params['filter_prog']); } else if ($filter == "vprog") { array_push($param, $display_params['filter_vprog']); } else if ($filter == "cata") { array_push($param, $display_params['filter_cata']); } else if ($filter == "est") { array_push($param, 0); } else if ($filter == "pe") { array_push($param, 0); } else if ($filter == "blast") { array_push($param, 0); } else if ($filter == "gcnt") { array_push($param, $display_params['filter_gcnt']); } else if ($filter == "chisq") { array_push($param, $display_params['filter_chisq_l']); array_push($param, $display_params['filter_chisq_u']); } else if ($filter == "ref") { array_push($param, $display_params['filter_ref']); } else if ($filter == "loc") { array_push($param, $display_params['filter_chr']); array_push($param, $display_params['filter_sbp'] * 1000000); array_push($param, $display_params['filter_ebp'] * 1000000); } else if ($filter == "mark") { if ($display_params['filter_mark'] == "Any") array_push($param, "%/%"); else array_push($param, $display_params['filter_mark']); } } } function apply_query_filters($display_params) { $order = 0; $query = ""; $sql_filters = array("cata" => "(catalog_index.tag_id = ?)", "alle" => "(alleles >= ? AND alleles <= ?)", "snps" => "(snps >= ? AND snps <= ?)", "pare" => "(parents >= ? AND parents <= ?)", "prog" => "(progeny >= ?)", "vprog" => "(valid_progeny >= ?)", "lnl" => "(lnl >= ? 
AND lnl <= ?)", "mark" => "(marker LIKE ?)", "est" => "(ests > ?)", "pe" => "(pe_radtags > ?)", "blast" => "(blast_hits > ?)", "gcnt" => "(geno_cnt >= ?)", "chisq" => "(chisq_pval >= ? AND chisq_pval <= ?)", "ref" => "(catalog_index.type = ?)", "loc" => "(catalog_index.chr = ? && catalog_index.bp >= ? && catalog_index.bp <= ?)"); $filters = $display_params['filter_type']; if (count($filters) > 0) { $query = " AND "; while (count($filters) > 0) { $filter = array_shift($filters); $query .= $sql_filters[$filter]; $query .= count($filters) > 0 ? " AND " : ""; if ($filter == "loc") $order++; } if ($order) $query .= " ORDER BY chr, bp"; } return $query; } function fetch_chrs(&$max_len) { global $db, $batch_id; $max_len = 0; $chrs = array(); $res = $db['chrs_sth']->execute($batch_id); check_db_error($res, __FILE__, __LINE__); while ($row = $res->fetchRow()) { if ($row['max_len'] > $max_len) $max_len = $row['max_len']; array_push($chrs, $row['chr']); } return $chrs; } ?> stacks-1.35/php/catalog_genotypes.php000644 000765 000024 00000014045 12335173442 020521 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// require_once("header.php"); if (isset($_GET['db'])) $database = $_GET['db']; else if (isset($_POST['db'])) $database = $_POST['db']; if (isset($_GET['tag_id'])) $tag_id = $_GET['tag_id']; else if (isset($_POST['tag_id'])) $tag_id = $_POST['tag_id']; else $tag_id = 0; if (isset($_GET['batch_id'])) $batch_id = $_GET['batch_id']; else if (isset($_POST['batch_id'])) $batch_id = $_POST['batch_id']; else $batch_id = 0; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare the possible select lists we will want to construct // $marker_types = array('ab/--' => array('aa', 'bb', '-'), '--/ab' => array('aa', 'bb', '-'), 'ab/aa' => array('aa', 'ab', '-'), 'aa/ab' => array('aa', 'ab', '-'), 'ab/ab' => array('aa', 'ab', 'bb', '-'), 'ab/ac' => array('aa', 'ab', 'ac', 'bc', '-'), 'ab/cd' => array('aa', 'bb', 'cc', 'dd', 'ac', 'ad', 'bc', 'bd', '-'), 'aa/bb' => array('aa', 'bb', 'ab', '-'), 'ab/cc' => array('aa', 'bb', 'ab', 'ac', 'bc', 'cc', '-'), 'cc/ab' => array('aa', 'bb', 'ab', 'ac', 'bc', 'cc', '-')); // // Prepare some SQL queries // $query = "SELECT count(samples.id) as count FROM samples WHERE batch_id=?"; $db['samp_sth'] = $db['dbh']->prepare($query); check_db_error($db['samp_sth'], __FILE__, __LINE__); $query = "SELECT marker, catalog_genotypes.sample_id, file, " . "catalog_genotypes.genotype, genotype_corrections.genotype as corrected " . "FROM catalog_genotypes " . "LEFT JOIN genotype_corrections ON " . "(genotype_corrections.catalog_id=catalog_genotypes.catalog_id AND " . "genotype_corrections.sample_id=catalog_genotypes.sample_id AND " . "genotype_corrections.batch_id=catalog_genotypes.batch_id) " . "JOIN samples ON (catalog_genotypes.sample_id=samples.id) " . "JOIN catalog_index ON (catalog_genotypes.catalog_id=catalog_index.tag_id AND " . 
"catalog_genotypes.batch_id=catalog_index.batch_id) " . "WHERE catalog_genotypes.batch_id=? and catalog_genotypes.catalog_id=? " . "ORDER BY catalog_genotypes.sample_id"; $db['geno_sth'] = $db['dbh']->prepare($query); check_db_error($db['geno_sth'], __FILE__, __LINE__); $page_title = "Catalog Genotype Viewer"; write_compact_header($page_title); // // Get number of samples so we can determine how many rows to display // in the genotype table. // $result = $db['samp_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $num_samples = $row['count']; $num_cols = 10; $num_rows = ceil($num_samples / $num_cols); $gtypes = array(); $result = $db['geno_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($result->numRows() == 0) { print "

" . "This marker has no genotypes, probably because this tag does not have enough mappable progeny.

\n"; write_compact_footer(); return; } while ($row = $result->fetchRow()) { $gtypes[$row['sample_id']] = array('file' => $row['file'], 'genotype' => $row['genotype'], 'corrected' => $row['corrected'], 'marker' => $row['marker']); } print "
\n" . "\n" . "\n" . "\n" . "\n" . "\n" . "\n"; $i = 0; foreach ($gtypes as $sample_id => $sample) { $i++; $id = "gtype_" . $batch_id . "_" . $tag_id . "_" . $sample_id; if (strlen($sample['corrected']) > 0) { $sel = generate_element_select($id, $marker_types[$sample['marker']], strtolower($sample['corrected']), ""); $genotype = "$sample[corrected]"; } else { $sel = generate_element_select($id, $marker_types[$sample['marker']], strtolower($sample['genotype']), ""); $genotype = $sample['genotype']; } print " \n"; if ($i % $num_cols == 0) print "\n" . "\n"; } while ($i % $num_cols != 0) { print " \n"; $i++; } echo <<< EOQ
" . "" . ucfirst(str_replace("_", " ", $sample['file'])) . "
" . "" . $genotype . "\n" . "
\n" . $sel . "
" . "
EOQ; write_compact_footer(); ?> stacks-1.35/php/CatalogClass.php000644 000765 000024 00000022760 12335173442 017355 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("Locus.php"); class Catalog { var $db; // Hash of database statement handles. var $display; // An array of form variables necessary to write URLs. var $params; // An array of parameters to fetch our results from the database. var $queries; // An array of SQL queries. var $loci; // Array of groups objects. var $num_loci; // Total number of groups, after filtering function Catalog($db, $batch_id, $display_params) { $this->db = $db; $this->batch = $batch_id; $this->display = $display_params; $this->queries = array(); $this->params = array(); $this->loci = array(); $this->num_loci = 0; $this->prepare_queries(); } function prepare_queries() { $query = "SELECT allele FROM catalog_alleles " . "WHERE batch_id=? AND tag_id=?"; $this->db['allele_sth'] = $this->db['dbh']->prepare($query); check_db_error($this->db['allele_sth'], __FILE__, __LINE__); $query = "SELECT col, rank_1, rank_2 FROM catalog_snps " . "WHERE batch_id=? AND tag_id=? ORDER BY col"; $this->db['snp_sth'] = $this->db['dbh']->prepare($query); check_db_error($this->db['snp_sth'], __FILE__, __LINE__); $query = "SELECT samples.id, samples.sample_id, samples.type, file, tag_id, allele " . "FROM matches " . 
"JOIN samples ON (matches.sample_id=samples.id) " . "WHERE matches.batch_id=? AND catalog_id=? ORDER BY samples.id"; $this->db['mat_sth'] = $this->db['dbh']->prepare($query); check_db_error($this->db['mat_sth'], __FILE__, __LINE__); $query = "SELECT COUNT(tag_id) as count FROM catalog_index " . "WHERE batch_id=?"; $query .= $this->apply_query_filters(); $this->queries['tag_count'] = $query; $query = "SELECT catalog_index.tag_id as tag_id, chr, bp, snps, alleles, parents, progeny, valid_progeny, " . "seq, marker, max_pct, ratio, ests, pe_radtags, blast_hits, external_id " . "FROM catalog_index " . "JOIN catalog_tags ON (catalog_index.cat_id=catalog_tags.id) " . "LEFT JOIN catalog_annotations ON " . "(" . "catalog_index.batch_id=catalog_annotations.batch_id AND " . "catalog_index.tag_id=catalog_annotations.catalog_id" . ") " . "WHERE catalog_index.batch_id=?"; $query .= $this->apply_query_filters(); $this->queries['tag'] = $query; } function prepare_filter_parameters() { array_push($this->params, $this->batch); $filters = $this->display['filter_type']; if (!isset($filters)) return; foreach ($filters as $filter) { if ($filter == "snps") { array_push($this->params, $this->display['filter_snps_l']); array_push($this->params, $this->display['filter_snps_u']); } else if ($filter == "alle") { array_push($this->params, $this->display['filter_alle_l']); array_push($this->params, $this->display['filter_alle_u']); } else if ($filter == "pare") { array_push($this->params, $this->display['filter_pare_l']); array_push($this->params, $this->display['filter_pare_u']); } else if ($filter == "prog") { array_push($this->params, $this->display['filter_prog']); } else if ($filter == "vprog") { array_push($this->params, $this->display['filter_vprog']); } else if ($filter == "cata") { array_push($this->params, $this->display['filter_cata']); } else if ($filter == "gcnt") { array_push($this->params, $this->display['filter_gcnt']); } else if ($filter == "est") { array_push($this->params, 
0); } else if ($filter == "pe") { array_push($this->params, 0); } else if ($filter == "blast") { array_push($this->params, 0); } else if ($filter == "ref") { array_push($this->params, $this->display['filter_ref']); } else if ($filter == "loc") { array_push($this->params, $this->display['filter_chr']); array_push($this->params, $this->display['filter_sbp'] * 1000000); array_push($this->params, $this->display['filter_ebp'] * 1000000); } else if ($filter == "mark") { if ($this->display['filter_mark'] == "Any") array_push($this->params, "%/%"); else array_push($this->params, $this->display['filter_mark']); } } } function apply_query_filters() { $sql_filters = array("cata" => "(catalog_index.tag_id = ?)", "alle" => "(alleles >= ? AND alleles <= ?)", "snps" => "(snps >= ? AND snps <= ?)", "pare" => "(parents >= ? AND parents <= ?)", "prog" => "(progeny >= ?)", "vprog" => "(valid_progeny >= ?)", "mark" => "(marker LIKE ?)", "est" => "(ests > ?)", "pe" => "(pe_radtags > ?)", "blast" => "(blast_hits > ?)", "gcnt" => "(geno_cnt >= ?)", "ref" => "(catalog_index.type = ?)", "loc" => "(catalog_index.chr = ? && catalog_index.bp >= ? && catalog_index.bp <= ?)"); $filters = $this->display['filter_type']; if (count($filters) > 0) { $query = " AND "; while (count($filters) > 0) { $filter = array_shift($filters); $query .= $sql_filters[$filter]; $query .= count($filters) > 0 ? 
" AND " : ""; } } return $query; } function &loci() { return $this->loci; } function &locus($id) { return $this->loci[$id]; } function num_loci() { return $this->num_loci; } function determine_count() { $this->db['tag_count_sth'] = $this->db['dbh']->prepare($this->queries['tag_count']); check_db_error($this->db['tag_count_sth'], __FILE__, __LINE__); $this->prepare_filter_parameters(); $result =& $this->db['tag_count_sth']->execute($this->params); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $this->num_loci = $row['count']; } function populate($start_group, $num_groups) { // // We only want to load genes between $start_gene and $end_gene. // $this->db['dbh']->setLimit($num_groups, $start_group); check_db_error($this->db['dbh'], __FILE__, __LINE__); $this->db['tag_sth'] = $this->db['dbh']->prepare($this->queries['tag']); check_db_error($this->db['tag_sth'], __FILE__, __LINE__); $this->prepare_filter_parameters(); // // Fetch the results and populate the array of groups. // $result = $this->db['tag_sth']->execute($this->params); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $locus = new Locus(); $locus->id = $row['tag_id']; $locus->annotation = $row['external_id']; $locus->chr = $row['chr']; $locus->bp = $row['bp']; $locus->marker = $row['marker']; $locus->seq = $row['seq']; $locus->num_alleles = $row['alleles']; $locus->num_snps = $row['snps']; $locus->num_parents = $row['parents']; $locus->num_progeny = $row['progeny']; $locus->valid_progeny = $row['valid_progeny']; $locus->num_ests = $row['ests']; $locus->num_pe_tags = $row['pe_radtags']; $locus->num_blast = $row['blast_hits']; // // Fetch SNPs and Alleles // $snp_res = $this->db['snp_sth']->execute(array($this->batch, $locus->id)); check_db_error($snp_res, __FILE__, __LINE__); while ($snp_row = $snp_res->fetchRow()) { $locus->snps .= $snp_row['col'] . "," . $snp_row['rank_1'] . ">" . $snp_row['rank_2'] . 
";"; } $locus->snps = substr($locus->snps, 0, -1); $all_res = $this->db['allele_sth']->execute(array($this->batch, $locus->id)); check_db_error($all_res, __FILE__, __LINE__); while ($all_row = $all_res->fetchRow()) { $locus->alleles .= $all_row['allele'] . ";"; } $locus->alleles = substr($locus->alleles, 0, -1); // // Add genotypes // $gen_res = $this->db['mat_sth']->execute(array($this->batch, $locus->id)); check_db_error($genres, __FILE__, __LINE__); while ($gen_row = $gen_res->fetchRow()) $locus->add_genotype($gen_row['id'], $gen_row['file'], $gen_row['allele']); $this->loci[$row['tag_id']] = $locus; } } } stacks-1.35/php/constants.php.dist000644 000765 000024 00000005762 12533677757 020020 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // // // Credentials to access Stacks MySQL databases // $db_user = "dbuser"; $db_pass = "dbpass"; $db_host = "localhost"; // // Path to the MySQL client program // $mysql_bin = "/usr/bin/mysql"; // // File system location of PHP/HTML files // $system_path = "_PKGDATADIR_" . "php/"; // // Location of perl script to run the exporting jobs. // $export_cmd = "_BINDIR_" . "stacks_export_notify.pl"; // // Name to print in page header // $site_title = "Stacks Analysis Pipeline"; // // URL path to root of PHP/HTML files // $root_path = "/stacks"; $img_path = "/stacks/images"; // // Length to wrap sequences at. 
// $display_len = 80; // // Colors for printing alleles/haplotypes // $colors = array("#008000", "#c00000", "#ffc600", "#29356c", "#860000", "#dc6200", "#4b398e", "#008f56", "#bf1e25", "#4cb8ff"); $color_size = count($colors); // // Color map version 2; for printing alleles/haplotypes. // $color_map = array( 'a' => "#0074D9", // Blue 'b' => "#FF4136", // Red 'c' => "#008000", // Dark Green 'd' => "#FF851B", // Orange 'e' => "#001f3f", // Navy 'f' => "#85144b", // Maroon 'g' => "#F012BE", // Fuchsia 'h' => "#39CCCC", // Teal 'i' => "#3D9970", // Olive 'j' => "#01FF70", // Lime 'k' => "#FFDC00", // Yellow 'l' => "#B10DC9", // Purple 'm' => "#111111", // Black 'n' => "#7FDBFF", // Aqua 'o' => "#AAAAAA", // Gray 'p' => "#DDDDDD", // Silver 'q' => "#2ECC40", // Green 'r' => "#c00000", 's' => "#ffc600", 't' => "#29356c", 'u' => "#860000", 'v' => "#dc6200", 'w' => "#4b398e", 'x' => "#008f56", 'y' => "#bf1e25", 'z' => "#4cb8ff", 'A' => "#26081C", 'B' => "#C44900", 'C' => "#036016", 'D' => "#936426", 'E' => "#E87EA1", 'F' => "#692E38", 'G' => "#19423F", 'H' => "#625F64", 'I' => "#432534", 'J' => "#323845", 'K' => "#758E4F", 'L' => "#B287A3", 'M' => "#C0F8D1", 'N' => "#03440C", 'O' => "#8EDCE6", 'P' => "#D5DCF9", 'Q' => "#A7B0CA", 'R' => "#0CBABA", 'S' => "#725E54", 'T' => "#EE2677", 'U' => "#84A98C", 'V' => "#FCCA46", 'W' => "#A1C181", 'X' => "#443627", 'Y' => "#FE7F2D", 'Z' => "#2EBE62" ); $color_map_size = count($color_map); ?> stacks-1.35/php/correct_genotype.php000644 000765 000024 00000010462 12335173442 020364 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $sample_id = isset($_GET['sample_id']) ? $_GET['sample_id'] : 0; $gtype = isset($_GET['gtype']) ? $_GET['gtype'] : 0; // Connect to the database $db = db_connect($database); // // Prepare some SQL queries // $query = "UPDATE genotype_corrections SET genotype=? WHERE id=?"; $db['upd_sth'] = $db['dbh']->prepare($query); check_db_error($db['upd_sth'], __FILE__, __LINE__); $query = "INSERT INTO genotype_corrections SET batch_id=?, catalog_id=?, sample_id=?, genotype=?"; $db['ins_sth'] = $db['dbh']->prepare($query); check_db_error($db['ins_sth'], __FILE__, __LINE__); $query = "DELETE FROM genotype_corrections WHERE id=?"; $db['del_sth'] = $db['dbh']->prepare($query); check_db_error($db['del_sth'], __FILE__, __LINE__); $query = "SELECT catalog_genotypes.id as id, catalog_genotypes.sample_id, catalog_genotypes.genotype, " . "genotype_corrections.genotype as correction, genotype_corrections.id as cid " . "FROM catalog_genotypes " . "LEFT JOIN genotype_corrections ON " . "(genotype_corrections.catalog_id=catalog_genotypes.catalog_id AND " . "genotype_corrections.sample_id=catalog_genotypes.sample_id AND " . "genotype_corrections.batch_id=catalog_genotypes.batch_id) " . "WHERE catalog_genotypes.batch_id=? AND " . "catalog_genotypes.catalog_id=? AND " . 
"catalog_genotypes.sample_id=?"; $db['geno_sth'] = $db['dbh']->prepare($query); check_db_error($db['geno_sth'], __FILE__, __LINE__); // // Fetch the existing genotypes from the database // $result = $db['geno_sth']->execute(array($batch_id, $tag_id, $sample_id)); check_db_error($result, __FILE__, __LINE__); if ($row = $result->fetchRow()) { $sample = array('id' => $row['id'], 'genotype' => strtolower($row['genotype']), 'corrected' => $row['correction'], 'corrected_id' => $row['cid']); } else { return; } $corrected = "false"; if ($gtype == "clr") { $result = $db['del_sth']->execute($sample['corrected_id']); check_db_error($result, __FILE__, __LINE__); $corrected = "false"; $gtype = $sample['genotype']; // // Is this genotype being reset to the original value? If so, delete the corrected record. // } else if ($gtype == $sample['genotype'] && strlen($sample['corrected_id']) > 0) { $result = $db['del_sth']->execute($sample['corrected_id']); check_db_error($result, __FILE__, __LINE__); $corrected = "false"; // // Is the corrected value for this genotype being changed? If so, update the corrected record. // } else if ($gtype != $sample['genotype'] && strlen($sample['corrected_id']) > 0) { $result = $db['upd_sth']->execute(array(strtoupper($gtype), $sample['corrected_id'])); check_db_error($result, __FILE__, __LINE__); $corrected = "true"; // // Otherwise, add a new correction. // } else if ($gtype != $sample['genotype']) { $result = $db['ins_sth']->execute(array($batch_id, $tag_id, $sample_id, strtoupper($gtype))); check_db_error($result, __FILE__, __LINE__); $corrected = "true"; } $id = "gtype_" . $batch_id . "_" . $tag_id . "_" . $sample_id; if ($corrected == "true") $gtype = strtoupper($gtype); header("Content-type: text/xml"); $xml_output = "\n" . "\n" . "$corrected\n" . "$gtype\n" . "$id\n" . 
"\n"; echo $xml_output; ?> stacks-1.35/php/correct_genotypes.php000644 000765 000024 00000014010 12335173442 020540 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_POST['db']) ? $_POST['db'] : ""; $tag_id = isset($_POST['tag_id']) ? $_POST['tag_id'] : 0; $batch_id = isset($_POST['batch_id']) ? $_POST['batch_id'] : 0; $op = isset($_POST['op']) ? $_POST['op'] : "display"; // Connect to the database if (!isset($db)) $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; $diplsay['op'] = $op; // // Prepare some SQL queries // $query = "SELECT catalog_genotypes.id as id, catalog_genotypes.sample_id, catalog_genotypes.genotype, " . "genotype_corrections.genotype as correction, genotype_corrections.id as cid, file " . "FROM catalog_genotypes " . "LEFT JOIN genotype_corrections ON " . "(genotype_corrections.catalog_id=catalog_genotypes.catalog_id AND " . "genotype_corrections.sample_id=catalog_genotypes.sample_id AND " . "genotype_corrections.batch_id=catalog_genotypes.batch_id) " . "JOIN samples ON (catalog_genotypes.sample_id=samples.id) " . "WHERE catalog_genotypes.batch_id=? and catalog_genotypes.catalog_id=? " . 
"ORDER BY catalog_genotypes.sample_id"; $db['geno_sth'] = $db['dbh']->prepare($query); check_db_error($db['geno_sth'], __FILE__, __LINE__); $query = "UPDATE genotype_corrections SET genotype=? WHERE id=?"; $db['upd_sth'] = $db['dbh']->prepare($query); check_db_error($db['upd_sth'], __FILE__, __LINE__); $query = "INSERT INTO genotype_corrections SET batch_id=?, catalog_id=?, sample_id=?, genotype=?"; $db['ins_sth'] = $db['dbh']->prepare($query); check_db_error($db['ins_sth'], __FILE__, __LINE__); $query = "DELETE FROM genotype_corrections WHERE id=?"; $db['del_sth'] = $db['dbh']->prepare($query); check_db_error($db['del_sth'], __FILE__, __LINE__); $query = "SELECT genotype_corrections.id FROM genotype_corrections " . "JOIN catalog_genotypes ON " . "(genotype_corrections.catalog_id=catalog_genotypes.catalog_id AND " . "genotype_corrections.sample_id=catalog_genotypes.sample_id AND " . "genotype_corrections.batch_id=catalog_genotypes.batch_id) " . "WHERE genotype_corrections.batch_id=? AND genotype_corrections.catalog_id=?"; $db['res_sth'] = $db['dbh']->prepare($query); check_db_error($db['res_sth'], __FILE__, __LINE__); if ($op == "reset") { reset_marker($display); } else if ($op == "correct") { correct_marker($display); } include_once("catalog_genotypes.php"); function reset_marker($display) { global $db; $result = $db['res_sth']->execute(array($display['batch_id'], $display['tag_id'])); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $r = $db['del_sth']->execute($row['id']); check_db_error($r, __FILE__, __LINE__); } } function correct_marker($display) { global $db; $gtypes = array(); $form_gtypes = array(); // // Fetch the existing genotypes from the database // $result = $db['geno_sth']->execute(array($display['batch_id'], $display['tag_id'])); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $gtypes[$row['sample_id']] = array('id' => $row['id'], 'file' => $row['file'], 'genotype' => 
strtolower($row['genotype']), 'corrected' => $row['correction'], 'corrected_id' => $row['cid']); } // // Fetch the corrected genotypes from the submitted form // foreach ($_POST as $key => $value) { if (substr($key, 0, 5) != "gtype") continue; // ID should look like: 'gtype_batchid_catalogid_sampleid' $parts = explode("_", $key); $form_gtypes[$parts[3]] = strtolower($value); //print "Assigning $value to $parts[3]
\n"; } foreach ($form_gtypes as $sample_id => $sample) { //print "LOOKING at sample ID: $sample_id: $sample, original value: " . $gtypes[$sample_id]['genotype'] . "
\n"; // // Is this genotype being reset to the original value? If so, delete the corrected record. // if ($sample == $gtypes[$sample_id]['genotype'] && strlen($gtypes[$sample_id]['corrected_id']) > 0) { $result = $db['del_sth']->execute($gtypes[$sample_id]['corrected_id']); check_db_error($result, __FILE__, __LINE__); // // Is the corrected value for this genotype being changed? If so, update the corrected record. // } else if ($sample != $gtypes[$sample_id]['genotype'] && strlen($gtypes[$sample_id]['corrected_id']) > 0) { $result = $db['upd_sth']->execute(array(strtoupper($sample), $gtypes[$sample_id]['corrected_id'])); check_db_error($result, __FILE__, __LINE__); // // Otherwise, add a new correction. // } else if ($sample != $gtypes[$sample_id]['genotype']) { $result = $db['ins_sth']->execute(array($display['batch_id'], $display['tag_id'], $sample_id, strtoupper($sample))); check_db_error($result, __FILE__, __LINE__); } } } ?> stacks-1.35/php/db_functions.php000644 000765 000024 00000003603 12441417455 017470 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . 
// function db_connect($database) { global $db_user, $db_pass, $db_host; $dsn = array( 'phptype' => 'mysql', 'username' => $db_user, 'password' => $db_pass, 'hostspec' => $db_host, 'port' => 3306 ); $options = array(); if (strlen($database) > 0) $dsn['database'] = $database; else $dsn['database'] = false; $dbh = MDB2::connect($dsn, $options); if (MDB2::isError($dbh)) { die("File: " . __FILE__ . " (line " . __LINE__ . ") " . $dbh->getMessage()); } // Set the database package to always return // results as an associative array $dbh->setFetchMode(MDB2_FETCHMODE_ASSOC); // The $db array will hold the database handle and // common, prepared SQL statements. $db = array(); $db['dbh'] = $dbh; $db['name'] = $database; return $db; } function check_db_error($sth, $file, $line) { if (MDB2::isError($sth)) { $error_str = "File: $file (line $line)
\n " . "" . $sth->getMessage() . "
\n" . $sth->getUserInfo() . "
\n"; die($error_str); } } ?> stacks-1.35/php/export.js000644 000765 000024 00000010302 12335173442 016150 0ustar00catchenstaff000000 000000 // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // var http_req; function toggle_export_popup(id) { var div_obj = document.getElementById(id); if (div_obj.style.display == "none") { div_obj.style.display = ""; } else { div_obj.style.display = "none"; } } function close_export_popup(id) { var div_obj = document.getElementById(id); div_obj.style.display = "none"; } function export_data(id) { var form_obj = document.getElementById(id + "_frm"); var otype, dtype; for (i = 0; i < form_obj.otype.length; i++) if (form_obj.otype[i].checked) otype = form_obj.otype[i].value; for (i = 0; i < form_obj.dtype.length; i++) if (form_obj.dtype[i].checked) dtype = form_obj.dtype[i].value; var url = form_obj.url.value + "&" + "email=" + form_obj.email.value + "&" + "dtype=" + dtype + "&" + "dlim=" + form_obj.dlim.value + "&" + "mtype=" + form_obj.mtype.value + "&" + "mcor=" + form_obj.mcor.value + "&" + "otype=" + otype; //alert(url); // // Prepare and send XMLHttpRequest Object. 
// http_req = false; try { http_req = new XMLHttpRequest(); } catch(e) { http_req = false; } if (http_req) { http_req.onreadystatechange = process_export; http_req.open("GET", url, true); http_req.send(""); } var txt_obj = document.getElementById(id + "_txt"); while (txt_obj.childNodes.length > 0) txt_obj.removeChild(txt_obj.firstChild); } function gen_export_result(loci, email) { var div_obj = document.getElementById("export_popup_txt"); var res_p = document.createElement("p"); res_p.innerHTML = "Exporting " + loci + " loci. " + "E-mail will be sent to " + email + "" + " when it is complete."; var app_obj = div_obj.appendChild(res_p); var res_p = document.createElement("p"); res_p.innerHTML = "close"; app_obj.appendChild(res_p); } function process_export() { // // Possible readyState values: // 0 = uninitialized // 1 = loading // 2 = loaded // 3 = interactive // 4 = complete // if (http_req.readyState == 4) { // Check that the status is "OK" if (http_req.status == 200) { var xml_doc = http_req.responseXML; var obj = xml_doc.getElementsByTagName("loci"); var loci = obj[0].childNodes[0].nodeValue; obj = xml_doc.getElementsByTagName("email"); var email = obj[0].childNodes[0].nodeValue; obj = xml_doc.getElementsByTagName("msg"); var msg = obj[0].childNodes[0].nodeValue; //alert(msg); gen_export_result(loci, email); } else { alert("There was a problem retrieving the XML data:\n" + http_req.statusText); } } } function toggle_vis(form_id, name) { var form_obj = document.getElementById(form_id); var g_obj = document.getElementById('gopts'); var h_obj = document.getElementById('hopts'); for(i = 0; i < form_obj.elements.length; i++) if (form_obj.elements[i].name == name && form_obj.elements[i].value == "geno") if (form_obj.elements[i].checked == true) { g_obj.style.display = ""; h_obj.style.display = "none"; } else { g_obj.style.display = "none"; h_obj.style.display = ""; } } stacks-1.35/php/export_batch.php000644 000765 000024 00000011570 12335173442 017474 
0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); require_once("CatalogClass.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $batch_id = isset($_GET['id']) ? $_GET['id'] : 0; $email = isset($_GET['email']) ? $_GET['email'] : ""; $data_type = isset($_GET['dtype']) ? $_GET['dtype'] : "haplo"; $map_type = isset($_GET['mtype']) ? $_GET['mtype'] : "gen"; $depth_lim = isset($_GET['dlim']) ? $_GET['dlim'] : "1"; $man_cor = isset($_GET['mcor']) ? $_GET['mcor'] : "0"; $ex_type = isset($_GET['otype']) ? $_GET['otype'] : "tsv"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['id'] = $batch_id; $display['db'] = $database; $display['filter_type'] = array(); $filters = array(); process_filter($display, $filters); // // Create a new catalog object to hold filtered loci // $catalog = new Catalog($db, $batch_id, $display); $catalog->determine_count(); $loci_cnt = $catalog->num_loci(); $mc = ($man_cor > 0) ? "-c" : ""; if ($data_type == "haplo") { $dt = "-a haplo"; $dl = "-L $depth_lim"; } else if ($data_type == "geno") { $dt = "-a geno -m $map_type $mc"; $dl = ""; } $cmd = $export_cmd . " -D $database -b $batch_id $dt $dl -e $email -t $ex_type -F " . implode(",", $filters); header("Content-type: text/xml"); $xml_output = "\n" . "\n" . 
"" . number_format($loci_cnt) . "\n" . "$email\n" . "$cmd\n" . "\n"; echo $xml_output; // // Execute the email notification program, which will run the export and email // the submitter upon its conclusion. // // export_cmd is defined in the constants file. // exec($cmd . " >/dev/null &"); function process_filter(&$display_params, &$filters) { if (!isset($_GET['filter_type'])) return; foreach ($_GET['filter_type'] as $filter) { array_push($display_params['filter_type'], $filter); if ($filter == "alle") { $display_params['filter_alle_l'] = $_GET['filter_alle_l']; array_push($filters, "alle_l=" . $_GET['filter_alle_l']); $display_params['filter_alle_u'] = $_GET['filter_alle_u']; array_push($filters, "alle_u=" . $_GET['filter_alle_u']); } else if ($filter == "snps") { $display_params['filter_snps_l'] = $_GET['filter_snps_l']; array_push($filters, "snps_l=" . $_GET['filter_snps_l']); $display_params['filter_snps_u'] = $_GET['filter_snps_u']; array_push($filters, "snps_u=" . $_GET['filter_snps_u']); } else if ($filter == "pare") { $display_params['filter_pare_l'] = $_GET['filter_pare_l']; array_push($filters, "pare_l=" . $_GET['filter_pare_l']); $display_params['filter_pare_u'] = $_GET['filter_pare_u']; array_push($filters, "pare_u=" . $_GET['filter_pare_u']); } else if ($filter == "prog") { $display_params['filter_prog'] = $_GET['filter_prog']; array_push($filters, "prog=" . $_GET['filter_prog']); } else if ($filter == "vprog") { $display_params['filter_vprog'] = $_GET['filter_vprog']; array_push($filters, "vprog=" . $_GET['filter_vprog']); } else if ($filter == "cata") { $display_params['filter_cata'] = $_GET['filter_cata']; array_push($filters, "cata=" . $_GET['filter_cata']); } else if ($filter == "mark") { $display_params['filter_mark'] = $_GET['filter_mark']; array_push($filters, "mark=" . $_GET['filter_mark']); } else if ($filter == "gcnt") { $display_params['filter_gcnt'] = $_GET['filter_gcnt']; array_push($filters, "gcnt=" . 
$_GET['filter_gcnt']); } else if ($filter == "ref") { $display_params['filter_ref'] = $_GET['filter_ref']; array_push($filters, "ref=" . $_GET['filter_ref']); } else if ($filter == "loc") { $display_params['filter_chr'] = $_GET['filter_chr']; array_push($filters, "chr=" . $_GET['filter_chr']); $display_params['filter_sbp'] = $_GET['filter_sbp']; array_push($filters, "sbp=" . $_GET['filter_sbp']); $display_params['filter_ebp'] = $_GET['filter_ebp']; array_push($filters, "ebp=" . $_GET['filter_ebp']); } } } ?> stacks-1.35/php/fst_view.php000644 000765 000024 00000010020 12533677757 016647 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $batch_type = isset($_GET['type']) ? $_GET['type'] : "map"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare some SQL queries // $query = "SELECT pop_id, pop_name FROM populations " . 
"WHERE batch_id=?"; $db['pop_sth'] = $db['dbh']->prepare($query); check_db_error($db['pop_sth'], __FILE__, __LINE__); $query = "SELECT col, pop_id_1, pop_id_2, pi_o, amova_fst_c as fst, fishers_p, lod FROM fst " . "WHERE batch_id=? AND tag_id=?"; $db['fst_sth'] = $db['dbh']->prepare($query); check_db_error($db['fst_sth'], __FILE__, __LINE__); // // Fetch population names if available. // $pop_names = array(); if ($batch_type == "population") { $result = $db['pop_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) $pop_names[$row['pop_id']] = $row['pop_name']; } $result = $db['fst_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $stats = array(); $pops = array(); while ($row = $result->fetchRow()) { $a = array('col' => $row['col'], 'pid_1' => $row['pop_id_1'], 'pid_2' => $row['pop_id_2'], 'pi_o' => $row['pi_o'], 'fishers_p' => $row['fishers_p'], 'lod' => $row['lod'], 'fst' => $row['fst']); if (!isset($stats[$row['col']])) $stats[$row['col']] = array(); array_push($stats[$row['col']], $a); $pops[$row['pop_id_1']] = $row['pop_id_1']; $pops[$row['pop_id_2']] = $row['pop_id_2']; } ksort($stats); ksort($pops); // // Assign population IDs for any missing population names. // foreach ($pops as $pop_id) if (!isset($pop_names[$pop_id])) $pop_names[$pop_id] = $pop_id; ksort($pop_names); $json_str = "{" . "\"path\": \"$root_path\"," . "\"batch_id\": \"$batch_id\"," . "\"db\": \"$database\"," . "\"id\": \"$tag_id\"," . "\"type\": \"$batch_type\","; $json_str .= "\"popkey\": {"; // // Print the population key. // foreach ($pop_names as $pop_id => $population) { $json_str .= "\"$pop_id\": \"$population\","; } $json_str = substr($json_str, 0, -1); $json_str .= "}," . "\"columns\": {"; foreach ($stats as $col => $stat) { $json_str .= "\"$col\": ["; foreach ($stat as $s) { $fst = $s['fst'] != 0 ? sprintf("%.3f", $s['fst']) : "0"; $lod = $s['lod'] != 0 ? 
sprintf("%.3f", $s['lod']) : "0"; $pio = $s['pi_o'] != 0 ? sprintf("%.3f", $s['pi_o']) : "0"; $p = $s['fishers_p'] != 0 ? sprintf("%.3f", $s['fishers_p']) : "0"; $json_str .= "{" . "\"pid_1\": \"$s[pid_1]\"," . "\"pid_2\": \"$s[pid_2]\"," . "\"pi_o\": \"$pio\"," . "\"p\": \"$p\"," . "\"lod\": \"$lod\"," . "\"fst\": \"$fst\"" . "},"; } if (count($stat) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "],"; } if (count($stats) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "}}"; echo $json_str; ?> stacks-1.35/php/hapstat_view.php000644 000765 000024 00000006423 12533677757 017533 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $batch_type = isset($_GET['type']) ? $_GET['type'] : "map"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare some SQL queries // $query = "SELECT pop_id, pop_name FROM populations " . 
"WHERE batch_id=?"; $db['pop_sth'] = $db['dbh']->prepare($query); check_db_error($db['pop_sth'], __FILE__, __LINE__); $query = "SELECT pop_id, bp, n, hapcnt, gene_div, hap_div FROM hapstats " . "WHERE batch_id=? AND tag_id=?"; $db['stats_sth'] = $db['dbh']->prepare($query); check_db_error($db['stats_sth'], __FILE__, __LINE__); // // Fetch population names if available. // $pop_names = array(); if ($batch_type == "population") { $result = $db['pop_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) $pop_names[$row['pop_id']] = $row['pop_name']; } $result = $db['stats_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $stats = array(); while ($row = $result->fetchRow()) { $a = array('bp' => $row['bp'], 'n' => $row['n'], 'hapcnt' => $row['hapcnt'], 'gene_div' => $row['gene_div'], 'hap_div' => $row['hap_div'], 'pop_id' => $row['pop_id']); $stats[$row['pop_id']] = $a; } ksort($stats); $json_str = "{" . "\"path\": \"$root_path\"," . "\"batch_id\": \"$batch_id\"," . "\"db\": \"$database\"," . "\"id\": \"$tag_id\"," . "\"type\": \"$batch_type\","; $json_str .= "\"hapstats\": ["; foreach ($stats as $pop_id => $stat) if (!isset($pop_names[$pop_id])) $pop_names[$pop_id] = $pop_id; $rows = 0; foreach ($stats as $pop_id => $s) { $gdiv = $s['gene_div'] > 0 ? sprintf("%.3f", $s['gene_div']) : $s['gene_div']; $hdiv = $s['hap_div'] > 0 ? sprintf("%.3f", $s['hap_div']) : $s['hap_div']; $json_str .= "{" . "\"pop_id\": \"" . $pop_names[$pop_id] . "\"," . "\"bp\": \"$s[bp]\"," . "\"n\": \"$s[n]\"," . "\"hapcnt\": \"$s[hapcnt]\"," . "\"genediv\": \"$gdiv\"," . "\"hapdiv\": \"$hdiv\"" . "},"; $rows++; } if ($rows > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]}"; echo $json_str; ?> stacks-1.35/php/header.php000644 000765 000024 00000001543 12335173442 016241 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("MDB2.php"); require_once("constants.php"); require_once("db_functions.php"); require_once("stacks_functions.php"); ?> stacks-1.35/php/images/000755 000765 000024 00000000000 12574070564 015550 5ustar00catchenstaff000000 000000 stacks-1.35/php/index.php000644 000765 000024 00000006621 12441417455 016125 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? 
$_GET['db'] : ""; if (strlen($database) == 0) write_database_list($database); else write_database($database); function write_database_list($database) { global $db_user, $db_pass, $db_host, $root_path, $img_path, $mysql_bin; $databases = array(); exec("$mysql_bin --user=$db_user --password=$db_pass -h $db_host -N -B -e \"SHOW DATABASES LIKE '%_radtags'\"", $databases); $page_title = "Stacks Databases"; write_header($page_title); echo <<< EOQ

Stacks Databases

EOQ; foreach ($databases as $dbase) { print "\n" . " \n" . "\n"; } echo <<< EOQ
Database
$dbase
EOQ; write_footer(); } function write_database($database) { global $root_path, $img_path; // // Connect to the database // $db = db_connect($database); // // Prepare some SQL queries // $query = "SELECT id, date, description, type FROM batches"; $db['batch_sth'] = $db['dbh']->prepare($query); check_db_error($db['batch_sth'], __FILE__, __LINE__); $page_title = "RAD-Tag Analyses"; write_header($page_title); echo <<< EOQ

RAD-Tag Samples

EOQ; $result = $db['batch_sth']->execute(); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . "\n"; } echo <<< EOQ
    Batch ID Date Type Description
CatalogSamples" . $row['id'] . "" . $row['date'] . "" . $row['type'] . "" . $row['description'] . "
EOQ; write_footer(); } ?> stacks-1.35/php/last_modified.php000644 000765 000024 00000000035 12533677757 017631 0ustar00catchenstaff000000 000000 Wed Apr 29 13:05:18 PDT 2015 stacks-1.35/php/Locus.php000644 000765 000024 00000004200 12335173442 016067 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // class Locus { var $id; var $seq; var $annotation; var $chr; var $bp; var $snps; var $alleles; var $genotypes; // Array of samples that possess this locus var $num_parents; var $num_progeny; var $num_snps; var $num_alleles; var $num_ests; var $num_pe_tags; var $num_blast; var $valid_progeny; var $marker; function Locus() { $this->genotypes = array(); $this->annotation = ""; $this->chr = ""; $this->bp = 0; $this->marker = ""; $this->snps = ""; $this->alleles = ""; $this->num_parents = 0; $this->num_progeny = 0; $this->valid_progeny = 0; $this->num_alleles = 0; $this->num_snps = 0; $this->num_ests = 0; $this->num_pe_tags = 0; $this->num_blast = 0; } function &genotypes() { return $this->genotypes; } function &genotype($id) { if (isset($this->genotypes[$id])) return $this->genotypes[$id]; else return NULL; } function add_genotype($id, $file, $allele) { $a = array('file' => $file, 'allele' => $allele, 'tag_id' => $id); if (!isset($this->genotypes[$file])) $this->genotypes[$file] = array(); array_push($this->genotypes[$file], $a); } } ?> 
stacks-1.35/php/phist_view.php000644 000765 000024 00000006711 12533677757 017216 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $batch_type = isset($_GET['type']) ? $_GET['type'] : "map"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare some SQL queries // $query = "SELECT pop_id, pop_name FROM populations " . "WHERE batch_id=?"; $db['pop_sth'] = $db['dbh']->prepare($query); check_db_error($db['pop_sth'], __FILE__, __LINE__); $query = "SELECT pop_id_1, pop_id_2, phist, fpst FROM phist " . "WHERE batch_id=? AND tag_id=?"; $db['fst_sth'] = $db['dbh']->prepare($query); check_db_error($db['fst_sth'], __FILE__, __LINE__); // // Fetch population names if available. 
// $pop_names = array(); if ($batch_type == "population") { $result = $db['pop_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) $pop_names[$row['pop_id']] = $row['pop_name']; } $result = $db['fst_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $stats = array(); $pops = array(); while ($row = $result->fetchRow()) { $a = array('pid_1' => $row['pop_id_1'], 'pid_2' => $row['pop_id_2'], 'phist' => $row['phist'], 'fpst' => $row['fpst']); array_push($stats, $a); $pops[$row['pop_id_1']] = $row['pop_id_1']; $pops[$row['pop_id_2']] = $row['pop_id_2']; } ksort($stats); ksort($pops); // // Assign population IDs for any missing population names. // foreach ($pops as $pop_id) if (!isset($pop_names[$pop_id])) $pop_names[$pop_id] = $pop_id; ksort($pop_names); $json_str = "{" . "\"path\": \"$root_path\"," . "\"batch_id\": \"$batch_id\"," . "\"db\": \"$database\"," . "\"id\": \"$tag_id\"," . "\"type\": \"$batch_type\","; $json_str .= "\"popkey\": {"; // // Print the population key. // foreach ($pop_names as $pop_id => $population) { $json_str .= "\"$pop_id\": \"$population\","; } $json_str = substr($json_str, 0, -1); $json_str .= "}," . "\"phist\": ["; foreach ($stats as $s) { $phist = $s['phist'] != 0 ? sprintf("%.3f", $s['phist']) : "0"; $fpst = $s['fpst'] != 0 ? sprintf("%.3f", $s['fpst']) : "0"; $json_str .= "{" . "\"pid_1\": \"$s[pid_1]\"," . "\"pid_2\": \"$s[pid_2]\"," . "\"phist\": \"$phist\"," . "\"fstp\": \"$fpst\"" . "},"; } if (count($stats) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]}"; echo $json_str; ?> stacks-1.35/php/pop_view.php000644 000765 000024 00000024157 12533677757 016671 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. 
// // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $batch_type = isset($_GET['type']) ? $_GET['type'] : "map"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare some SQL queries // $query = "SELECT samples.id, samples.sample_id, samples.type, file, tag_id, allele, depth, lnl, pop_id " . "FROM matches " . "JOIN samples ON (matches.sample_id=samples.id) " . "WHERE matches.batch_id=? AND catalog_id=? ORDER BY samples.id"; $db['mat_sth'] = $db['dbh']->prepare($query); check_db_error($db['mat_sth'], __FILE__, __LINE__); $query = "SELECT col, rank_1, rank_2, rank_3, rank_4 FROM catalog_snps " . "WHERE batch_id=? AND tag_id=? ORDER BY col"; $db['snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['snp_sth'], __FILE__, __LINE__); $query = "SELECT allele FROM catalog_alleles " . "WHERE batch_id=? AND tag_id=? "; $db['all_sth'] = $db['dbh']->prepare($query); check_db_error($db['all_sth'], __FILE__, __LINE__); $query = "SELECT geno_map FROM markers " . "WHERE batch_id=? AND catalog_id=? 
"; $db['map_sth'] = $db['dbh']->prepare($query); check_db_error($db['map_sth'], __FILE__, __LINE__); $query = "SELECT pop_id, pop_name FROM populations " . "WHERE batch_id=?"; $db['pop_sth'] = $db['dbh']->prepare($query); check_db_error($db['pop_sth'], __FILE__, __LINE__); $query = "SELECT count(batch_id) as cnt FROM sumstats " . "WHERE batch_id=? AND tag_id=?"; $db['stats_sth'] = $db['dbh']->prepare($query); check_db_error($db['stats_sth'], __FILE__, __LINE__); $query = "SELECT count(batch_id) as cnt FROM fst " . "WHERE batch_id=? AND tag_id=?"; $db['fst_sth'] = $db['dbh']->prepare($query); check_db_error($db['fst_sth'], __FILE__, __LINE__); $query = "SELECT count(batch_id) as cnt FROM hapstats " . "WHERE batch_id=? AND tag_id=?"; $db['hapstats_sth'] = $db['dbh']->prepare($query); check_db_error($db['hapstats_sth'], __FILE__, __LINE__); $query = "SELECT count(batch_id) as cnt FROM phist " . "WHERE batch_id=? AND tag_id=?"; $db['phist_sth'] = $db['dbh']->prepare($query); check_db_error($db['phist_sth'], __FILE__, __LINE__); $query = "SELECT marker, catalog_genotypes.sample_id, file, " . "catalog_genotypes.genotype, genotype_corrections.genotype as corrected " . "FROM catalog_genotypes " . "LEFT JOIN genotype_corrections ON " . "(genotype_corrections.catalog_id=catalog_genotypes.catalog_id AND " . "genotype_corrections.sample_id=catalog_genotypes.sample_id AND " . "genotype_corrections.batch_id=catalog_genotypes.batch_id) " . "JOIN samples ON (catalog_genotypes.sample_id=samples.id) " . "JOIN catalog_index ON (catalog_genotypes.catalog_id=catalog_index.tag_id AND " . "catalog_genotypes.batch_id=catalog_index.batch_id) " . "WHERE catalog_genotypes.batch_id=? and catalog_genotypes.catalog_id=? " . "ORDER BY catalog_genotypes.sample_id"; $db['geno_sth'] = $db['dbh']->prepare($query); check_db_error($db['geno_sth'], __FILE__, __LINE__); // // Check for the existence of SNP summary statistics or Fst data. 
// $snp_sumstats = 0; $snp_fst_vals = 0; $hap_sumstats = 0; $hap_fst_vals = 0; if ($batch_type == "population") { $result = $db['stats_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($row = $result->fetchRow()) { if ($row['cnt'] > 0) $snp_sumstats = 1; } $result = $db['fst_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($row = $result->fetchRow()) { if ($row['cnt'] > 0) $snp_fst_vals = 1; } $result = $db['hapstats_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($row = $result->fetchRow()) { if ($row['cnt'] > 0) $hap_sumstats = 1; } $result = $db['phist_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($row = $result->fetchRow()) { if ($row['cnt'] > 0) $hap_fst_vals = 1; } } // // Fetch population names if available. // $pop_names = array(); if ($batch_type == "population") { $result = $db['pop_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) $pop_names[$row['pop_id']] = $row['pop_name']; } $result = $db['snp_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $json_str = "{" . "\"path\": \"$root_path\"," . "\"batch_id\": \"$batch_id\"," . "\"db\": \"$database\"," . "\"id\": \"$tag_id\"," . "\"type\": \"$batch_type\"," . "\"snpstat\": \"$snp_sumstats\"," . "\"snpfst\": \"$snp_fst_vals\"," . "\"hapstat\": \"$snp_sumstats\"," . "\"hapfst\": \"$snp_fst_vals\","; $json_str .= "\"snps\": ["; $rows = 0; while ($row = $result->fetchRow()) { $json_str .= "{" . "\"col\": \"$row[col]\"," . "\"rank_1\": \"$row[rank_1]\"," . "\"rank_2\": \"$row[rank_2]\"," . "\"rank_3\": \"$row[rank_3]\"," . "\"rank_4\": \"$row[rank_4]\"" . "},"; $rows++; } if ($rows > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]," . 
"\"alleles\": ["; $result = $db['map_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); if ($result->numRows() > 0) { $row = $result->fetchRow(); } else { $row = array(); } $rows = 0; if (isset($row['geno_map'])) { $map = array(); $genos = explode(";", $row['geno_map']); $i = 0; foreach ($genos as $g) { if (strlen($g) == 0) continue; $m = explode(":", $g); $map[$m[0]] = $m[1]; $alleles[$m[0]] = $colors[$i % $color_size]; $i++; } asort($map); foreach ($map as $hapl => $geno) { $json_str .= "{" . "\"gtype\": \"$geno\"," . "\"htype\": \"$hapl\"" . "},"; } $rows++; } else { $result = $db['all_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $gtypes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; $i = 0; while ($row = $result->fetchRow()) { $json_str .= "{" . "\"gtype\": \"" . $gtypes[$i % 52] . "\"," . "\"htype\": \"$row[allele]\"" . "},"; $i++; $rows++; } } if ($rows > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]," . 
"\"popkey\": {"; $htypes = array(); $gtypes = array(); // // Fetch and record Observed Haplotypes // $result = $db['mat_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $a = array('id' => $row['id'], 'file' => $row['file'], 'allele' => $row['allele'], 'tag_id' => $row['tag_id'], 'depth' => $row['depth'], 'lnl' => $row['lnl'], 'pop_id' => $row['pop_id']); if (!isset($htypes[$row['pop_id']])) $htypes[$row['pop_id']] = array(); if (!isset($htypes[$row['pop_id']][$row['file']])) $htypes[$row['pop_id']][$row['file']] = array(); array_push($htypes[$row['pop_id']][$row['file']], $a); } // // Fetch and record Genotypes // $result = $db['geno_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $gtypes[$row['file']] = array('id' => $row['sample_id'], 'file' => $row['file'], 'genotype' => $row['genotype'], 'corrected' => $row['corrected'], 'marker' => $row['marker']); } ksort($htypes); // // Print the population key. // foreach ($htypes as $pop_id => $population) { if (isset($pop_names[$pop_id])) $json_str .= "\"$pop_id\": \"$pop_names[$pop_id]\","; else $json_str .= "\"$pop_id\": \"\","; } $json_str = substr($json_str, 0, -1); $json_str .= "}," . "\"populations\": {"; // // Print the observed haplotypes grouped by population. // foreach ($htypes as $pop_id => $population) { $json_str .= "\"$pop_id\": ["; foreach ($population as $sample => $match) { $genotype = ""; $corrected = 0; $marker = ""; if (count($gtypes) > 0 && isset($gtypes[$sample])) { $marker = $gtypes[$sample]['marker']; if (strlen($gtypes[$sample]['corrected']) > 0) { $genotype = $gtypes[$sample]['corrected']; $corrected = 1; } else { $genotype = $gtypes[$sample]['genotype']; } } $json_str .= "{" . "\"sample\": \"$sample\"," . "\"sample_id\": \"" . $match[0]['id'] . "\"," . "\"lnl\": \"" . $match[0]['lnl'] . 
"\","; if ($batch_type == "map") { $json_str .= "\"marker\": \"$marker\"," . "\"genotype\": \"$genotype\"," . "\"corrected\": \"$corrected\","; } $json_str .= "\"obshap\": ["; foreach ($match as $m) { $json_str .= "{" . "\"tag_id\": \"$m[tag_id]\"," . "\"allele\": \"$m[allele]\"," . "\"depth\": \"$m[depth]\"" . "},"; } $json_str = substr($json_str, 0, -1); $json_str .= "]},"; } $json_str = substr($json_str, 0, -1); $json_str .= "],"; } $json_str = substr($json_str, 0, -1); $json_str .= "}}"; echo $json_str; ?> stacks-1.35/php/population_view.js000644 000765 000024 00000117767 12540304631 020074 0ustar00catchenstaff000000 000000 var colors = { 'a' : "#0074D9", // Blue 'b' : "#FF4136", // Red 'c' : "#008000", // Dark Green 'd' : "#FF851B", // Orange 'e' : "#001f3f", // Navy 'f' : "#85144b", // Maroon 'g' : "#F012BE", // Fuchsia 'h' : "#39CCCC", // Teal 'i' : "#3D9970", // Olive 'j' : "#01FF70", // Lime 'k' : "#FFDC00", // Yellow 'l' : "#B10DC9", // Purple 'm' : "#111111", // Black 'n' : "#7FDBFF", // Aqua 'o' : "#AAAAAA", // Gray 'p' : "#DDDDDD", // Silver 'q' : "#2ECC40", // Green 'r' : "#c00000", 's' : "#ffc600", 't' : "#29356c", 'u' : "#860000", 'v' : "#dc6200", 'w' : "#4b398e", 'x' : "#008f56", 'y' : "#bf1e25", 'z' : "#4cb8ff", 'A' : "#26081C", 'B' : "#C44900", 'C' : "#036016", 'D' : "#936426", 'E' : "#E87EA1", 'F' : "#692E38", 'G' : "#19423F", 'H' : "#625F64", 'I' : "#432534", 'J' : "#323845", 'K' : "#758E4F", 'L' : "#B287A3", 'M' : "#C0F8D1", 'N' : "#03440C", 'O' : "#8EDCE6", 'P' : "#D5DCF9", 'Q' : "#A7B0CA", 'R' : "#0CBABA", 'S' : "#725E54", 'T' : "#EE2677", 'U' : "#84A98C", 'V' : "#FCCA46", 'W' : "#A1C181", 'X' : "#443627", 'Y' : "#FE7F2D", 'Z' : "#2EBE62" }; marker_types = { 'ab/--' : ['aa', 'bb', 'ab', '--'], '--/ab' : ['aa', 'bb', 'ab', '--'], 'ab/aa' : ['aa', 'bb', 'ab', '--'], 'aa/ab' : ['aa', 'bb', 'ab', '--'], 'ab/ab' : ['aa', 'ab', 'bb', '--'], 'ab/ac' : ['aa', 'ab', 'ac', 'bc', '--'], 'ab/cd' : ['aa', 'bb', 'cc', 'dd', 'ac', 'ad', 'bc', 'bd', '--'], 
'aa/bb' : ['aa', 'bb', 'ab', '--'], 'ab/cc' : ['aa', 'bb', 'ab', 'ac', 'bc', 'cc', '--'], 'cc/ab' : ['aa', 'bb', 'ab', 'ac', 'bc', 'cc', '--'] }; function ajax_locus_population_view(id, root_path, url) { var tr_obj = document.getElementById(id); var caret = document.getElementById(id + "_img"); if (tr_obj.style.display == "none") { tr_obj.style.display = ""; caret.src = root_path + "/caret-d.png"; // // Make the AJAX query for JSON encoded data for this locus. // $.ajax({ dataType: "json", url: url, success: build_population_view }); } else { tr_obj.style.display = "none"; caret.src = root_path + "/caret-u.png"; } } function ajax_locus_sumstats_view(id, url) { var div_obj = document.getElementById(id + "_sumstat_div"); if (div_obj.style.display == "none") { // // Make the AJAX query for JSON encoded data for this locus. // $.ajax({ dataType: "json", url: url, success: build_sumstats_view }); } } function ajax_locus_fst_view(id, url) { var div_obj = document.getElementById(id + "_fst_div"); if (div_obj.style.display == "none") { // // Make the AJAX query for JSON encoded data for this locus. // $.ajax({ dataType: "json", url: url, success: build_fst_view }); } } function ajax_locus_hapstats_view(id, url) { var div_obj = document.getElementById(id + "_hapstat_div"); if (div_obj.style.display == "none") { // // Make the AJAX query for JSON encoded data for this locus. // $.ajax({ dataType: "json", url: url, success: build_hapstats_view }); } } function ajax_locus_phist_view(id, url) { var div_obj = document.getElementById(id + "_phist_div"); if (div_obj.style.display == "none") { // // Make the AJAX query for JSON encoded data for this locus. // $.ajax({ dataType: "json", url: url, success: build_phist_view }); } } function ajax_locus_stack_view(id, url) { var div_obj = document.getElementById(id + "_stacks_div"); $("html, body").css("cursor", "wait"); if (div_obj.style.display == "none") { // // Make the AJAX query for JSON encoded data for this locus. 
// $.ajax({ dataType: "json", url: url, success: build_stack_view }); } } function build_population_view(json, status, jqXHR) { var url = json.path + "/stack_view.php" + "?db=" + json.db + "&batch_id=" + json.batch_id + "&tag_id=" + json.id; var html = "
\n" + "
\n" + "
View Stacks
\n"; var snps = write_snps_table(json); html += snps; // // Generate map of genotypes to haplotypes (for use in color picking). // var color_map = {}; for(var i = 0; i < json.alleles.length; i++) color_map[json.alleles[i].htype] = json.alleles[i].gtype; var alleles = write_haplotypes_table(json); html += alleles + "
\n" + "
\n"; var tables = write_population_table(json, color_map); html += tables + "
\n"; $("#" + json.id + "_popview_div").html(html); } function write_snps_table(json) { var html = ""; if (json.snps.length == 0) return html; html += "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n"; for(var i = 0; i < json.snps.length; i++) { var snp = "\n" + "" + "" + "" + ""; html += snp; } html += "
SNPs
ColumnAlleles
" + (i + 1) + "." + json.snps[i].col + "" + json.snps[i].rank_1 + " / " + json.snps[i].rank_2; if (json.snps[i].rank_3.length > 0) snp += " / " + json.snps[i].rank_3; if (json.snps[i].rank_4.length > 0) snp += " / " + json.snps[i].rank_4; snp += "
x
\n"; if (json.snpstat == 1) { url = json.path + "/sumstat_view.php?db=" + json.db + "&batch_id=" + json.batch_id + "&type=" + json.type + "&tag_id=" + json.id; html += "
Summary Stats
\n" + "
\n"; } if (json.snpfst == 1) { url = json.path + "/fst_view.php?db=" + json.db + "&batch_id=" + json.batch_id + "&type=" + json.type + "&tag_id=" + json.id; html += "
FST Stats
\n" + "
\n"; } return html; } function write_haplotypes_table(json) { var html = ""; if (json.alleles.length == 0) return html; html += "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n"; // // Count up the number of occurances of each haplotype. // haplotypes = {}; for (var pop_id in json.populations) { for (var i = 0; i < json.populations[pop_id].length; i++) for (var j = 0; j < json.populations[pop_id][i].obshap.length; j++) if (!(json.populations[pop_id][i].obshap[j].allele in haplotypes)) { haplotypes[json.populations[pop_id][i].obshap[j].allele] = 1; } else { haplotypes[json.populations[pop_id][i].obshap[j].allele]++; } } // // Write out the haplotype table. // for(var i = 0; i < json.alleles.length; i++) { html += "\n" + "\n" + "" + "" + "" + ""; } html += "
Haplotypes
GenotypeHaplotypeCnt
" + (i+1) + "." + json.alleles[i].gtype + "
" + json.alleles[i].htype + "
"; if (json.alleles[i].htype in haplotypes) html += haplotypes[json.alleles[i].htype]; else html += "0"; html += "
x
\n"; if (json.hapstat == 1) { url = json.path + "/hapstat_view.php?db=" + json.db + "&batch_id=" + json.batch_id + "&type=" + json.type + "&tag_id=" + json.id; html += "
Haplotype Stats
\n" + "
\n"; } if (json.hapfst == 1) { url = json.path + "/phist_view.php?db=" + json.db + "&batch_id=" + json.batch_id + "&type=" + json.type + "&tag_id=" + json.id; html += "
ΦST Stats
\n" + "
\n"; } return html; } function write_population_table(json, color_map) { var html = ""; html += "" + "\n" + "\n" + " \n" + "\n"; // // Determine the maximum number of columns in the table if less than 10. // var num_cols = 0; for (var pop_id in json.populations) num_cols = json.populations[pop_id].length > num_cols ? json.populations[pop_id].length : num_cols; num_cols = num_cols < 10 ? num_cols : 10; // // Iterate over each population. // for (var pop_id in json.populations) { html += "\n"; if (Object.keys(json.populations).length > 1) { html += "\n" + "\n" + "\n"; } var col_index = 0; var hap_index = 0; for (var i = 0; i < json.populations[pop_id].length; i++) { col_index++; var sample = json.populations[pop_id][i]; var uniq_id = sample.sample_id + "|" + sample.obshap[0].tag_id; html += " \n"; if (col_index % num_cols == 0) html += "\n" + "\n"; } while (col_index % num_cols != 0) { html += " \n"; col_index++; } } html += "\n"; return html; } function enqueue_sample(cat_id, id) { var cb = document.getElementById(id); var td = document.getElementById(id + "_td"); var sel = document.getElementById(cat_id + "_selected"); var checks = Number(sel.value); if (cb.checked == false) { cb.checked = true; td.style.backgroundColor = "#ffffa8"; checks++; } else { cb.checked = false; td.style.backgroundColor = "#ffffff"; checks--; } if (checks > 0) $("#" + cat_id + "_viewstat_stacks").css("display", ""); else $("#" + cat_id + "_viewstat_stacks").css("display", "none"); sel.value = checks; } function build_stack_list(id, url) { var samples = new Array(); var tags = new Array(); $('input[type=checkbox]').each(function () { if (this.name == "stack_view" && this.checked) { var parts = this.id.split("|"); samples.push(parts[0]); tags.push(parts[1]); } }); url += "&samples=" + samples.join(",") + "&tags=" + tags.join(","); ajax_locus_stack_view(id, url); } function print_population_name(pop_id, json) { var pop_str = json.popkey[pop_id].length > 0 ? 
json.popkey[pop_id] : "Population " + pop_id; var html = "
" + pop_str + "
\n" + "
\n" + "
\n" + " \n" + " \n" + " save|cancel\n" + " \n" + "
\n"; return html; } function generate_element_select(name, marker, selected_ele, change_js, blur_js) { var script_code = ""; if (change_js.length > 0 && blur_js.length > 0) { script_code = " onchange=\"" + change_js + "\" onblur=\"" + blur_js + "\""; } else if (change_js.length > 0) { script_code = " onchange=\"" + change_js + "\""; } else if (blur_js.length > 0) { script_code = " onblur=\"" + blur_js + "\""; } var ctl = " \n"; return ctl; } function highlight_snp(id) { var span_obj = document.getElementById(id); span_obj.className = "rank_1_hi"; } function unhighlight_snp(id) { var span_obj = document.getElementById(id); span_obj.className = "rank_1"; } function unhighlight_haplotypes(cat_id) { $("#alleles_table_" + cat_id + " tr").css("border", ""); $("#alleles_table_" + cat_id + " tr > td").css("background-color", ""); $("#alleles_table_" + cat_id + " tr td:first-child").css("color", ""); $("#alleles_table_" + cat_id + " tr td:last-child").css("color", ""); var table_obj = document.getElementById("locus_gtypes_" + cat_id); var spans = table_obj.getElementsByTagName("span"); for(var i = 0; i < spans.length; i++) { spans[i].className = ""; } $("div.close_x").css("display", "none"); } function highlight_haplotypes(cat_id, haplotype, tr_id) { unhighlight_haplotypes(cat_id) $("#" + tr_id).css("border", "4px solid #a93535"); $("#" + tr_id + " > td").css("background-color", "#aaa"); $("#" + tr_id + " td:first-child").css("color", "#fff"); $("#" + tr_id + " td:last-child").css("color", "#fff"); var table_obj = document.getElementById("locus_gtypes_" + cat_id); var spans = table_obj.getElementsByTagName("span"); for(var i = 0; i < spans.length; i++) { var parts = spans[i].id.split("_"); if (parts[0] == "haphi" && parts[1] == haplotype) { spans[i].className = "haphi"; } } $("#" + tr_id + "_close").css("display", ""); $("#" + tr_id + "_close").bind("click", {cat_id: cat_id}, function(event) { unhighlight_haplotypes(event.data.cat_id); event.stopPropagation(); }); } 
function build_sumstats_view(json, status, jqXHR) { var html = "
x
\n" + "
\n" + " View:\n" + " \n" + " Haplotypes\n" + " \n" + " Allele Depths\n" + " \n" + " LnLs\n"; if (json.type == "map") { html += " " + "Genotypes"; } html += "
"; html += print_population_name(pop_id, json); html += "
" + "
" + "" + sample['sample'].replace("_", " ") + "
\n"; var hap_strs = []; var dep_strs = []; for (var j = 0; j < sample.obshap.length; j++) { hap_index++; var c = colors[color_map[sample.obshap[j].allele]]; var url = json.path + "/stack_view.php" + "?db=" + json.db + "&batch_id=" + json.batch_id + "&samples=" + sample.sample_id + "&tags=" + sample.obshap[j].tag_id + "&tag_id=" + json.id; var a = ""; hap_strs.push("" + a + sample.obshap[j].allele + ""); dep_strs.push("" + a + sample.obshap[j].depth + ""); } var hap_str = hap_strs.join(" / "); var dep_str = dep_strs.join(" / "); var gen_str = ""; if (json.type == "map" && sample.genotype.length > 0) { var id = "gtype_" + json.batch_id + "_" + json.id + "_" + sample.sample_id; var url = json.path + "/correct_genotype.php?db=" + json.db + "&batch_id=" + json.batch_id + "&tag_id=" + json.id + "&sample_id=" + sample.sample_id; var jscript = "correct_genotype('" + id + "', '" + url + "')"; var blur_js = "cancel_correction('" + id + "')"; var sel = generate_element_select(id, sample.marker, sample.genotype.toLowerCase(), jscript, blur_js); var genotype = sample.corrected == 1 ? "" + sample.genotype + "" : sample.genotype; gen_str = "
" + "\n" + "
\n" + sel + "
"; } html += "
" + hap_str + "
" + "
" + dep_str + "
" + "
" + sample.lnl + "
" + gen_str + "
\n" + "\n" + "\n" + "\n" + "\n" + " \n" + " \n"; for (var i = 0; i < json.sumstats.length; i++) { html += "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n"; } html += "
SNP Summary Statistics
Pop\n" + " BP\n" + " Column\n" + " Allele 1\n" + " Allele 2\n" + " P\n" + " N\n" + " Obs Het\n" + " Obs Hom\n" + " Exp Het\n" + " Exp Hom\n" + " π\n" + " FIS\n" + "
" + (i + 1) + "." + json.sumstats[i].pop_id + "" + json.sumstats[i].bp + "" + json.sumstats[i].col + "" + json.sumstats[i].p_allele + "" + json.sumstats[i].q_allele + "" + json.sumstats[i].p + "" + json.sumstats[i].n + "" + json.sumstats[i].obshet + "" + json.sumstats[i].obshom + "" + json.sumstats[i].exphet + "" + json.sumstats[i].exphom + "" + json.sumstats[i].pi + "" + json.sumstats[i].fis + "
\n"; $("#" + json.id + "_sumstat_div").html(html); // // We need to position the sumstat overlay. Identify the right-most // border of the containing div. // parent_div = "#" + json.id + "_popview_div"; t = $(parent_div + " table.snps").position(); p = $(parent_div + " div.comp_view").position(); top_coord = t.top + 32; right_coord = p.left + $(parent_div + " div.comp_view").width() + 25; $("#" + json.id + "_sumstat_div").css({top: top_coord, left: right_coord}); // // Set a maximum height for the containing div. // h = $(parent_div).height() - 50; $("#" + json.id + "_sumstat_div").css("max-height", h); // // Make sure the Fst and Hapstat divs are closed. // unhighlight_snp_row(json.id); close_hapstat_view(json.id); close_phist_view(json.id); close_stack_view(json.id); // // Bind the escape key to close this popup. // $(document).keyup(function(event){ if(event.keyCode === 27) close_sumstat_view(json.id); }); // // Display the Sumstats div. // $("#" + json.id + "_sumstat_div").css("display", ""); $("#" + json.id + "_viewstat_sumstat").css({"color": "#a93535", "border-color": "#a93535", "background-color": "#ffffff"}); } function close_sumstat_view(id) { $("#" + id + "_sumstat_div").css("display", "none"); $("#" + id + "_viewstat_sumstat").css({"color": "", "border-color": "", "background-color": ""}); } function build_hapstats_view(json, status, jqXHR) { var html = "
x
\n" + "\n" + "\n" + "\n" + "\n" + "\n" + " \n" + " \n"; for (var i = 0; i < json.hapstats.length; i++) { html += "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n"; } html += "
Haplotype Summary Statistics
Pop\n" + " BP\n" + " N\n" + " Haplotype Cnt\n" + " Gene Diversity\n" + " Haplotype Diversity\n" + "
" + (i + 1) + "." + json.hapstats[i].pop_id + "" + json.hapstats[i].bp + "" + json.hapstats[i].n + "" + json.hapstats[i].hapcnt + "" + json.hapstats[i].genediv + "" + json.hapstats[i].hapdiv + "
\n"; $("#" + json.id + "_hapstat_div").html(html); // // We need to position the sumstat overlay. Identify the right-most // border of the containing div. // parent_div = "#" + json.id + "_popview_div"; t = $(parent_div + " table.snps").position(); p = $(parent_div + " div.comp_view").position(); top_coord = t.top + 32; right_coord = p.left + $(parent_div + " div.comp_view").width() + 25; $("#" + json.id + "_hapstat_div").css({top: top_coord, left: right_coord}); // // Set a maximum height for the containing div. // h = $(parent_div).height() - 50; $("#" + json.id + "_hapstat_div").css("max-height", h); // // Make sure the Fst, Phist, and Sumstat divs are closed. // unhighlight_snp_row(json.id); close_sumstat_view(json.id); close_phist_view(json.id); close_stack_view(json.id); // // Bind the escape key to close this popup. // $(document).keyup(function(event){ if(event.keyCode === 27) close_hapstat_view(json.id); }); // // Display the Hapstats div. // $("#" + json.id + "_hapstat_div").css("display", ""); $("#" + json.id + "_viewstat_hapstat").css({"color": "#a93535", "border-color": "#a93535", "background-color": "#ffffff"}); } function close_hapstat_view(id) { $("#" + id + "_hapstat_div").css("display", "none"); $("#" + id + "_viewstat_hapstat").css({"color": "", "border-color": "", "background-color": ""}); } function build_fst_view(json, status, jqXHR) { var html = "
x
\n" + "\n" + "\n" + " \n" + " \n" + "\n" + "
FSTπoverall
\n"; // // Create a population id => name map. // var pop_names = []; for (var pop_id in json.popkey) pop_names[pop_id] = json.popkey[pop_id]; var keys = Object.keys(pop_names); var pop_cnt = keys.length; var cols = Object.keys(json.columns); for (var col in json.columns) { var matrix = []; for (var i = 0; i < keys.length; i++) matrix[keys[i]] = []; for (var i = 0; i < json.columns[col].length; i++) { var s = json.columns[col][i]; matrix[s.pid_1][s.pid_2] = s; matrix[s.pid_2][s.pid_1] = s; } html += "\n" + "\n" + " \n"; for (var i = 0; i < pop_cnt; i++) html += " \n"; html += "\n"; for (var i = 0; i < pop_cnt; i++) { html += "\n" + " \n"; var str = ""; var cssclass = ""; for (var j = 0; j < pop_cnt; j++) { if (i < j) { cssclass = "class=\"pi\""; if (keys[j] in matrix[keys[i]]) str = matrix[keys[i]][keys[j]]['pi_o']; } else { cssclass = "class=\"fst\""; if (keys[j] in matrix[keys[i]]) str = "" + matrix[keys[i]][keys[j]]['fst'] + ""; } if (i == j) html += " \n"; else if (!(keys[j] in matrix[keys[i]])) html += " \n"; else { var id = json.id + "_" + col + "_" + i + "_" + j; var arg = "'" + json.id + "', '" + col + "', '" + i + "', '" + j + "'"; html += " \n"; } } html += "\n"; } html += "
 " + pop_names[keys[i]] + "
" + pop_names[keys[i]] + "  " + str + "
\n"; } html += "\n"; $("#" + json.id + "_fst_div").html(html); // // We need to make the first Fst table visible. // highlight_snp_row(json.id, cols[0]); // // Hook up event handlers to handle clicking on other SNPs to change the Fst table in view. // for (var i = 0; i < cols.length; i++) { var tr_id = "#" + json.id + "_snps_tr_" + cols[i]; $(tr_id).bind("click", {cat_id: json.id, col: cols[i]}, function(event) { change_highlighted_snp_row(event.data.cat_id, event.data.col); }); } // // We need to position the fst overlay. Identify the right-most // border of the containing div. // parent_div = "#" + json.id + "_popview_div"; t = $(parent_div + " table.snps").position(); p = $(parent_div + " div.comp_view").position(); top_coord = t.top + 32; right_coord = p.left + $(parent_div + " div.comp_view").width() + 25; $("#" + json.id + "_fst_div").css({top: top_coord, left: right_coord}); // // Set a maximum height for the containing div. // h = $(parent_div).height() - 32; $("#" + json.id + "_fst_div").css("max-height", h); // // Make sure the Sumstats, Hapstats, and Phist divs are closed. // close_sumstat_view(json.id); close_hapstat_view(json.id); close_phist_view(json.id); close_stack_view(json.id); // // Bind the escape key to close this popup. // $(document).keyup(function(event){ if(event.keyCode === 27) unhighlight_snp_row(json.id); }); // // Display the Fst div. // $("#" + json.id + "_fst_div").css("display", ""); $("#" + json.id + "_viewstat_fststat").css({"color": "#a93535", "border-color": "#a93535", "background-color": "#ffffff"}); } function close_fst_view(id) { $("#" + id + "_fst_div").css("display", "none"); $("#" + id + "_viewstat_fststat").css({"color": "", "border-color": "", "background-color": ""}); // // Remove handlers for clicking on SNP rows. // $("#snps_table_" + id + " tr").unbind("click"); } function build_phist_view(json, status, jqXHR) { var html = "
x
\n" + "\n" + "\n" + " \n" + " \n" + "\n" + "
ΦSTFST
\n"; // // Create a population id => name map. // var pop_names = []; for (var pop_id in json.popkey) pop_names[pop_id] = json.popkey[pop_id]; var keys = Object.keys(pop_names); var pop_cnt = keys.length; var matrix = []; for (var i = 0; i < keys.length; i++) matrix[keys[i]] = []; for (var i = 0; i < json.phist.length; i++) { var s = json.phist[i]; matrix[s.pid_1][s.pid_2] = s; matrix[s.pid_2][s.pid_1] = s; } html += "\n" + "\n" + " \n"; for (var i = 0; i < pop_cnt; i++) html += " \n"; html += "\n"; for (var i = 0; i < pop_cnt; i++) { html += "\n" + " \n"; var str = ""; var cssclass = ""; for (var j = 0; j < pop_cnt; j++) { if (i < j) { cssclass = "class=\"pi\""; if (keys[j] in matrix[keys[i]]) str = matrix[keys[i]][keys[j]]['fstp']; } else { cssclass = "class=\"fst\""; if (keys[j] in matrix[keys[i]]) str = matrix[keys[i]][keys[j]]['phist']; } if (i == j) html += " \n"; else if (!(keys[j] in matrix[keys[i]])) html += " \n"; else { var id = json.id + "_" + i + "_" + j; var arg = "'" + json.id + "', '', '" + i + "', '" + j + "'"; html += " \n"; } } html += "\n"; } html += "
 " + pop_names[keys[i]] + "
" + pop_names[keys[i]] + "  " + str + "
\n"; $("#" + json.id + "_phist_div").html(html); // // We need to position the fst overlay. Identify the right-most // border of the containing div. // parent_div = "#" + json.id + "_popview_div"; t = $(parent_div + " table.snps").position(); p = $(parent_div + " div.comp_view").position(); top_coord = t.top + 32; right_coord = p.left + $(parent_div + " div.comp_view").width() + 25; $("#" + json.id + "_phist_div").css({top: top_coord, left: right_coord}); // // Set a maximum height for the containing div. // h = $(parent_div).height() - 32; $("#" + json.id + "_phist_div").css("max-height", h); // // Make sure the Sumstats, Hapstats, and Fst divs are closed. // close_sumstat_view(json.id); close_hapstat_view(json.id); unhighlight_snp_row(json.id); close_stack_view(json.id); // // Bind the escape key to close this popup. // $(document).keyup(function(event){ if(event.keyCode === 27) close_phist_view(json.id); }); // // Display the Fst div. // $("#" + json.id + "_phist_div").css("display", ""); $("#" + json.id + "_viewstat_phistat").css({"color": "#a93535", "border-color": "#a93535", "background-color": "#ffffff"}); } function close_phist_view(id) { $("#" + id + "_phist_div").css("display", "none"); $("#" + id + "_viewstat_phistat").css({"color": "", "border-color": "", "background-color": ""}); } function highlight_fst_cells(id, snp, row, col) { var cell_1, cell_2; if (snp.length == 0) { cell_1 = document.getElementById(id + "_" + row + "_" + col); cell_2 = document.getElementById(id + "_" + col + "_" + row); } else { cell_1 = document.getElementById(id + "_" + snp + "_" + row + "_" + col); cell_2 = document.getElementById(id + "_" + snp + "_" + col + "_" + row); } if (row < col) { cell_1.style.backgroundColor = "#f1592a"; cell_1.style.color = "#ffffff"; cell_2.style.backgroundColor = "#24aae2"; cell_2.style.color = "#ffffff"; } else { cell_1.style.backgroundColor = "#24aae2"; cell_1.style.color = "#ffffff"; cell_2.style.backgroundColor = "#f1592a"; 
cell_2.style.color = "#ffffff"; } } function unhighlight_fst_cells(id, snp, row, col) { var cell_1, cell_2; if (snp.length == 0) { cell_1 = document.getElementById(id + "_" + row + "_" + col); cell_2 = document.getElementById(id + "_" + col + "_" + row); } else { cell_1 = document.getElementById(id + "_" + snp + "_" + row + "_" + col); cell_2 = document.getElementById(id + "_" + snp + "_" + col + "_" + row); } if (row < col) { cell_1.style.backgroundColor = "#fdc4b8"; cell_1.style.color = "#000000"; cell_2.style.backgroundColor = "#aee2f3"; cell_2.style.color = "#000000"; } else { cell_1.style.backgroundColor = "#aee2f3"; cell_1.style.color = "#000000"; cell_2.style.backgroundColor = "#fdc4b8"; cell_2.style.color = "#000000"; } } function unhighlight_snp_row(cat_id) { $("#snps_table_" + cat_id + " tr").css("border", ""); $("#snps_table_" + cat_id + " tr > td").css("background-color", ""); $("#snps_table_" + cat_id + " tr td:first-child").css("color", ""); $("#snps_table_" + cat_id + " tr td:last-child").css("color", ""); // // Make the Fst tables invisible. // close_fst_view(cat_id); $("div.close_x").css("display", "none"); } function highlight_snp_row(cat_id, col) { unhighlight_snp_row(cat_id) var tr_id = cat_id + "_snps_tr_" + col; $("#" + tr_id).css("border", "4px solid #a93535"); $("#" + tr_id + " > td").css("background-color", "#aaa"); $("#" + tr_id + " td:first-child").css("color", "#fff"); $("#" + tr_id + " td:last-child").css("color", "#fff"); // // Make the Fst table visible. 
// $("#" + cat_id + "_" + col + "_fst").css("display", ""); $("#" + tr_id + "_close").css("display", ""); $("#" + tr_id + "_close").bind("click", {cat_id: cat_id}, function(event) { unhighlight_snp_row(event.data.cat_id); event.stopPropagation(); }); } function change_highlighted_snp_row(cat_id, col) { $("#snps_table_" + cat_id + " tr").css("border", ""); $("#snps_table_" + cat_id + " tr > td").css("background-color", ""); $("#snps_table_" + cat_id + " tr td:first-child").css("color", ""); $("#snps_table_" + cat_id + " tr td:last-child").css("color", ""); $("div.close_x").css("display", "none"); var tr_id = cat_id + "_snps_tr_" + col; $("#" + tr_id).css("border", "4px solid #a93535"); $("#" + tr_id + " > td").css("background-color", "#aaa"); $("#" + tr_id + " td:first-child").css("color", "#fff"); $("#" + tr_id + " td:last-child").css("color", "#fff"); // // Hide the Fst tables; // $("#" + cat_id + "_fst_div table").css("display", "none"); // // Make the Fst table visible. // $("table.fst_key").css("display", ""); $("#" + cat_id + "_" + col + "_fst").css("display", ""); $("#" + tr_id + "_close").css("display", ""); $("#" + tr_id + "_close").bind("click", {cat_id: cat_id}, function(event) { unhighlight_snp_row(event.data.cat_id); event.stopPropagation(); }); } function build_stack_view(json, status, jqXHR) { var html = "
x
\n"; for (var i = 0; i < json.stacks.length; i++) { html += "\n" + "\n" + " \n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "\n"; // // Print out the primary stack components. // seq_cnt = 1; for (var j = 0; j < json.stacks[i].primary.length; j++) { var bg = (j + 1) % 2 == 1 ? "style=\"background-color: #dddddd;\"" : ""; for (var k = 0; k < json.stacks[i].primary[j].ids.length; k++) { html += "\n" + " \n" + " \n" + " \n" + " \n" + "\n"; seq_cnt++; } } // // Print out the secondary stack components. // for (var j = 0; j < json.stacks[i].secondary.length; j++) { html += "\n" + " \n" + " \n" + " \n" + " \n" + "\n"; seq_cnt++; } html += "
" + json.stacks[i].sample_name + " [#" + json.stacks[i].sample_id + "]; Stack " + json.stacks[i].tag_id + "
   " + json.stacks[i].scale + "
 consensus" + json.stacks[i].consensus + "
 model" + json.stacks[i].model + "
" + seq_cnt + ".primary" + json.stacks[i].primary[j].ids[k] + "" + json.stacks[i].primary[j].seq + "
" + seq_cnt + ".secondary" + json.stacks[i].secondary[j].id + "" + json.stacks[i].secondary[j].seq + "
\n"; } $("#" + json.id + "_stacks_div").html(html); // // Set a maximum height/width for the containing div. // var parent_div = "#" + json.id + "_popview_div"; var p = $(parent_div).position(); var h = $(parent_div).parent().height(); var w = $(window).height(); h = h > w ? w : h; var div_h = Math.round(h * 0.96); $("#" + json.id + "_stacks_div").css("max-height", div_h); $("html, body").css("cursor", "auto"); // // Display the Stacks div. // $("#" + json.id + "_stacks_div").css("display", ""); // // Make sure the Fst, Phist, and Sumstat divs are closed. // unhighlight_snp_row(json.id); close_sumstat_view(json.id); close_hapstat_view(json.id); close_phist_view(json.id); // // Bind the escape key to close this popup. // $(document).keyup(function(event){ if(event.keyCode === 27) close_stack_view(json.id); }); } function close_stack_view(id) { $("#" + id + "_stacks_div").css("display", "none"); } stacks-1.35/php/samples.php000644 000765 000024 00000007561 12335173442 016463 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $batch_id = isset($_GET['id']) ? $_GET['id'] : 0; $database = isset($_GET['db']) ? 
$_GET['db'] : ""; // Connect to the database $db = db_connect($database); // // Prepare some SQL queries // $query = "SELECT id, date, description FROM batches WHERE id=?"; $db['batch_sth'] = $db['dbh']->prepare($query); check_db_error($db['batch_sth'], __FILE__, __LINE__); $query = "SELECT id, sample_id, type, file FROM samples " . "WHERE batch_id=? ORDER BY id"; $db['samp_sth'] = $db['dbh']->prepare($query); check_db_error($db['samp_sth'], __FILE__, __LINE__); $query = "SELECT COUNT(tag_id) AS count FROM tag_index " . "WHERE batch_id=? AND sample_id=?"; $db['count_sth'] = $db['dbh']->prepare($query); check_db_error($db['count_sth'], __FILE__, __LINE__); $query = "SELECT snps FROM tag_index " . "WHERE batch_id=? AND sample_id=? AND snps>0"; $db['snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['snp_sth'], __FILE__, __LINE__); // // Pull information about this batch // $result = $db['batch_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $batch = array(); $batch['id'] = $row['id']; $batch['desc'] = $row['description']; $batch['date'] = $row['date']; $page_title = "RAD-Tag Samples"; write_header($page_title); echo <<< EOQ

RAD-Tag Samples for batch #$batch[id] [$batch[date]; $batch[desc]]

EOQ; $result = $db['samp_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $snps = 0; $poly = 0; // Query the database to determine how many tags belong to this sample. $count_res = $db['count_sth']->execute(array($batch_id, $row['id'])); check_db_error($count_res, __FILE__, __LINE__); $count_row = $count_res->fetchRow(); $count = $count_row['count']; // Query the database to find how many SNPs were found in this sample. $count_res = $db['snp_sth']->execute(array($batch_id, $row['id'])); check_db_error($count_res, __FILE__, __LINE__); while ($count_row = $count_res->fetchRow()) { $snps += $count_row['snps']; $poly += $count_row['snps'] > 0 ? 1 : 0; } print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . "\n"; } echo <<< EOQ
Id Type Unique Stacks Polymorphic Loci SNPs Found Source
$row[id]" . ucfirst($row['type']) . "" . $count . "" . $poly . "" . $snps . "" . $row['file'] . "
EOQ; write_footer(); ?> stacks-1.35/php/sequence_blast.php000644 000765 000024 00000011274 12335173442 020010 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $page = isset($_GET['p']) ? $_GET['p'] : 1; $per_page = isset($_GET['pp']) ? $_GET['pp'] : 10; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; $display['p'] = $page; $display['pp'] = $per_page; // // Prepare some SQL queries // $query = "SELECT id, catalog_id, seq_id, type FROM sequence " . "WHERE batch_id=? AND catalog_id=?"; $db['seq_sth'] = $db['dbh']->prepare($query); check_db_error($db['seq_sth'], __FILE__, __LINE__); $query = "SELECT id, catalog_id, seq_id, algorithm, query_id, query_len, hit_id, hit_len, score, e_value, percent_ident, hsp_rank, " . "aln_len, query_aln_start, query_aln_end, hit_aln_start, hit_aln_end " . "FROM sequence_blast " . "WHERE batch_id=? 
AND catalog_id=?"; $db['blast_sth'] = $db['dbh']->prepare($query); check_db_error($db['blast_sth'], __FILE__, __LINE__); $page_title = "Catalog RAD-Tag Sequence/BLAST Hits Viewer"; write_compact_header($page_title); $result = $db['seq_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $seqs = array(); // Add the marker itself to the sequence array, it won't // be directly listed in the sequence table. $a = array('id' => -1, 'catalog_id' => $tag_id, 'seq_id' => "", 'type' => "se_radtag"); array_push($seqs, $a); while ($row = $result->fetchRow()) { $a = array('id' => $row['id'], 'catalog_id' => $row['catalog_id'], 'seq_id' => $row['seq_id'], 'type' => $row['type']); array_push($seqs, $a); } $result = $db['blast_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $hits = array(); while ($row = $result->fetchRow()) { $a = array('sql_id' => $row['id'], 'query_id' => $row['query_id'], 'hit_id' => $row['hit_id'], 'query_len' => $row['query_len'], 'hit_len' => $row['hit_len'], 'score' => $row['score'], 'e_value' => $row['e_value'], 'pctid' => $row['percent_ident'], 'hsp_rank' => $row['hsp_rank'], 'aln_len' => $row['aln_len']); if (!isset($hits[$row['query_id']])) $hits[$row['query_id']] = array(); array_push($hits[$row['query_id']], $a); } foreach ($seqs as $seq) { if (strlen($seq['seq_id']) > 0) $query_id = $seq['catalog_id'] . "|" . $seq['seq_id']; else $query_id = $seq['catalog_id']; $hsps = $hits[$query_id]; $i = 0; if ($seq['type'] == "se_radtag" && !isset($hsps)) continue; else if ($seq['type'] == "se_radtag") print "

$seq[seq_id] [$seq[type]]

\n"; else echo <<< EOQ

$seq[seq_id] [$seq[type]]

EOQ; if (!isset($hsps)) { print "\n"; continue; } echo <<< EOQ EOQ; foreach ($hsps as $hsp) { $i++; echo <<< EOQ EOQ; } print "
  Hit ID Score E-Value Percent Ident Aln Len Query Len Hit Len HSP Rank
$i $hsp[hit_id] $hsp[score] $hsp[e_value] $hsp[pctid] $hsp[aln_len] $hsp[query_len] $hsp[hit_len] $hsp[hsp_rank]
\n"; } write_compact_footer(); ?> stacks-1.35/php/stack_view.php000644 000765 000024 00000013434 12571641525 017156 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $tag_str = isset($_GET['tags']) ? $_GET['tags'] : ""; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $cat_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : ""; $sample_str = isset($_GET['samples']) ? $_GET['samples'] : ""; $database = isset($_GET['db']) ? $_GET['db'] : ""; $tags = explode(",", $tag_str); $samples = explode(",", $sample_str); // Connect to the database $db = db_connect($database); // // Prepare some SQL queries // $query = "SELECT tag_id, sub_id, relationship, seq_id, seq, deleveraged, blacklisted, removed, file, pop_id " . "FROM unique_tags " . "JOIN samples ON (unique_tags.sample_id=samples.id) " . "JOIN batches ON (samples.batch_id=batches.id) " . "WHERE batch_id=? AND unique_tags.sample_id=? AND tag_id=?"; $db['seq_sth'] = $db['dbh']->prepare($query); check_db_error($db['seq_sth'], __FILE__, __LINE__); $query = "SELECT col, rank_1, rank_2 FROM snps " . "JOIN samples ON (snps.sample_id=samples.id) " . "JOIN batches ON (samples.batch_id=batches.id) " . "WHERE batch_id=? AND snps.sample_id=? AND tag_id=? 
AND snps.type='E' ORDER BY col"; $db['snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['snp_sth'], __FILE__, __LINE__); $query = "SELECT col FROM catalog_snps " . "WHERE batch_id=? AND tag_id=?"; $db['cat_snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['cat_snp_sth'], __FILE__, __LINE__); $json_str = "{" . "\"id\": \"$cat_id\"," . "\"stacks\": ["; for ($i = 0; $i < count($tags); $i++) { $sample_id = $samples[$i]; $tag_id = $tags[$i]; // // Fetch and store the SNPs. // $snps = array(); $result = $db['snp_sth']->execute(array($batch_id, $sample_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { $snps[$row['col']] = array('col' => $row['col'], 'rank_1' => $row['rank_1'], 'rank_2' => $row['rank_2']); } // // Fetch and store the catalog SNPs. // $cat_snps = array(); $result = $db['cat_snp_sth']->execute(array($batch_id, $cat_id)); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { if (!isset($cat_snps[$row['col']])) $cat_snps[$row['col']] = 0; $cat_snps[$row['col']]++; } // // Fetch the sequences. 
// $deleveraged = 0; $lumberjackstack = 0; $blacklisted = 0; $consensus = ""; $model = ""; $file = ""; $stacks = array(); $secondary = array(); $result = $db['seq_sth']->execute(array($batch_id, $sample_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { if ($row['relationship'] == "consensus") { $deleveraged = $row['deleveraged']; $lumberjackstack = $row['removed']; $blacklisted = $row['blacklisted']; $consensus = $row['seq']; $file = $row['file']; } else if ($row['relationship'] == "model") { $model = $row['seq']; } else if ($row['relationship'] == "secondary") { array_push($secondary, array('s' => $row['seq'], 'id' => $row['seq_id'])); } else { if (!isset($stacks[$row['sub_id']])) $stacks[$row['sub_id']] = array(); array_push($stacks[$row['sub_id']], array('s' => $row['seq'], 'id' => $row['seq_id'])); } } $con_len = strlen($consensus); $c = print_snps($tags[$i], $consensus, $consensus, $snps, false); $c = addslashes($c); $scale = print_scale($con_len); $scale = addslashes($scale); $json_str .= "{" . "\"sample_id\": \"$sample_id\"," . "\"sample_name\": \"$file\"," . "\"tag_id\": \"$tag_id\"," . "\"consensus\": \"$c\"," . "\"model\": \"$model\"," . "\"scale\": \"$scale\"," . "\"primary\": ["; foreach ($stacks as $sub_id => $stack) { $s = print_snps_errs($consensus, $stack[0]['s'], $snps, $cat_snps); $s = addslashes($s); $json_str .= "{" . "\"seq\": \"$s\"," . "\"ids\": ["; foreach ($stack as $seq) { $json_str .= "\"$seq[id]\","; } if (count($stack) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]" . "},"; } if (count($stacks) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]," . "\"secondary\": ["; foreach ($secondary as $seq) { $s = print_snps_errs($consensus, $seq['s'], $snps, $cat_snps); $s = addslashes($s); $json_str .= "{" . "\"id\": \"$seq[id]\"," . "\"seq\": \"$s\"" . 
"},"; } if (count($secondary) > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]},"; } $json_str = substr($json_str, 0, -1); $json_str .= "]}"; echo $json_str; ?> stacks-1.35/php/stacks.css000644 000765 000024 00000032072 12540304631 016275 0ustar00catchenstaff000000 000000 /* Copyright 2010-2015, Julian Catchen This file is part of Stacks. Stacks is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stacks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Stacks. If not, see . */ body { font-family: bitstream vera sans, arial, sans; } a:link { color: #29356c; } a:visited { color: #29356c; } a:hover { color: #a93535; } img { border: 0; } strong { color: #333333; } acronym { border-bottom: 1px dotted; cursor: help; } .s { font-size: small; } div.main { width: 80%; margin-left: auto; margin-right: auto; } #header { background-color: #455372; background-image: url(./images/stacks_bg.png); background-repeat: repeat-x; min-height: 75px; height: 75px; -moz-border-radius: 10px; -webkit-border-radius: 10px; border-radius: 10px; } #header h1 { position: relative; top: 0px; left: 0.75em; font-size: 16pt; } #header h1 a, #header p a { text-decoration: none; color: #ffffff; } #header p a:hover { color: #a93535; } #header p { float: right; position: relative; top: -6em; padding-right: 0.75em; font-size: 10pt; } div.footer { width: 100%; margin-left: auto; margin-right: auto; font-size: small; clear: both; padding-top: 20px; } h3.info_head, h4.info_head, h5.info_head { margin-left: auto; margin-right: auto; width: 80%; padding-bottom: 0px; margin-bottom: 5px; 
} h3.info_head a:hover, h4.info_head a:hover { cursor: pointer; } table.loc_filter { width: 100%; vertical-align: top; margin-left: auto; margin-right: auto; border: thin solid black; background-color: #eeeeee; padding-right: 10px; height: 160px; } div.filter { width: 80%; margin-left: auto; margin-right: auto; } table.filter { width: 100%; vertical-align: top; margin-left: auto; margin-right: auto; border: thin solid black; background-color: #eeeeee; padding-left: 10px; height: 160px; } table.filter td { border: 0px; padding-top: 6px; padding-bottom: 6px; } table.filter td.active_filter, table.loc_filter td.active_filter { background-color: #a93535; } table.filter td.active_filter a:hover, table.loc_filter td.active_filter a:hover { color: #ffffff; } table.db { margin-left: auto; margin-right: auto; border: 1px solid black; line-height: 1.25; width: 90%; text-align: center; border-spacing: 0px 1px; border-collapse: collapse; } table.db th { border: 1px solid black; line-height: 1; padding: 7px; background-color: #dddddd; } table.db td { border: 1px solid black; padding: 5px; } /* Internal table, should be invisible in terms of borders and the like */ table.int { border: none; border-collapse: collapse; width: 100%; margin-left: auto; margin-right: auto; text-align: center; padding: 0px; margin: 0px; } table.int td { border: none; } td.seq { text-align: center; } td.seq div.seq { margin-left: auto; margin-right: auto; width: 100%; text-align: left; font-family: monospace; } table.radtag { width: 100%; border-collapse: collapse; } table.radtag th { border-bottom: thin solid black; background-color: #cccccc; } table.radtag td { padding: 0; margin: 0; } table.radtag td.num { text-align: center; font-size: smaller; width: 5%; } table.radtag td.con { padding-left: 3em; font-size: smaller; } table.radtag td.primary { padding-left: 3em; font-size: smaller; color: #32773f; width: 15%; } table.radtag td.secondary { padding-left: 3em; font-size: smaller; color: #870214; 
width: 15%; } table.radtag td.tertiary { padding-left: 3em; font-size: smaller; color: #3d5aff; width: 15%; } table.radtag td.id { font-family: monospace; text-align: right; padding-right: 4px; width: 20%; } table.radtag td.tag { font-family: monospace; text-align: left; border-left: thin solid black; padding-left: 4px; width: 60%; } div.seq_frame_head { margin-left: auto; margin-right: auto; width: 95%; overflow-y: scroll; } div.seq_frame { margin-left: auto; margin-right: auto; width: 95%; overflow-y: scroll; height: 35em; } span.rank_1 { background-color: #ccddd4; color: #a93535; font-weight: bold; } .rank_1_hi { display: inline-block; border: 2px solid #a93535; background-color: #eeeeee; color: #29356c; font-size: 18pt; font-weight: bold; padding: 5px; line-height: 40%; } span.rank_2 { background-color: #ccddd4; color: #29356c; font-weight: bold; } span.err { background-color: #fcdba7; color: #29356c; } span.cat_snp { background-color: #d8f3ff; color: #29356c; } div.footer { clear: both; } tr.catrow { height: 5em; } td.catlink { font-size: small; position: relative; } td.catlink,div.catlink a:hover { cursor: pointer; } span.light_scale { color: #888888; background-color: #e29c71; } span.dark_scale { color: #888888; background-color: #bbd5ff; } table.genotypes td.gtype_toggle { font-size: smaller; text-align: right; border-top: none; border-left: none; border-right: none; } table.genotypes { padding: 1em; margin-top: 0.5em; margin-bottom: 1em; margin-left: auto; margin-right: auto; border-collapse: collapse; } table.genotypes td { padding: 3px; border: thin solid #aaaaaa; text-align: center; max-width: 10%; word-wrap: break-word; vertical-align: top; } table.genotypes td a:hover { cursor: pointer; } table.genotypes div.title { white-space: pre; font-size: smaller; color: #aaaaaa; } table.genotypes div.title:hover { cursor: pointer; color: #a93535; } table.genotypes td div.title input { display:none; } table.genotypes td.pop_id { height: 1.5em; text-align: left; 
font-size: 12pt; padding-left: 10px; background-color: #eeeeee; } div.pop_annotation { display: inline; } div.pop_annotation:hover { cursor: pointer; } span.corrected { font-weight: bold; color: #870214; } div.ann_parent { position: absolute; float: left; width: 100%; bottom: 0.75em; } a.annotation { font-size: small; } td.export_icon a:hover { cursor: pointer; } div#export_popup { position: absolute; right: 7%; /*top: 2%;*/ border: 2px solid black; background-color: #6f9b6d; width: 350px; height: 250px; font-size: smaller; text-align: center; padding: 1em; -moz-box-shadow: 4px 4px 7px #888; -webkit-box-shadow: 4px 4px 7px #888; box-shadow: 4px 4px 7px #888; } div#export_popup h3 { color: #ffc600; text-decoration: underline; margin-top: 0px; padding-top: 0px; } .r { font-weight: bold; color: #a93535; } #stacks_filter { margin-left: auto; margin-right: auto; width: 85%; padding-bottom: 20px; } div.gloc { margin-top: 0.5em; margin-left: 0.5em; float: left; font-size: smaller; font-family: bitstream vera sans, arial, sans; clear: both; } div.popview { position: relative; } div.comp_view { float: left; margin-left: 1em; } div.matches { overflow: scroll; } div.stack_view { overflow: scroll; position: absolute; width: 98%; background-color: #333; border: 2px solid #ccc; opacity: 0.95; padding: 10px; border-radius: 5px; overflow: scroll; box-shadow: 5px 5px 7px #888888; } div.haplotype_def { margin-left: auto; margin-right: auto; max-width: 150px; word-wrap: break-word; } div.haplotype { max-width: 85px; word-wrap: break-word; } div.lnl { font-size: 10pt; font-family: monospace; } table.snps, table.alleles { border-collapse: collapse; min-width: 225px; } table.snps { margin-top: 2em; } table.alleles { margin-top: 1.5em; margin-bottom: 1.5em; } table.snps th, table.alleles th { height: 1.5em; text-align: left; font-size: 12pt; font-weight: normal; padding-left: 10px; background-color: #eeeeee; border: thin solid #aaaaaa; } table.snps tr.subhead th, table.alleles tr.subhead 
th { background-color: #ffffff; font-size: x-small; text-decoration: underline; text-align: center; padding-bottom: 0px; border-left: 0; border-right: 0; border-bottom: 0; } table.snps tr:nth-child(3) td, table.alleles tr:nth-child(3) td { border-top: 0; } table.snps tr:nth-child(2n+4) td, table.alleles tr:nth-child(2n+4) td { background-color: #efefef; } table.snps td, table.alleles td { padding: 3px; border: thin solid #aaaaaa; text-align: center; } table.snps tr:first-child th, table.alleles tr:first-child th { border-top: 0; } table.snps tr:last-child td, table.alleles tr:last-child td { border-bottom: 0; } table.snps tr th:first-child, table.alleles tr th:first-child { border-left: 0; } table.snps tr td:first-child, table.alleles tr td:first-child { border-left: 0; font-size: x-small; text-align: left; color: #aaaaaa; vertical-align: bottom; } table.alleles tr td:last-child, table.alleles tr th:last-child { text-align: left; border-right: 0; } table.snps tr td:last-child, table.snps tr th:last-child { border-right: 0; } table.alleles tr:hover td, table.snps tr:hover td { background-color: #aaa; color: #fff; } table.alleles tr:hover td { cursor: pointer; } div.close_x { float: right; font-weight: bold; } div.close_x:hover { cursor: pointer; } .haphi { display: inline-block; border: 3px solid #ef6c00; padding: 7px; line-height: 35%; border-radius: 5px; } div.sumstats { clear: both; padding-top: 2em; } table.sumstats, table.fst { min-width: 250px; border-collapse: collapse; font-size: 10pt; margin-bottom: 2em; } table.sumstats td, table.fst td { height: 2em; border: thin solid #aaa; vertical-align: middle; } table.sumstats td.pop_id, table.fst td.pop_id { width: 5%; background-color: #eeeeee; border-left: 0px; } table.sumstats td.key, table.fst td.key { border-top: 0px; background-color: #eeeeee; padding: 2px; word-wrap: break-word; } table.fst td.fst span:hover { cursor: help; } table.fst td.diagonal { background-color: #888; } table.fst td.fst, table.fst_key 
td.fst { background-color: #aee2f3; } table.fst td.pi, table.fst_key td.pi { background-color: #fdc4b8; } table.fst_key { font-size: 10pt; width: 100%; margin-top: 1em; margin-bottom: 1em; } table.fst_key td { font-weight: bold; width: 45%; } span.snp { font-weight: normal; } span.snp:hover { color: #a93535; cursor: pointer; } span.snp_sel { font-weight: bold; text-decoration: underline; } div.viewstat { width: 80%; margin-left: auto; margin-right: auto; margin-top: 0.75em; border: 2px solid #29356c; border-radius: 5px; background-color: #eeeeee; color: #29356c; font-size: 11pt; padding: 3px; font-variant: small-caps; } div.viewstat:hover { color: #a93535; border: 2px solid #a93535; background-color: #ffffff; cursor: pointer; } div.top_viewstat { margin-top: 1.5em; margin-bottom: -1em; } div.popup_stat { position: absolute; background-color: #dddddd; border: 2px solid #aaaaaa; opacity: 0.95; padding: 10px; border-radius: 5px; overflow: scroll; box-shadow: 5px 5px 7px #888888; } div.popup_stat_close { float: right; vertical-align: top; color: #a93535; } div.popup_stat_close:hover { color: #29356c; cursor: pointer; } div.stack_view div.popup_stat_close:hover { color: #ffffff; } table.stack { background-color: #ffffff; border-collapse: collapse; margin-left: auto; margin-right: auto; margin-bottom: 2em; } table.stack tr:last-child td { padding-bottom: 5px; } table.stack th { height: 1.5em; text-align: left; font-size: 12pt; font-weight: normal; padding-left: 10px; background-color: #eeeeee; border: thin solid #aaaaaa; } table.stack td { border: 0; padding: 0px; font-size: smaller; } table.stack td.num { padding-left: 10px; text-align: right; font-size: x-small; padding-right: 5px; border-left: thin solid #aaaaaa; } table.stack td.primary { color: #32773f; } table.stack td.secondary { color: #870214; } table.stack td.id { font-family: monospace; text-align: right; padding-right: 4px; font-size: 8pt; } table.stack td.tag { font-family: monospace; text-align: left; 
border-left: thin solid black; padding-left: 4px; font-size: 10pt; padding-right: 10px; border-right: thin solid #aaaaaa; } stacks-1.35/php/stacks.js000644 000765 000024 00000010153 12533677757 016145 0ustar00catchenstaff000000 000000 // // Copyright 2010, Julian Catchen // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // function toggle_div(id, root_path, page_state_form) { var tr_obj = document.getElementById(id); var caret = document.getElementById(id + "_img"); if (tr_obj.style.display == "none") { tr_obj.style.display = ""; caret.src = root_path + "/caret-d.png"; update_page_state(page_state_form, id, 1); } else { tr_obj.style.display = "none"; caret.src = root_path + "/caret-u.png"; update_page_state(page_state_form, id, 0); } } function toggle_aln_tr(id, root_path, url) { var tr_obj = document.getElementById(id); var caret = document.getElementById(id + "_img"); if (tr_obj.style.display == "none") { tr_obj.style.display = ""; caret.src = root_path + "/caret-d.png"; // Check if the alignment has been loaded, if not, load // it in the iframe. 
var iframe_obj = document.getElementById(id + '_iframe'); iframe_obj.src = url; } else { tr_obj.style.display = "none"; caret.src = root_path + "/caret-u.png"; } } function toggle_sel(id) { var div_obj = document.getElementById(id); if (div_obj.style.display == "none") { div_obj.style.display = ""; } else { div_obj.style.display = "none"; } } function set_operation(id, operation) { if (operation == "reset") { var verify = confirm("Are you sure you want to delete all corrections for this marker?"); if (!verify) return; } var form_obj = document.getElementById(id); form_obj.op.value = operation; form_obj.submit(); } function toggle_cb(form_id, value) { var form_obj = document.getElementById(form_id); for(i = 0; i < form_obj.elements.length; i++) if (form_obj.elements[i].value == value) if (form_obj.elements[i].checked == true) form_obj.elements[i].checked = false; else form_obj.elements[i].checked = true; } function toggle_genotypes(catalog_id, table_id, type) { var table_obj = document.getElementById(table_id); divs = table_obj.getElementsByTagName("div"); for(i = 0; i < divs.length; i++) { if (divs[i].id.substr(0, 3) == type) { if (divs[i].style.display == "none") divs[i].style.display = ""; else divs[i].style.display = "none"; } } // // Adjust the height of the parent iframe to possibly accomodate more data // var iframe_obj = parent.document.getElementById(catalog_id + "_iframe"); if (iframe_obj) iframe_obj.style.height = (this.document.body.offsetHeight+25) + 'px'; } function toggle_sumstats(span_obj, col) { var snp_index = document.getElementById("snp_index"); var old_snp = snp_index.value; var old_snp_id = old_snp + "_snp"; var old_span_obj = document.getElementById(old_snp_id); snp_index.value = col; if (col == old_snp) return; span_obj.className = "snp_sel"; old_span_obj.className = "snp"; var div; var div_id; div_id = old_snp + "_sumstats"; div = document.getElementById(div_id); div.style.display = "none"; div_id = old_snp + "_fst"; div = 
document.getElementById(div_id); div.style.display = "none"; div_id = col + "_sumstats"; div = document.getElementById(div_id); div.style.display = ""; div_id = col + "_fst"; div = document.getElementById(div_id); div.style.display = ""; } stacks-1.35/php/stacks_functions.php000644 000765 000024 00000015560 12571641525 020401 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // function write_header($page_title) { global $version, $site_title; global $root_path, $img_path; echo <<< EOQ $site_title: $page_title
EOQ; } function write_compact_header($page_title) { global $root_path, $css_path, $site_title, $js_path; echo <<< EOQ $site_title: $page_title EOQ; } function write_footer() { echo <<< EOQ EOQ; } function write_compact_footer() { echo <<< EOQ EOQ; } function print_scale($max_len) { if ($max_len == 0) return ""; $class = array("light_scale", "dark_scale"); $str = ""; $c = 0; $str .= ""; for ($i = 0; $i < $max_len; $i++) { $str .= $i % 10; if (($i + 1) % 10 == 0) { $c++; $str .= ""; } } $str .= ""; return $str; } function print_snps($tag_id, $consensus, $seq, $snps, $wrap) { global $display_len; $str = ""; $start = 0; $snp_cnt = count($snps); while (count($snps)) { $snp = array_shift($snps); $con = substr($consensus, $snp['col'], 1); $end = $snp['col']; $s = substr($seq, $start, $end - $start); $str .= $s; $s = substr($seq, $end, 1); if ($con == $s) $str .= "$s"; else if ($s == "N") $str .= "$s"; else $str .= "$s"; $start = $end + 1; } if ($snp_cnt > 0) { $s = substr($seq, $start); $str .= $s; } else { $str = $consensus; } if ($wrap == false || strlen($consensus) <= $display_len) return $str; // // Add line breaks to the sequence // $s = ""; $nuc = 0; $pos = 0; $len = strlen($str); while ($len > $display_len) { for ($pos = 0, $nuc = 0; $pos < $display_len && $nuc < $len; $nuc++, $pos++) { if ($str[$nuc] == '<') { do { $nuc++; } while ($str[$nuc] != '>'); $pos--; } } $s .= substr($str, 0, $nuc) . "
\n"; $str = substr($str, $nuc); $pos = 0; $nuc = 0; $len = strlen($str); } $s .= $str; return $s; } function print_snps_errs($consensus, $sequence, $snps, $cat_snps) { $str = ""; $con = str_split($consensus); $seq = str_split($sequence); for ($i = 0; $i < count($con); $i++) { // Is a SNP defined in this column? if (isset($snps[$i])) { if ($con[$i] == $seq[$i]) $str .= "$seq[$i]"; else if ($seq[$i] == "N") $str .= "$seq[$i]"; else $str .= "$seq[$i]"; } else if (isset($cat_snps[$i])) { $str .= "$seq[$i]"; } else { // Does this nucleotide equal the consensus nucleotide at position $i? if ($con[$i] == $seq[$i] || $seq[$i] == "N") $str .= $seq[$i]; else $str .= "$seq[$i]"; } } return $str; } function generate_key_element_select($name, $elements, $selected_key, $javascript) { $script_code = ""; if (strlen($javascript) > 0) { $script_code = " onchange=\"$javascript\""; } $ctl = " \n"; return $ctl; } function generate_element_select($name, $elements, $selected_ele, $change_js, $blur_js = "") { $script_code = ""; if (strlen($change_js) > 0 && strlen($blur_js) > 0) { $script_code = " onchange=\"$change_js\" onblur=\"$blur_js\""; } else if (strlen($change_js) > 0) { $script_code = " onchange=\"$change_js\""; } else if (strlen($blur_js) > 0) { $script_code = " onblur=\"$blur_js\""; } $ctl = " \n"; return $ctl; } function print_bp($bp) { // Convert the location to be in megabases if ($bp > 1000000) $bp = sprintf("%.02fMb", $bp / 1000000); else if ($bp > 1000) $bp = sprintf("%.1fKb", $bp / 1000); else $bp = sprintf("%dbp", $bp); return $bp; } ?> stacks-1.35/php/sumstat_view.php000644 000765 000024 00000010627 12533677757 017570 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $tag_id = isset($_GET['tag_id']) ? $_GET['tag_id'] : 0; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $batch_type = isset($_GET['type']) ? $_GET['type'] : "map"; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['tag_id'] = $tag_id; $display['batch_id'] = $batch_id; // // Prepare some SQL queries // $query = "SELECT pop_id, pop_name FROM populations " . "WHERE batch_id=?"; $db['pop_sth'] = $db['dbh']->prepare($query); check_db_error($db['pop_sth'], __FILE__, __LINE__); $query = "SELECT pop_id, col, bp, p_nuc, q_nuc, n, p, obs_het, obs_hom, exp_het, exp_hom, pi, fis FROM sumstats " . "WHERE batch_id=? AND tag_id=?"; $db['stats_sth'] = $db['dbh']->prepare($query); check_db_error($db['stats_sth'], __FILE__, __LINE__); // // Fetch population names if available. 
// $pop_names = array(); if ($batch_type == "population") { $result = $db['pop_sth']->execute($batch_id); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) $pop_names[$row['pop_id']] = $row['pop_name']; } $result = $db['stats_sth']->execute(array($batch_id, $tag_id)); check_db_error($result, __FILE__, __LINE__); $stats = array(); $pops = array(); while ($row = $result->fetchRow()) { $a = array('col' => $row['col'], 'bp' => $row['bp'], 'p_nuc' => $row['p_nuc'], 'q_nuc' => $row['q_nuc'], 'n' => $row['n'], 'p' => $row['p'], 'obs_het' => $row['obs_het'], 'obs_hom' => $row['obs_hom'], 'exp_het' => $row['exp_het'], 'exp_hom' => $row['exp_hom'], 'pi' => $row['pi'], 'fis' => $row['fis'], 'pop_id' => $row['pop_id']); if (!isset($stats[$row['col']])) $stats[$row['col']] = array(); $stats[$row['col']][$row['pop_id']] = $a; $pops[$row['pop_id']] = $row['pop_id']; } ksort($stats); ksort($pops); $json_str = "{" . "\"path\": \"$root_path\"," . "\"batch_id\": \"$batch_id\"," . "\"db\": \"$database\"," . "\"id\": \"$tag_id\"," . "\"type\": \"$batch_type\","; $json_str .= "\"sumstats\": ["; foreach ($pops as $pop_id) if (!isset($pop_names[$pop_id])) $pop_names[$pop_id] = $pop_id; $index = count($pop_names) == 0 ? $pops : $pop_names; $rows = 0; foreach ($stats as $col => $stat) { foreach ($index as $pop_id => $pop_name) { if (!isset($stat[$pop_id])) continue; $s = $stat[$pop_id]; $p = $s['p'] < 1 ? sprintf("%.5f", $s['p']) : $s['p']; $ohet = $s['obs_het'] > 0 ? sprintf("%.3f", $s['obs_het']) : $s['obs_het']; $ohom = $s['obs_hom'] < 1 ? sprintf("%.3f", $s['obs_hom']) : $s['obs_hom']; $ehet = $s['exp_het'] > 0 ? sprintf("%.3f", $s['exp_het']) : $s['exp_het']; $ehom = $s['exp_hom'] < 1 ? sprintf("%.3f", $s['exp_hom']) : $s['exp_hom']; $pi = $s['pi'] > 0 ? sprintf("%.3f", $s['pi']) : $s['pi']; $fis = $s['fis'] != 0 ? sprintf("%.3f", $s['fis']) : "0"; $json_str .= "{" . "\"pop_id\": \"$pop_name\"," . "\"col\": \"$s[col]\"," . "\"bp\": \"$s[bp]\"," . 
"\"p_allele\": \"$s[p_nuc]\"," . "\"q_allele\": \"$s[q_nuc]\"," . "\"p\": \"$p\"," . "\"n\": \"$s[n]\"," . "\"obshet\": \"$ohet\"," . "\"obshom\": \"$ohom\"," . "\"exphet\": \"$ehet\"," . "\"exphom\": \"$ehom\"," . "\"pi\": \"$pi\"," . "\"fis\": \"$fis\"" . "},"; $rows++; } } if ($rows > 0) $json_str = substr($json_str, 0, -1); $json_str .= "]}"; echo $json_str; ?> stacks-1.35/php/tags.php000644 000765 000024 00000043416 12553472444 015762 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $batch_id = isset($_GET['batch_id']) ? $_GET['batch_id'] : 0; $sample_id = isset($_GET['sample_id']) ? $_GET['sample_id'] : 0; $page = isset($_GET['p']) ? $_GET['p'] : 1; $per_page = isset($_GET['pp']) ? $_GET['pp'] : 10; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. 
$display = array(); $display['batch_id'] = $batch_id; $display['sample_id'] = $sample_id; $display['db'] = $database; $display['p'] = $page; $display['pp'] = $per_page; $display['filter_type'] = array(); // // Process the filtering parameters // $param = array($batch_id, $sample_id); process_filter($display); prepare_filter_parameters($display, $param); // // Prepare some SQL queries // $query = "SELECT batches.id as id, date, description, samples.id as sample_id, file FROM batches " . "JOIN samples ON (batch_id=batches.id) " . "WHERE batches.id=? AND samples.id=?"; $db['batch_sth'] = $db['dbh']->prepare($query); check_db_error($db['batch_sth'], __FILE__, __LINE__); $query = "SELECT COUNT(tag_id) as count FROM tag_index " . "WHERE batch_id=? AND sample_id=?"; $query .= apply_query_filters($display); $db['count_sth'] = $db['dbh']->prepare($query); check_db_error($db['count_sth'], __FILE__, __LINE__); $query = "SELECT depth as count FROM tag_index " . "WHERE batch_id=? AND sample_id=? AND tag_id=?"; $db['depth_sth'] = $db['dbh']->prepare($query); check_db_error($db['depth_sth'], __FILE__, __LINE__); $query = "SELECT col, rank_2 FROM snps " . "JOIN samples ON (snps.sample_id=samples.id) " . "JOIN batches ON (samples.batch_id=batches.id) " . "WHERE snps.type='E' AND batch_id=? AND snps.sample_id=? AND tag_id=? ORDER BY col"; $db['snp_sth'] = $db['dbh']->prepare($query); check_db_error($db['snp_sth'], __FILE__, __LINE__); // // Pull information about this batch // $result = $db['batch_sth']->execute(array($batch_id, $sample_id)); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $batch = array(); $batch['id'] = $row['id']; $batch['desc'] = $row['description']; $batch['date'] = $row['date']; $batch['sample_id'] = $row['sample_id']; $batch['file'] = $row['file']; $page_title = "RAD-Tag Sample Viewer"; write_header($page_title, $batch); echo <<< EOQ

Batch #$batch[id] [$batch[date]; $batch[desc]]

EOQ; write_filter(); echo <<< EOQ

RAD-Tag Sample #$batch[sample_id] [$batch[file]]

EOQ; $db['dbh']->setLimit($display['pp'], $start_group - 1); check_db_error($db['dbh'], __FILE__, __LINE__); $query = "SELECT con_tag_id as id, tag_index.tag_id as tag_id, " . "depth, seq, catalog_id, " . "tag_index.deleveraged, tag_index.removed, tag_index.blacklisted " . "FROM tag_index " . "JOIN unique_tags ON (con_tag_id=unique_tags.id) " . "WHERE tag_index.batch_id=? AND tag_index.sample_id=?"; $query .= apply_query_filters($display); $db['tag_sth'] = $db['dbh']->prepare($query); check_db_error($db['tag_sth'], __FILE__, __LINE__); $result = $db['tag_sth']->execute($param); check_db_error($result, __FILE__, __LINE__); while ($row = $result->fetchRow()) { // Query the database to find how many SNPs were found in this sample. $snps = array(); $snp_res = $db['snp_sth']->execute(array($batch_id, $sample_id, $row['tag_id'])); check_db_error($snp_res, __FILE__, __LINE__); while ($snp_row = $snp_res->fetchRow()) { array_push($snps, array('col' => $snp_row['col'], 'rank' => $snp_row['rank_2'])); } print "\n" . " \n" . " \n"; if (count($snps) == 0) print " \n"; else print " \n"; $s = print_snps($row['tag_id'], $row['seq'], $row['seq'], $snps, true); print " \n"; $row['catalog_id'] > 0 ? print " \n" : print " \n"; print "\n"; } print "\n" . "
EOQ; // // Figure out how many results there are (including filtering) // and write out the proper pagination links // $result = $db['count_sth']->execute($param); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); $pagination_count = $row['count']; $start_group = 0; $end_group = 0; write_pagination($pagination_count, $start_group, $end_group, "tags.php"); echo <<< EOQ
Id Depth SNP Consensus Catalog ID
" . "$row[tag_id]$row[depth]NoYes [" . count($snps) . "nuc]
" . $s . "
$row[catalog_id]absent
\n"; write_pagination($pagination_count, $start_group, $end_group, "tags.php"); echo <<< EOQ
EOQ; write_footer(); function generate_hidden_form_vars($var) { global $root_path, $display; $vars = ""; foreach ($display as $key => $d) { if (strstr($key, $var)) continue; if (is_array($d)) { foreach ($d as $e) { $vars .= " \n"; } } else { $vars .= " \n"; } } return $vars; } function generate_per_page_select($name, $per_page) { $pages = array("10", "50", "100", "all"); $ctl = " \n"; return $ctl; } function generate_url($destination) { global $root_path, $display; $url = "href=\"" . $root_path . "/" . $destination . "?"; foreach ($display as $key => $d) { if (is_array($d)) { foreach ($d as $e) $url .= "{$key}[]=$e&"; } else { $url .= "$key=$d&"; } } // Remove the hanging '&' $url = substr($url, 0, -1); $url .= "\""; return $url; } function generate_page_list($page, $num_pages, $destination) { global $display; $page_list = ""; if ($page <= 4) { for ($i = 1; $i < $page; $i++) { $display['p'] = $i; $p = generate_url($destination); $page_list .= "$i\n"; } } else { $display['p'] = 1; $p = generate_url($destination); $page_list .= "1 ...\n"; foreach (array($page - 3, $page - 2, $page - 1) as $i) { $display['p'] = $i; $p = generate_url($destination); $page_list .= "$i\n"; } } $page_list .= " $page\n"; if ($page <= $num_pages - 4) { for ($i = $page + 1; $i <= $page + 3; $i++) { $display['p'] = $i; $p = generate_url($destination); $page_list .= "$i\n"; } $display['p'] = $num_pages; $p = generate_url($destination); $page_list .= "... $num_pages\n"; } else { for ($i = $page + 1; $i <= $num_pages; $i++) { $display['p'] = $i; $p = generate_url($destination); $page_list .= "$i\n"; } } $display['p'] = $page; $page_list = "\n" . $page_list . "\n"; return $page_list; } function write_pagination($num_tags, &$start_gene, &$end_gene, $destination) { global $img_path, $root_path, $display; $cur_page = $display['p']; $page = $display['p']; $per_page = $display['pp']; if ($per_page == "all") $per_page = $num_tags; // // First figure out the total number of pages. 
If there are // additional genes left over, add an extra page. // $num_pages = floor($num_tags / $per_page); $num_pages += $num_tags % $per_page >= 1 ? 1 : 0; if ($page > $num_pages) { $page = $num_pages; $cur_page = $num_pages; } // Determine the start and end gene numbers $start_gene = 1 + (($page - 1) * $per_page); $end_gene = $start_gene + $per_page > $num_tags ? $num_tags : ($start_gene + $per_page - 1); // Generate the URLs for our links $display['p'] -= 1; $prev_page = generate_url($destination); $display['p'] += 2; $next_page = generate_url($destination); $display['p'] = $cur_page; print "\n" . "\n" . "\n"; if ($num_pages > 1) $page_list = generate_page_list($page, $num_pages, $destination); print $page_list; $hidden_vars = generate_hidden_form_vars("pp"); $per_page_ctl = generate_per_page_select("pp", $display['pp']); echo <<< EOQ
\n"; if ($page == 1) { if ($num_pages == 1) { echo <<< EOQ No Previous Page $page No Next Page EOQ; } else { echo <<< EOQ No Previous Page $page View Next Page EOQ; } } else if ($page == $num_pages) { echo <<< EOQ View Previous Page $page No Next Page EOQ; } else { echo <<< EOQ View Previous Page $page View Next Page EOQ; } print " ($num_tags tags)\n" . "
$hidden_vars tags per page   $per_page_ctl
EOQ; } function write_filter() { global $img_path, $root_path, $display; $hidden_vars = generate_hidden_form_vars("filter"); $filters = array("depth" => array(), "snps" => array(), "tagid" => array(), "black" => array(), "delv" => array(), "rem" => array()); $ele_name = isset($display['filter_depth']) ? $display['filter_depth'] : ""; $depth_ctl = generate_element_select("filter_depth", array(1, 5, 10, 20), $ele_name, ""); $ele_name = isset($display['filter_snps']) ? $display['filter_snps'] : ""; $snps_ctl = generate_element_select("filter_snps", array(1, 2, 3, 4, 5), $ele_name, ""); $ele_name = isset($display['filter_delv']) ? $display['filter_delv'] : ""; $delv_ctl = generate_key_element_select("filter_delv", array(1 => "True", 0 => "False"), $ele_name, ""); $ele_name = isset($display['filter_rem']) ? $display['filter_rem'] : ""; $rem_ctl = generate_key_element_select("filter_rem", array(1 => "True", 0 => "False"), $ele_name, ""); $ele_name = isset($display['filter_black']) ? $display['filter_black'] : ""; $black_ctl = generate_key_element_select("filter_black", array(1 => "True", 0 => "False"), $ele_name, ""); if (isset($display['filter_type'])) { foreach ($filters as $key => $f) if (in_array($key, $display['filter_type'])) { $filters[$key]['sel'] = "checked=\"checked\""; $filters[$key]['tr'] = "class=\"active_filter\""; } else { $filters[$key]['sel'] = ""; $filters[$key]['tr'] = ""; } } else { $filters['none']['sel'] = "checked=\"checked\""; } $tagid = isset($display['filter_tagid']) ? $display['filter_tagid'] : ""; echo <<< EOQ

Filter Results

EOQ; } function process_filter(&$display_params) { if (!isset($_GET['filter_type'])) return; foreach ($_GET['filter_type'] as $filter) { array_push($display_params['filter_type'], $filter); if ($filter == "depth") { $display_params['filter_depth'] = $_GET['filter_depth']; } else if ($filter == "snps") { $display_params['filter_snps'] = $_GET['filter_snps']; } else if ($filter == "tagid") { $display_params['filter_tagid'] = $_GET['filter_tagid']; } else if ($filter == "delv") { $display_params['filter_delv'] = $_GET['filter_delv']; } else if ($filter == "black") { $display_params['filter_black'] = $_GET['filter_black']; } else if ($filter == "rem") { $display_params['filter_rem'] = $_GET['filter_rem']; } } } function prepare_filter_parameters($display_params, &$param) { $filters = $display_params['filter_type']; if (!isset($filters)) return; foreach ($filters as $filter) { if ($filter == "snps") { array_push($param, $display_params['filter_snps']); } else if ($filter == "depth") { array_push($param, $display_params['filter_depth']); } else if ($filter == "tagid") { array_push($param, $display_params['filter_tagid']); } else if ($filter == "delv") { array_push($param, $display_params['filter_delv']); } else if ($filter == "black") { array_push($param, $display_params['filter_black']); } else if ($filter == "rem") { array_push($param, $display_params['filter_rem']); } } } function apply_query_filters($display_params) { $sql_filters = array("depth" => "(depth >= ?)", "snps" => "(snps >= ?)", "tagid" => "(tag_index.tag_id = ?)", "delv" => "(tag_index.deleveraged = ?)", "black" => "(tag_index.blacklisted = ?)", "rem" => "(tag_index.removed = ?)"); $filters = $display_params['filter_type']; $query = ""; if (count($filters) > 0) { $query = " AND "; while (count($filters) > 0) { $filter = array_shift($filters); $query .= $sql_filters[$filter]; $query .= count($filters) > 0 ? 
" AND " : ""; } } return $query; } ?> stacks-1.35/php/version.php000644 000765 000024 00000000000 12335173442 016461 0ustar00catchenstaff000000 000000 stacks-1.35/php/view_sequence.php000644 000765 000024 00000003542 12335173442 017654 0ustar00catchenstaff000000 000000 // // This file is part of Stacks. // // Stacks is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Stacks is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Stacks. If not, see . // require_once("header.php"); $database = isset($_GET['db']) ? $_GET['db'] : ""; $seq_id = isset($_GET['id']) ? $_GET['id'] : 0; // Connect to the database $db = db_connect($database); // Save these variables for automatic URL formation later on. $display = array(); $display['db'] = $database; $display['seq_id'] = $seq_id; // // Prepare some SQL queries // $query = "SELECT id, catalog_id, seq_id, type, seq FROM sequence " . "WHERE id=?"; $db['seq_sth'] = $db['dbh']->prepare($query); check_db_error($db['seq_sth'], __FILE__, __LINE__); $page_title = "Catalog RAD-Tag Sequence Viewer"; write_compact_header($page_title); $result = $db['seq_sth']->execute($seq_id); check_db_error($result, __FILE__, __LINE__); $row = $result->fetchRow(); // // Line wrap the sequence to 80 characters // $full_seq = $row['seq']; $seq = ""; do { $seq .= substr($full_seq, 0, 80) . "\n"; $full_seq = substr($full_seq, 80); } while (strlen($full_seq) > 80); if (strlen($seq) > 0) { $seq .= $full_seq . "\n"; } echo <<< EOQ
>$row[catalog_id]|$row[seq_id]|$row[type]
$seq
EOQ; write_compact_footer(); ?> stacks-1.35/php/images/caret-d.png000644 000765 000024 00000000241 12335173442 017564 0ustar00catchenstaff000000 000000 ‰PNG  IHDR ‰^ëPLTEÀÀÀ+ס¦tRNS@æØfbKGDˆH pHYs  šœtIMEÔ *.IDATxÚc`€‚c 9  0{¤ÑƒÜIEND®B`‚stacks-1.35/php/images/caret-u.png000644 000765 000024 00000000245 12335173442 017611 0ustar00catchenstaff000000 000000 ‰PNG  IHDR ‰^ëPLTEÀÀÀ+ס¦tRNS@æØfbKGDˆH pHYs  šœtIMEÔ 7°IžIDATxÚc`  6² ŠÝq@ÆIEND®B`‚stacks-1.35/php/images/excel_icon.png000644 000765 000024 00000005057 12335173442 020367 0ustar00catchenstaff000000 000000 ‰PNG  IHDR##Ù³YbKGDÿÿÿ ½§“ pHYs  šœtIME× 7@OÒr ¼IDATXÃ¥˜ßo×uÇ?gfvùcµ¤–¤HI¤dÉrÇŽk'vóqP4nP }Hß ŸÒ×þy Š~I¢/y)ú‰"vë$hë4±#)ªYµeŠ”ø›Ë]îìì̽çœ<Ì’–[,Ð{wîÌ=ß{î9ßï9+<àS„p£È«Ç1Ç\@ÀÁqðñ w9š÷z1 RÔ•‰FÂüÌìË"ò¥ÿmOæÎÆŽ¯ov™iMãÔ`Ük£õ·ƒC¸oÎê[®xâ˜&¬ÝÛ¦‘&|æ©G9·Ø¹!"OÜo/{°¼Øáâùþ¿Ÿ*DŠ Z§:¼öú¯xú“ù„»¯‹ÈÊÑ3ɃÈÒ&YÚø?˜ò¹Þ›sw°’¥Å9.?ò(¯¾vÕ»÷–Ý}ëØ^Qæß{{ïí¯»¸ 抚ƒ 7Ö~‚ºq{¯† S#ºb1m€žxäI–/aÉîMîe¸*g—æè ãŽÁ—>Ï⑇²Á¨{ù?o½Œdàbhˆ„*B$†HYUŒÊ‚ªª@¥!T”ªìïmò_ýK–/!Œ#WNð¡$”––Î1>Á÷_y¯>—.»ûϳR IŒg.=ÇÅÎÌ sÇÌ0¨jйÖ÷UA„»Ý5þöïÿŠT÷íÿÃÐÔG–\ó&&å(Ê„ï|÷{|ëÏþèé,3 ”œj´èLÍÿöæ¨8ê’qîn)†#Bí¡ß‚âdiJ§3ËëÿýE£¢¢(úäÑYÛð/½JEúÚã¿úEÈ ÃCȦùܥϱ4³Èk7ÿ‹_¯]Ã$¡7èñ…Ç~ŸÎ\‹Ê+d'u¼ø ¤©ðé§>•+Ñ hL¨0Äœïÿø—ô7o‘™+#->bkwƒÃô€‰é„õ›¼³s‹‡fâææ› 'º¼»·Áú >qùSt¬Eå#+ºy  VGÌPu#$ ‚] ’²|zšÎlF¬O#L„DbRÄ¡ÓÖ6_yêËLÇiÏg?þY´XJ ÁEV°s‰š*ÂÝ’ƒ‘á ¸ëÛV·T@P#F0«7’ …Z±Ô>ÇþΗyéê¿ “ÎÒÂ9v¼»µCøÆ³ÏóµO}…a2¤ôœ¹SÂ|«æÎ¼ˆt5r¾3u|Dy?°¸0ÉB«æ«…X)æÌÁjj'“ü=-Ã!ÃÆ T”üÛÕåï^þ’i§“µÙX»Çw^å÷>ÿ,çfÎÐÛÜÃGMF¹ÑËûé´Œ¹S1=¬Hˆ‡ÏLÓa7çÂÒi;Í:͵¢,)ê` î‚»¹QU¡¢ÐÛû{,Ø"Í<wÎÍ?DÏúl¯m³±±ÁÝ·îRU ß}çÿü¤É;=a»VÊa98]ÏQU¢w”ƒÐ£½-˜%ÜÞ²²0‹¨â&˜§µwÝÉðZQÅR¾þ…?åkÏþ jŠº‚;£ªäÚêUÖWßå¯8æ SÍIN/LSV¬Ä1så“üîG?&V‘&SܸûSB5`öT“¥Ù&Ðd£QPV9WVÚ$€»û9—ÏfeaÜ­´Ž-Ô8ŽÌͧŠê¿ö27W¯C@ÝP‹¸+QKªÇÞrÔ¸ÒË»ôóœµí’›wŽ*úÃÀÁ P ·°‰æÜÙ.Ùííðë‰ZFîu ^™¯µÏê Vw !›hN¥N_ac—­Z(Ôù°‚I3¡ŠJQF’ÔÀ °Ë­'Y^8Ëâü$íÐ$;(¨bÁ…³³`Ž›±×_ãÌ™ÎÎLÄ D µ7'˜Õ1š]\¼ôÌéI>êýü­Û÷ž>öÐy†QXÛîsñÌ „Û÷z4&&8ÝÊh¡túÒäüÜ$âu…¼?\bþt‹³­Œ BwwH©J‘¸¦8öáegkrö™[ïl:‰09Ù&±º:Xëe$T•Ý|‚Ðt6¨TÆA>¢} Šˆ21Ñd¿èÍOröL‹…¹"ŽF;ŽÃ1—×ÀÁF¥®,N5IÓ&¸På‘Mp~©Á0í|Äìéà,´ÚÜÞêÓH„ÖÜ4¯þâ6Ï=}‰ùN›JV& æ5rñt\˜ 
ívF³‘à“Ýœ)ŒåÎ4ý‘òúz—ÆäûýCœ&íÖ­Fdf¶A‘(JEµö ž .˜;jâú`0>æ\0Œ^¹þö>B$lu#•¶ö†€ÉðÔ<ÿquÄÕ«|ü‘”U]ëQæXZgG¿3 > Ô©‡Õ/%K6´vkUö’qiq†~¥líäxtÜ$­yDµ¤._ÝQ¢I€£c@âœä™ZìÔ ªbdj"¡Ù„,5$qfÛ“dAÙÜѯ mÎAáÎ”Ô ›ŠN‹s Faø¦áObô1AÝv[HÜ›î`æ•!§‘ŠTånH}¹™Nnþ­Ï,ÙØeEIEND®B`‚stacks-1.35/php/images/l-arrow-disabled.png000644 000765 000024 00000011364 12335173442 021405 0ustar00catchenstaff000000 000000 ‰PNG  IHDR"ÀN' pHYs  šœbiCCPPhotoshop ICC profilexÚ­Wy8Ôí×?ß1Æ>ö±3–,ÙÆ¾ed'; ÃXÆ:} zDIŠH‘¢$”½B¥¨DYž¬©ÈÒ¢yÿÐÓs]¿÷zß÷Ÿ÷üõ9ç:ç|Î}ŸûºÎ}¸ITj "£biλÍðž^Þx¦×Àœ€p‘c¨¦NNvð?Êúk@^*“¨ÔN¥3“Å ¦¾ñmáÚ‹u¿à,ÍÓËQ¾ml|ÛØøb©±H(ð†’ÈH*(Ñ\‰H5`C¶q`¶q/`ãCb O™À´€1"Å`•€LŽ ŒÀž@©DFF“¸:@>J‹àšeO/oüvÉä }4æí¿¶0I€š;¢’ÿÚäÚIu;ÿµ­:îyL°†: ìfŒtúª,ÓI€­:ýG¾u €a  #"0Žÿû¾¤àÿÒ·Ïü[J•ÁÊxSÌTÇÜÅ2Âú‰m…ƒ+À)ɥ̭ÏcÊkÇçÊïó¤Q…cDbEãÄÅS$2ð™’™RG¤ÊßqFö¼\¹|µÂÍMŠ=JCÊ“*ëV5)u 7Í­ríÇ:›z ú$ƒó»Æ$Œ)&wÍØ‰ûÌo[òì¦Y Ú¨Ú³ûâ`çX»‡Ó9Âå±›ˆ{„G›ÖÛooµÏW_¢ßQÿþÎ@GòÉ ç!¬¡¦”¸°ÚðÉHÞ(óèêýocб„8¿ø¼„û‰óÉÜ)F©Ôei=écSg-^Î\ÉZÏÞú Ã~”;—?Oì˜üq|ƒ¦V'÷œr?í}Æ÷l`!¥(¼8üå<¥$¬4ô©Ì»Üå¢m…É%µË •rU;®HU㯊]ºÎƒ»†«–ý&ãÍ_u›õ«·oÏ6ŒÞl|z·»©¹¹¾åƽ«÷/¶žm;Þ~´#³ó`Wr7íAÔèG=¤^b®oêñ­'éOmúúçž5<Ïp”\|Ññ²à•ßòÐ÷×O† GHo”ß|{ûh4Ì}\l|f¢æoÚ;íwß&[¦R¦õ§×ggSçLß3¼ï›Ïûàð‘ëãËO… ž‹‚‹#KEŸ=——‡¾¬8¬r¬ö}Í\3]£¯·m$ojm.«ùþCáÇÜÏŠ-ÿ_¿ÞÒ÷Óé(4ÇȇágâdfcafÙbýÆö™}†ã ¶Ÿ³“«ž»‚ç4o_8nŸ€£ QHOX[DITULE\[Âo!i,¥+­,£´CIv§A^I°S[‘¨d£ì©BQM"䨕ª7j<×\ÒæÖÑÔuÑKÖ¿b0lÈl¤e¼ß¤Ætž¨hnqÓrÓÊÂ:×fÄNÍ>ÕaÐIdOˆs£+âfï~ÒcÌKÁ›º÷–φ¯®ßÿvÒV 9>ènðj¨ %,ìZøÇHÙ(Ÿè"êkoŒklAÜãæD»¤ãÉS¦S?øœ¶–þ-ãûÁ_‡3Ù³x³…Žˆý%“C8ªŸk–g}Ìñ¸g¾Ï ÿòɰSѧ©ghg£ £Š¨Å‘ç‚Ïû—x–:]°(Ó.W¾¨\¡tIá²B¥|•ì™jé«2פ¯KÜ­®º)XÇ_Ïs‹ó6göG#Ç]î&\³`‹à=‘ûøV¹6•v­½N³.‡nï{î~„´ÒÓ×[ÒùØô‰À“ŧû‹ŸE>'ˆ ¬ö¿¨x™ðÊaHjhíußpÉHôÓ·ÜoÇGoŒ¥Ž[MpOŒþ]ñ.lReryªiúÄLè¬ñœÀÜâûóÅ¢>Z|úôqáþâ±¥}ŸU>ÿXîùrrÅwUauåë½µìu§ Á±ÍŠo”ï„ï›?îÿÌØ²ø…ýõœ¹ÝÔÃ2ú+ãgÌ"Ó"ó"Ëë(Û+ö'­ØZÎ2®\î2¯Ÿ“7(p]0SÈG˜ Â(2,Z)'n!Á)1ޝ”Œ”Ò—ÆH¿–¹º#MÖMNIžYþ½BÏÎjÅcJ±Ê{UÌTU¢j,j곃šZuÚ¥:9ºñz$};ƒ]»ð†Ãe£!ã;&§LãÍ(DsK ]KÙÝ‚VŒVëÖlÞØöÚ5ÚW99æ8%î qvw±s5pSr—ðÀyrx¡¼¾zØ;åóvß+ß~¿nÿ6Rk@Kà-ò¥ œ`Zˆ_¨E5L8œ)|-b&r0ª-º–Z±ÿ,í¯˜ÄØÐ8ßx‡£DÕ$Éd¾tÊjê»ÏÒÚÒ¯gœ>˜~(ø°c¦f.ÉÞ8²ø×tÎÛ£¹=y-Çn¯Î/9‘[~’zŠ|ÚíŒåYÍB™"þbÆâ¯çfÏ”ô—¶]¸QVZ~êbVEê¥èˤJª=W¬« 
®J]ƒkc×ïß(ª¡Õ:ÞT©ÃÖ-Õ÷ߺz;·!úŽK£Ö]Üݦ±æÖ–Ò{i÷I­Ä¶íŒíÓÝU]™Ý¤†…®?è©ìÍí+yÜùdê)ý™ðs½ïÁÔ%/{_-¾vÉ}ót”w̼ùoÞwé“_§Îbç®Ì;}äþôz±öóÙ/«¥kõsßñ?ý¥ÑéÛ³£ Pà9 à\ ¿ Ï€«pâpÕ¤+ëY€˜åÿ3?ÌÀ â  Æà  —àÌ"¬ˆ:âƒü…4!Pb(7ÔqÔcf+†\†A´0šŒ®GÓ/1~Ã8a®1¡™HL̒̇™çYìYêY…Y3YØ<غØÕÙK9˜8â8¦°ŽØVNEÎ".&®®In'îVEž"^FÞ8ÞY>¾N~9Ž—…û&!ðNÐKð¹Ðn¡á]Â"Ú"-¢F¢]bVbâžâ³4 :>OR\ò¶”µÔ¤tŠ ŸLý§+²…rFrÓòy z s;Ï(š+®*U)ûªª ¨æ,Ô@­U=]ÃL“A³K+KÛNG\¥»¤7¬ßep{W¥ái£,ã“0Ó@³}Ds+ 3KãÝ&V&Ö&6»mì¼íƒ¨Ž‡ ÷Ô;÷¹Lº!îröž ^ÞÃ>œûô}ýNûw‘VåȾA…Áý¡Êî°œðþH®(÷èrêgšiLnìßñš 9‰cÉÊ)GRçÓÌÓK3~r?Üœ%‘vämŽñÑ;y>ÇEó‡ ŠOœ‘>û±¨ö\Z‰ý޲ɋ— *ÉW¬®ª\ç»ñ£v©îý­™†‰Æù¦µ{ì­ÒíÑÝéË{Zú&ž2=# x¾8ñªwó†8š;>öNv*ufèýÎ>M/Ù,_]e[‹Ù˜ùnùó>¾ý“VàÐ;‚ƒpºáƒ!‘H2„â@Y¡²QX &ÑŠèdt£ c(c ‹¡`z˜˜r˜>1;17°ˆ³aùÂêÍÚ˦ÅVÁÎÃ~˜}#„ã-ÖÛÉ©ÅyKšë7÷î_<ñ<+¼ÞI¾½|¯ùøã,q†M‚:‚w…4„j…Õ„oЍ‰4‰‰v‹ÙŠ ‰û‰/I¤âYðE’;%;¤Ü¤¤³eÄešw¸ïX—-”3•ÏPSx¾3Q¯Ø«£,©üJ%SUCu†P¨f¥¶¥Þ¨¥)£9¢uBÛ^§³®;¥÷D¿ÑàÊ®s†9FÉÆ&~¦f{ˆVæ†:–š»5­´­µl m-íœì½‚“œòöT9·» »n¸‹y˜yFzy?õaÜGðõð;âß@š ä#[¥ׇÌQ¤ÃHáeQ¢Ñ~ÔÊýK1ê±Iq°‰îI%ɳ©Ú²Ó^dHŒ8ԜɑEÊ®=ò=Çþè­<ëc[ùÕ¾§„N?={¸H§xí|ciT™|ù|EÝå„*“jÞ«ï¯ß¯9s3¾>à¶íí»jÍ*÷4[ Û=;ÉݦÕz„ûØo=Ý|¶4ðáÅ×W?‡ÙÞjŽ{ü2Y1ýr=¯ù1|¡aiã‹áêÁµ› ßi?_ýé?€,è#„B&TB,"8„ˆÄ UÈJå†*DM0È2Ä2¯~­]Ë[§l˜mrmN|»ñ=ù‡õOŸã[¥¿ºét€í} X‰ÑÑ4¼Ñþ%2"î.`§ÄZ» Ó,À f¢€‘ бpùuƒ)–ÖÛ±ˆ 5ÖÉïäPWÀ ´0’Óoœá`8ä89ÈÜâwìeZœ³È áѶÎÀ€ŒE¹ýÃ5ïòÿ:™dn ¢(Lr(ÑaÛ%v@sÀC DCD (Ð@DÞˆX A$"€û!(@† ˆùq@K  B ”3üww˜Pþ !Úrˆy+8¾(:Iß=”pððð¿½mÿ0AÔŸLÛìÿèhY´:Z m†6D¡õÆ¡…A­‰ÖE›¢Ñh-´Þ³…» ²ÿT€ÿ“qhòµ*C0€ñ1s@ƒH?Ê¡ã`{WÀp”ø´o¦dü绊 JŒ FS“h”ÐX¼)•„'FGRãbƒhJxë¨@%¼: ð_ºLTÄl°Ã­gAMAØëõª cHRMm˜sŽâù†™xƒÔN3¼ycá,üIDATxÚ¼–ËNë:†/Û‰e‚„A…T €÷ðP0A€ •‹PA¥©/gÀq¶Óö@Ïfk[Š’¦ñú–ÿu±™÷Þã/ö·´ê‡Þ{8çºßÎ9üޝ´*È9"Âóó3ŽŽŽðññ±àÄ*C| F‰777¨ª ãñmÛ"I0Æà½cìgÀ ÷óós\^^¢( @Û¶0Æ€èS¤pï%È'Äw°étг³3<>>bggRJXkaŒµv),9çÀëÞ-C¼`<ãøømÛbww·“NOO!¥õ Æ@ï=¡µ†÷DÔưÑh„““$I‚¢(zñÜÞÞÆl6û2K‰MÓàíí J©.Öb>9¼÷¨ëUU!ÏsdYÖI¾%"¤iúerpΡ”Âl6ƒ1œs0Æ~s°ÖâââWWW(ŠI’ô`1t•ròÞw@Æ8çáÆªªÂõõ5ƒcÿ»Æ– cL*ïýgá{ïaŒAžçRâýý}åºúnXka­íVLáÁƒ àõõOOOî?ó-âT¶ÖBkápˆÉd‚¦iÀ[¨3"çDô嵬‰‹(¥°··‡º®q{{‹²,Á9‡sœsŒF£Ùç¦Ó)’$éfŒAA$IºŒ xxx@]×(ËJ)Àd2Áúú:Ò4íÉl !¦)´ÖȲ¬§’^¤” -ª,KH)q­­-äy"‚ÖZë^k '8ž$ „r]ÎÇ)L ï‹¢€R 
MÓt5VB1?G)%¤”BüZa¬y<)–:X[[ÃÝÝ^^^0¡µþ,æÈ`˜Ë9ï]½ÎÇ`8ìûûûØÜÜD–ePJAÑÆÇwöþëLºƒ1¦ÛûŒ1Ÿ ø_@€…Ìïñ.ÒÝ¿:Dhè¡cÄqŠëñÇGŒXŠˆ7ÔX²?zL í/nôóÙ¼êøgÆ3p©­ÆIEND®B`‚stacks-1.35/php/images/l-arrow.png000644 000765 000024 00000011576 12335173442 017645 0ustar00catchenstaff000000 000000 ‰PNG  IHDR­¢Ùp pHYs  šœbiCCPPhotoshop ICC profilexÚ­Wy8Ôí×?ß1Æ>ö±3–,ÙÆ¾ed'; ÃXÆ:} zDIŠH‘¢$”½B¥¨DYž¬©ÈÒ¢yÿÐÓs]¿÷zß÷Ÿ÷üõ9ç:ç|Î}ŸûºÎ}¸ITj "£biλÍðž^Þx¦×Àœ€p‘c¨¦NNvð?Êúk@^*“¨ÔN¥3“Å ¦¾ñmáÚ‹u¿à,ÍÓËQ¾ml|ÛØøb©±H(ð†’ÈH*(Ñ\‰H5`C¶q`¶q/`ãCb O™À´€1"Å`•€LŽ ŒÀž@©DFF“¸:@>J‹àšeO/oüvÉä }4æí¿¶0I€š;¢’ÿÚäÚIu;ÿµ­:îyL°†: ìfŒtúª,ÓI€­:ýG¾u €a  #"0Žÿû¾¤àÿÒ·Ïü[J•ÁÊxSÌTÇÜÅ2Âú‰m…ƒ+À)ɥ̭ÏcÊkÇçÊïó¤Q…cDbEãÄÅS$2ð™’™RG¤ÊßqFö¼\¹|µÂÍMŠ=JCÊ“*ëV5)u 7Í­ríÇ:›z ú$ƒó»Æ$Œ)&wÍØ‰ûÌo[òì¦Y Ú¨Ú³ûâ`çX»‡Ó9Âå±›ˆ{„G›ÖÛooµÏW_¢ßQÿþÎ@GòÉ ç!¬¡¦”¸°ÚðÉHÞ(óèêýocб„8¿ø¼„û‰óÉÜ)F©Ôei=écSg-^Î\ÉZÏÞú Ã~”;—?Oì˜üq|ƒ¦V'÷œr?í}Æ÷l`!¥(¼8üå<¥$¬4ô©Ì»Üå¢m…É%µË •rU;®HU㯊]ºÎƒ»†«–ý&ãÍ_u›õ«·oÏ6ŒÞl|z·»©¹¹¾åƽ«÷/¶žm;Þ~´#³ó`Wr7íAÔèG=¤^b®oêñ­'éOmúúçž5<Ïp”\|Ññ²à•ßòÐ÷×O† GHo”ß|{ûh4Ì}\l|f¢æoÚ;íwß&[¦R¦õ§×ggSçLß3¼ï›Ïûàð‘ëãËO… ž‹‚‹#KEŸ=——‡¾¬8¬r¬ö}Í\3]£¯·m$ojm.«ùþCáÇÜÏŠ-ÿ_¿ÞÒ÷Óé(4ÇȇágâdfcafÙbýÆö™}†ã ¶Ÿ³“«ž»‚ç4o_8nŸ€£ QHOX[DITULE\[Âo!i,¥+­,£´CIv§A^I°S[‘¨d£ì©BQM"䨕ª7j<×\ÒæÖÑÔuÑKÖ¿b0lÈl¤e¼ß¤Ætž¨hnqÓrÓÊÂ:×fÄNÍ>ÕaÐIdOˆs£+âfï~ÒcÌKÁ›º÷–φ¯®ßÿvÒV 9>ènðj¨ %,ìZøÇHÙ(Ÿè"êkoŒklAÜãæD»¤ãÉS¦S?øœ¶–þ-ãûÁ_‡3Ù³x³…Žˆý%“C8ªŸk–g}Ìñ¸g¾Ï ÿòɰSѧ©ghg£ £Š¨Å‘ç‚Ïû—x–:]°(Ó.W¾¨\¡tIá²B¥|•ì™jé«2פ¯KÜ­®º)XÇ_Ïs‹ó6göG#Ç]î&\³`‹à=‘ûøV¹6•v­½N³.‡nï{î~„´ÒÓ×[ÒùØô‰À“ŧû‹ŸE>'ˆ ¬ö¿¨x™ðÊaHjhíußpÉHôÓ·ÜoÇGoŒ¥Ž[MpOŒþ]ñ.lReryªiúÄLè¬ñœÀÜâûóÅ¢>Z|úôqáþâ±¥}ŸU>ÿXîùrrÅwUauåë½µìu§ Á±ÍŠo”ï„ï›?îÿÌØ²ø…ýõœ¹ÝÔÃ2ú+ãgÌ"Ó"ó"Ëë(Û+ö'­ØZÎ2®\î2¯Ÿ“7(p]0SÈG˜ Â(2,Z)'n!Á)1ޝ”Œ”Ò—ÆH¿–¹º#MÖMNIžYþ½BÏÎjÅcJ±Ê{UÌTU¢j,j곃šZuÚ¥:9ºñz$};ƒ]»ð†Ãe£!ã;&§LãÍ(DsK ]KÙÝ‚VŒVëÖlÞØöÚ5ÚW99æ8%î qvw±s5pSr—ðÀyrx¡¼¾zØ;åóvß+ß~¿nÿ6Rk@Kà-ò¥ œ`Zˆ_¨E5L8œ)|-b&r0ª-º–Z±ÿ,í¯˜ÄØÐ8ßx‡£DÕ$Éd¾tÊjê»ÏÒÚÒ¯gœ>˜~(ø°c¦f.ÉÞ8²ø×tÎÛ£¹=y-Çn¯Î/9‘[~’zŠ|ÚíŒåYÍB™"þbÆâ¯çfÏ”ô—¶]¸QVZ~êbVEê¥èˤJª=W¬« ®J]ƒkc×ïß(ª¡Õ:ÞT©ÃÖ-Õ÷ߺz;·!úŽK£Ö]Üݦ±æÖ–Ò{i÷I­Ä¶íŒíÓÝU]™Ý¤†…®?è©ìÍí+yÜùdê)ý™ðs½ïÁÔ%/{_-¾vÉ}ót”w̼ùoÞwé“_§Îbç®Ì;}äþôz±öóÙ/«¥kõsßñ?ý¥ÑéÛ³£ Pà9 
à\ ¿ Ï€«pâpÕ¤+ëY€˜åÿ3?ÌÀ â  Æà  —àÌ"¬ˆ:âƒü…4!Pb(7ÔqÔcf+†\†A´0šŒ®GÓ/1~Ã8a®1¡™HL̒̇™çYìYêY…Y3YØ<غØÕÙK9˜8â8¦°ŽØVNEÎ".&®®In'îVEž"^FÞ8ÞY>¾N~9Ž—…û&!ðNÐKð¹Ðn¡á]Â"Ú"-¢F¢]bVbâžâ³4 :>OR\ò¶”µÔ¤tŠ ŸLý§+²…rFrÓòy z s;Ï(š+®*U)ûªª ¨æ,Ô@­U=]ÃL“A³K+KÛNG\¥»¤7¬ßep{W¥ái£,ã“0Ó@³}Ds+ 3KãÝ&V&Ö&6»mì¼íƒ¨Ž‡ ÷Ô;÷¹Lº!îröž ^ÞÃ>œûô}ýNûw‘VåȾA…Áý¡Êî°œðþH®(÷èrêgšiLnìßñš 9‰cÉÊ)GRçÓÌÓK3~r?Üœ%‘vämŽñÑ;y>ÇEó‡ ŠOœ‘>û±¨ö\Z‰ý޲ɋ— *ÉW¬®ª\ç»ñ£v©îý­™†‰Æù¦µ{ì­ÒíÑÝéË{Zú&ž2=# x¾8ñªwó†8š;>öNv*ufèýÎ>M/Ù,_]e[‹Ù˜ùnùó>¾ý“VàÐ;‚ƒpºáƒ!‘H2„â@Y¡²QX &ÑŠèdt£ c(c ‹¡`z˜˜r˜>1;17°ˆ³aùÂêÍÚ˦ÅVÁÎÃ~˜}#„ã-ÖÛÉ©ÅyKšë7÷î_<ñ<+¼ÞI¾½|¯ùøã,q†M‚:‚w…4„j…Õ„oЍ‰4‰‰v‹ÙŠ ‰û‰/I¤âYðE’;%;¤Ü¤¤³eÄešw¸ïX—-”3•ÏPSx¾3Q¯Ø«£,©üJ%SUCu†P¨f¥¶¥Þ¨¥)£9¢uBÛ^§³®;¥÷D¿ÑàÊ®s†9FÉÆ&~¦f{ˆVæ†:–š»5­´­µl m-íœì½‚“œòöT9·» »n¸‹y˜yFzy?õaÜGðõð;âß@š ä#[¥ׇÌQ¤ÃHáeQ¢Ñ~ÔÊýK1ê±Iq°‰îI%ɳ©Ú²Ó^dHŒ8ԜɑEÊ®=ò=Çþè­<ëc[ùÕ¾§„N?={¸H§xí|ciT™|ù|EÝå„*“jÞ«ï¯ß¯9s3¾>à¶íí»jÍ*÷4[ Û=;ÉݦÕz„ûØo=Ý|¶4ðáÅ×W?‡ÙÞjŽ{ü2Y1ýr=¯ù1|¡aiã‹áêÁµ› ßi?_ýé?€,è#„B&TB,"8„ˆÄ UÈJå†*DM0È2Ä2¯~­]Ë[§l˜mrmN|»ñ=ù‡õOŸã[¥¿ºét€í} X‰ÑÑ4¼Ñþ%2"î.`§ÄZ» Ó,À f¢€‘ бpùuƒ)–ÖÛ±ˆ 5ÖÉïäPWÀ ´0’Óoœá`8ä89ÈÜâwìeZœ³È áѶÎÀ€ŒE¹ýÃ5ïòÿ:™dn ¢(Lr(ÑaÛ%v@sÀC DCD (Ð@DÞˆX A$"€û!(@† ˆùq@K  B ”3üww˜Pþ !Úrˆy+8¾(:Iß=”pððð¿½mÿ0AÔŸLÛìÿèhY´:Z m†6D¡õÆ¡…A­‰ÖE›¢Ñh-´Þ³…» ²ÿT€ÿ“qhòµ*C0€ñ1s@ƒH?Ê¡ã`{WÀp”ø´o¦dü绊 JŒ FS“h”ÐX¼)•„'FGRãbƒhJxë¨@%¼: ð_ºLTÄl°Ã­gAMAØëõª cHRMm˜sŽâù†™xƒÔN3¼ycá,†IDATxÚ¬•=K3KÇw^7»y3qQ—@°-ìŒoˆù©ìÒê±llL-Ö‚…`Qµ *ØX(""¢˜Å8fwggžâä.¹>7ÞÇË=Ű,3¿=çÌÿi­ÿ;ðŸlúé‡ñ÷,¥””!d†”R)õ'è UJEQ†!¥ôêêjiiéåå^þ+—ö"J)Ã0äœ×ëõõõõ»»;Ïó²Ù,Bc ¹ÿª” ÃPJ)¥ÜØØØÚÚšžžÖZ7›MÇqc†aÀjлׯи‰Q5›Íµµµóóó¹¹¹L&ãûþÇÇG»ÝþB„€ŽÇE ¸AZë(Š‚ 0 ãööveeÅó¼R©¤µfŒضN§)¥„¸Š¢hyy¹P(†A¡1QJB¨Ñh¬®®öõõMMMI)µÖAÌÎζZ­(Š~Wçüäääþþ~``§Ý×¢µÞÝÝ­Õj£££Åb² ¥4—Ëý㵘¦™Ïç[­–‚sN¡P5¤¹¹¹¹½½]*•r¹\LŒ¹½”â(BÈ4Íc\«Õvvv*• B( ÃYHk-„ð}ß4M¥¨bdd$N?<<Bþƒß}ß‚¬AFŸŸŸããã‹‹‹777———œóïåý{„a›Çr ‚ÀuÝjµúôôtzzŠ1ŽSUqÎYÐZw'AAYŒ±D"Ásgaa¡^¯ïïï—ËåD"!¥4M³Ñh<>>Æ'Á{}}Íf³”RÐ/…Œ8ç¶mg³Ù Úív¥R9;;ÛÛÛ›ŸŸw!ôüü<<<œÏç£(4Ƙ1fÛv>Ÿw]·X,B(¥!Šh2™TJufÆår9“ÉMLLŒ1Ɔ††\×…$à!l–ËåR©”išƒ‚ácL)eŒMNN:Žs||Ün· !ýýý®ëš¦iš&pa›mÛÉd2“ÉX–Å9Çwl·c uÅ …ÂáááõõuµZu]ײ,˲cGRÊ9OüN·OÀ¯¾ï 
!<Ï{{{ó<xqq133388hÛv7´»2˜5„ôÅ|ñdB¼¿¿ !„RÊd2I)…JA'q÷aí ½x tT} „X–•J¥ÀÇœs˲€H)í>òE§¨×˜€)¿¥4.ó{¿¡oþb`ä(Š”RÝ„’¿‰_ãÎýŠ.’IEND®B`‚stacks-1.35/php/images/r-arrow-disabled.png000644 000765 000024 00000011405 12335173442 021407 0ustar00catchenstaff000000 000000 ‰PNG  IHDR"ÀN' pHYs  šœbiCCPPhotoshop ICC profilexÚ­Wy8Ôí×?ß1Æ>ö±3–,ÙÆ¾ed'; ÃXÆ:} zDIŠH‘¢$”½B¥¨DYž¬©ÈÒ¢yÿÐÓs]¿÷zß÷Ÿ÷üõ9ç:ç|Î}ŸûºÎ}¸ITj "£biλÍðž^Þx¦×Àœ€p‘c¨¦NNvð?Êúk@^*“¨ÔN¥3“Å ¦¾ñmáÚ‹u¿à,ÍÓËQ¾ml|ÛØøb©±H(ð†’ÈH*(Ñ\‰H5`C¶q`¶q/`ãCb O™À´€1"Å`•€LŽ ŒÀž@©DFF“¸:@>J‹àšeO/oüvÉä }4æí¿¶0I€š;¢’ÿÚäÚIu;ÿµ­:îyL°†: ìfŒtúª,ÓI€­:ýG¾u €a  #"0Žÿû¾¤àÿÒ·Ïü[J•ÁÊxSÌTÇÜÅ2Âú‰m…ƒ+À)ɥ̭ÏcÊkÇçÊïó¤Q…cDbEãÄÅS$2ð™’™RG¤ÊßqFö¼\¹|µÂÍMŠ=JCÊ“*ëV5)u 7Í­ríÇ:›z ú$ƒó»Æ$Œ)&wÍØ‰ûÌo[òì¦Y Ú¨Ú³ûâ`çX»‡Ó9Âå±›ˆ{„G›ÖÛooµÏW_¢ßQÿþÎ@GòÉ ç!¬¡¦”¸°ÚðÉHÞ(óèêýocб„8¿ø¼„û‰óÉÜ)F©Ôei=écSg-^Î\ÉZÏÞú Ã~”;—?Oì˜üq|ƒ¦V'÷œr?í}Æ÷l`!¥(¼8üå<¥$¬4ô©Ì»Üå¢m…É%µË •rU;®HU㯊]ºÎƒ»†«–ý&ãÍ_u›õ«·oÏ6ŒÞl|z·»©¹¹¾åƽ«÷/¶žm;Þ~´#³ó`Wr7íAÔèG=¤^b®oêñ­'éOmúúçž5<Ïp”\|Ññ²à•ßòÐ÷×O† GHo”ß|{ûh4Ì}\l|f¢æoÚ;íwß&[¦R¦õ§×ggSçLß3¼ï›Ïûàð‘ëãËO… ž‹‚‹#KEŸ=——‡¾¬8¬r¬ö}Í\3]£¯·m$ojm.«ùþCáÇÜÏŠ-ÿ_¿ÞÒ÷Óé(4ÇȇágâdfcafÙbýÆö™}†ã ¶Ÿ³“«ž»‚ç4o_8nŸ€£ QHOX[DITULE\[Âo!i,¥+­,£´CIv§A^I°S[‘¨d£ì©BQM"䨕ª7j<×\ÒæÖÑÔuÑKÖ¿b0lÈl¤e¼ß¤Ætž¨hnqÓrÓÊÂ:×fÄNÍ>ÕaÐIdOˆs£+âfï~ÒcÌKÁ›º÷–φ¯®ßÿvÒV 9>ènðj¨ %,ìZøÇHÙ(Ÿè"êkoŒklAÜãæD»¤ãÉS¦S?øœ¶–þ-ãûÁ_‡3Ù³x³…Žˆý%“C8ªŸk–g}Ìñ¸g¾Ï ÿòɰSѧ©ghg£ £Š¨Å‘ç‚Ïû—x–:]°(Ó.W¾¨\¡tIá²B¥|•ì™jé«2פ¯KÜ­®º)XÇ_Ïs‹ó6göG#Ç]î&\³`‹à=‘ûøV¹6•v­½N³.‡nï{î~„´ÒÓ×[ÒùØô‰À“ŧû‹ŸE>'ˆ ¬ö¿¨x™ðÊaHjhíußpÉHôÓ·ÜoÇGoŒ¥Ž[MpOŒþ]ñ.lReryªiúÄLè¬ñœÀÜâûóÅ¢>Z|úôqáþâ±¥}ŸU>ÿXîùrrÅwUauåë½µìu§ Á±ÍŠo”ï„ï›?îÿÌØ²ø…ýõœ¹ÝÔÃ2ú+ãgÌ"Ó"ó"Ëë(Û+ö'­ØZÎ2®\î2¯Ÿ“7(p]0SÈG˜ Â(2,Z)'n!Á)1ޝ”Œ”Ò—ÆH¿–¹º#MÖMNIžYþ½BÏÎjÅcJ±Ê{UÌTU¢j,j곃šZuÚ¥:9ºñz$};ƒ]»ð†Ãe£!ã;&§LãÍ(DsK ]KÙÝ‚VŒVëÖlÞØöÚ5ÚW99æ8%î qvw±s5pSr—ðÀyrx¡¼¾zØ;åóvß+ß~¿nÿ6Rk@Kà-ò¥ œ`Zˆ_¨E5L8œ)|-b&r0ª-º–Z±ÿ,í¯˜ÄØÐ8ßx‡£DÕ$Éd¾tÊjê»ÏÒÚÒ¯gœ>˜~(ø°c¦f.ÉÞ8²ø×tÎÛ£¹=y-Çn¯Î/9‘[~’zŠ|ÚíŒåYÍB™"þbÆâ¯çfÏ”ô—¶]¸QVZ~êbVEê¥èˤJª=W¬« ®J]ƒkc×ïß(ª¡Õ:ÞT©ÃÖ-Õ÷ߺz;·!úŽK£Ö]Üݦ±æÖ–Ò{i÷I­Ä¶íŒíÓÝU]™Ý¤†…®?è©ìÍí+yÜùdê)ý™ðs½ïÁÔ%/{_-¾vÉ}ót”w̼ùoÞwé“_§Îbç®Ì;}äþôz±öóÙ/«¥kõsßñ?ý¥ÑéÛ³£ Pà9 à\ ¿ 
Ï€«pâpÕ¤+ëY€˜åÿ3?ÌÀ â  Æà  —àÌ"¬ˆ:âƒü…4!Pb(7ÔqÔcf+†\†A´0šŒ®GÓ/1~Ã8a®1¡™HL̒̇™çYìYêY…Y3YØ<غØÕÙK9˜8â8¦°ŽØVNEÎ".&®®In'îVEž"^FÞ8ÞY>¾N~9Ž—…û&!ðNÐKð¹Ðn¡á]Â"Ú"-¢F¢]bVbâžâ³4 :>OR\ò¶”µÔ¤tŠ ŸLý§+²…rFrÓòy z s;Ï(š+®*U)ûªª ¨æ,Ô@­U=]ÃL“A³K+KÛNG\¥»¤7¬ßep{W¥ái£,ã“0Ó@³}Ds+ 3KãÝ&V&Ö&6»mì¼íƒ¨Ž‡ ÷Ô;÷¹Lº!îröž ^ÞÃ>œûô}ýNûw‘VåȾA…Áý¡Êî°œðþH®(÷èrêgšiLnìßñš 9‰cÉÊ)GRçÓÌÓK3~r?Üœ%‘vämŽñÑ;y>ÇEó‡ ŠOœ‘>û±¨ö\Z‰ý޲ɋ— *ÉW¬®ª\ç»ñ£v©îý­™†‰Æù¦µ{ì­ÒíÑÝéË{Zú&ž2=# x¾8ñªwó†8š;>öNv*ufèýÎ>M/Ù,_]e[‹Ù˜ùnùó>¾ý“VàÐ;‚ƒpºáƒ!‘H2„â@Y¡²QX &ÑŠèdt£ c(c ‹¡`z˜˜r˜>1;17°ˆ³aùÂêÍÚ˦ÅVÁÎÃ~˜}#„ã-ÖÛÉ©ÅyKšë7÷î_<ñ<+¼ÞI¾½|¯ùøã,q†M‚:‚w…4„j…Õ„oЍ‰4‰‰v‹ÙŠ ‰û‰/I¤âYðE’;%;¤Ü¤¤³eÄešw¸ïX—-”3•ÏPSx¾3Q¯Ø«£,©üJ%SUCu†P¨f¥¶¥Þ¨¥)£9¢uBÛ^§³®;¥÷D¿ÑàÊ®s†9FÉÆ&~¦f{ˆVæ†:–š»5­´­µl m-íœì½‚“œòöT9·» »n¸‹y˜yFzy?õaÜGðõð;âß@š ä#[¥ׇÌQ¤ÃHáeQ¢Ñ~ÔÊýK1ê±Iq°‰îI%ɳ©Ú²Ó^dHŒ8ԜɑEÊ®=ò=Çþè­<ëc[ùÕ¾§„N?={¸H§xí|ciT™|ù|EÝå„*“jÞ«ï¯ß¯9s3¾>à¶íí»jÍ*÷4[ Û=;ÉݦÕz„ûØo=Ý|¶4ðáÅ×W?‡ÙÞjŽ{ü2Y1ýr=¯ù1|¡aiã‹áêÁµ› ßi?_ýé?€,è#„B&TB,"8„ˆÄ UÈJå†*DM0È2Ä2¯~­]Ë[§l˜mrmN|»ñ=ù‡õOŸã[¥¿ºét€í} X‰ÑÑ4¼Ñþ%2"î.`§ÄZ» Ó,À f¢€‘ бpùuƒ)–ÖÛ±ˆ 5ÖÉïäPWÀ ´0’Óoœá`8ä89ÈÜâwìeZœ³È áѶÎÀ€ŒE¹ýÃ5ïòÿ:™dn ¢(Lr(ÑaÛ%v@sÀC DCD (Ð@DÞˆX A$"€û!(@† ˆùq@K  B ”3üww˜Pþ !Úrˆy+8¾(:Iß=”pððð¿½mÿ0AÔŸLÛìÿèhY´:Z m†6D¡õÆ¡…A­‰ÖE›¢Ñh-´Þ³…» ²ÿT€ÿ“qhòµ*C0€ñ1s@ƒH?Ê¡ã`{WÀp”ø´o¦dü绊 JŒ FS“h”ÐX¼)•„'FGRãbƒhJxë¨@%¼: ð_ºLTÄl°Ã­gAMAØëõª cHRMm˜sŽâù†™xƒÔN3¼ycá, IDATxÚ¼–ËKë@ÅÏ<[£Z°µˆ Š…úÿoÅ‚Y¹ÐÖGDM›vwá¹Ó½ ‚Ù$í÷ûgÎ ±ÖZüâ"¿ ¤Mÿ`­E˜£1MrntÁ'“ ÎÎÎðôôJ)Œ1KƒyÓʬµ¨ª ÷÷÷x}}ÅÑуŒ1 ô#BH³ ÃàácŒR ÓéI’ MS\]]!Ë2_åÿ*¥u³!„ø'üî*¥ µFEØØØÀíí-.//QU•ÿÝWPb­µÆ@Y–¸¸¸¥ô“8ÂJ§Ó)”RØÜÜ„1„ ‡CH)qzzŠ$I>ª¡Ô·Ù18p~~Ž^¯÷®6KB „ð‰¹nEªªprr‚n·[ %Zk«µ†1Ãá×××è÷ûÐZ/%¤p1Æðöö†¢(pxxˆ­­-B@)õIq'w¥f³Ù\ëš.­5Úí6„Ȳ eYbooÏWJuÁµÖPJýØIŒ1R¢ßïãææY–â‹ ¡ú–iãR~IÞßß!„Àúú:”R¾c<Ìì;¡, ¢”âññãñûûûHÓJ)/0^'÷ñ»À®3~CÿUbžçÐZcgg­V ZkH)?[¥RJŒÇcÜÝÝÍ áÖn·Ñív¡µ¥ZkäyÎ9¶··Ç1(¥`ŒyÁe»ººŠããc”e‰ªª0›ÍjÛ\Uʲô[a2™ Ïs$I‚^¯‡8ŽE¤”àœû½HùdŒAV«Æâ8†RjΦÜAc(Š£ÑNNQaeeÅCAÌíCJ)8ç¾×œsï—á¾táììáá/// X[[CEˆãqCJ !Ä×-eŒAJéç±èüÎÜûççg!°»»ë+ aœóO0tmrpr0§HgÖZ MS0Æ|²áÜ\¼Å³Ñßi‹ê ΕRsáÜ$IãKTx>†nЉâ«t-ü 
ÖøÖ¶xª;“p£Xœ×‹'IxÖÍ«nýÝàY4»;;ÕIEND®B`‚stacks-1.35/php/images/r-arrow.png000644 000765 000024 00000011647 12335173442 017652 0ustar00catchenstaff000000 000000 ‰PNG  IHDR­¢Ùp pHYs  šœbiCCPPhotoshop ICC profilexÚ­Wy8Ôí×?ß1Æ>ö±3–,ÙÆ¾ed'; ÃXÆ:} zDIŠH‘¢$”½B¥¨DYž¬©ÈÒ¢yÿÐÓs]¿÷zß÷Ÿ÷üõ9ç:ç|Î}ŸûºÎ}¸ITj "£biλÍðž^Þx¦×Àœ€p‘c¨¦NNvð?Êúk@^*“¨ÔN¥3“Å ¦¾ñmáÚ‹u¿à,ÍÓËQ¾ml|ÛØøb©±H(ð†’ÈH*(Ñ\‰H5`C¶q`¶q/`ãCb O™À´€1"Å`•€LŽ ŒÀž@©DFF“¸:@>J‹àšeO/oüvÉä }4æí¿¶0I€š;¢’ÿÚäÚIu;ÿµ­:îyL°†: ìfŒtúª,ÓI€­:ýG¾u €a  #"0Žÿû¾¤àÿÒ·Ïü[J•ÁÊxSÌTÇÜÅ2Âú‰m…ƒ+À)ɥ̭ÏcÊkÇçÊïó¤Q…cDbEãÄÅS$2ð™’™RG¤ÊßqFö¼\¹|µÂÍMŠ=JCÊ“*ëV5)u 7Í­ríÇ:›z ú$ƒó»Æ$Œ)&wÍØ‰ûÌo[òì¦Y Ú¨Ú³ûâ`çX»‡Ó9Âå±›ˆ{„G›ÖÛooµÏW_¢ßQÿþÎ@GòÉ ç!¬¡¦”¸°ÚðÉHÞ(óèêýocб„8¿ø¼„û‰óÉÜ)F©Ôei=écSg-^Î\ÉZÏÞú Ã~”;—?Oì˜üq|ƒ¦V'÷œr?í}Æ÷l`!¥(¼8üå<¥$¬4ô©Ì»Üå¢m…É%µË •rU;®HU㯊]ºÎƒ»†«–ý&ãÍ_u›õ«·oÏ6ŒÞl|z·»©¹¹¾åƽ«÷/¶žm;Þ~´#³ó`Wr7íAÔèG=¤^b®oêñ­'éOmúúçž5<Ïp”\|Ññ²à•ßòÐ÷×O† GHo”ß|{ûh4Ì}\l|f¢æoÚ;íwß&[¦R¦õ§×ggSçLß3¼ï›Ïûàð‘ëãËO… ž‹‚‹#KEŸ=——‡¾¬8¬r¬ö}Í\3]£¯·m$ojm.«ùþCáÇÜÏŠ-ÿ_¿ÞÒ÷Óé(4ÇȇágâdfcafÙbýÆö™}†ã ¶Ÿ³“«ž»‚ç4o_8nŸ€£ QHOX[DITULE\[Âo!i,¥+­,£´CIv§A^I°S[‘¨d£ì©BQM"䨕ª7j<×\ÒæÖÑÔuÑKÖ¿b0lÈl¤e¼ß¤Ætž¨hnqÓrÓÊÂ:×fÄNÍ>ÕaÐIdOˆs£+âfï~ÒcÌKÁ›º÷–φ¯®ßÿvÒV 9>ènðj¨ %,ìZøÇHÙ(Ÿè"êkoŒklAÜãæD»¤ãÉS¦S?øœ¶–þ-ãûÁ_‡3Ù³x³…Žˆý%“C8ªŸk–g}Ìñ¸g¾Ï ÿòɰSѧ©ghg£ £Š¨Å‘ç‚Ïû—x–:]°(Ó.W¾¨\¡tIá²B¥|•ì™jé«2פ¯KÜ­®º)XÇ_Ïs‹ó6göG#Ç]î&\³`‹à=‘ûøV¹6•v­½N³.‡nï{î~„´ÒÓ×[ÒùØô‰À“ŧû‹ŸE>'ˆ ¬ö¿¨x™ðÊaHjhíußpÉHôÓ·ÜoÇGoŒ¥Ž[MpOŒþ]ñ.lReryªiúÄLè¬ñœÀÜâûóÅ¢>Z|úôqáþâ±¥}ŸU>ÿXîùrrÅwUauåë½µìu§ Á±ÍŠo”ï„ï›?îÿÌØ²ø…ýõœ¹ÝÔÃ2ú+ãgÌ"Ó"ó"Ëë(Û+ö'­ØZÎ2®\î2¯Ÿ“7(p]0SÈG˜ Â(2,Z)'n!Á)1ޝ”Œ”Ò—ÆH¿–¹º#MÖMNIžYþ½BÏÎjÅcJ±Ê{UÌTU¢j,j곃šZuÚ¥:9ºñz$};ƒ]»ð†Ãe£!ã;&§LãÍ(DsK ]KÙÝ‚VŒVëÖlÞØöÚ5ÚW99æ8%î qvw±s5pSr—ðÀyrx¡¼¾zØ;åóvß+ß~¿nÿ6Rk@Kà-ò¥ œ`Zˆ_¨E5L8œ)|-b&r0ª-º–Z±ÿ,í¯˜ÄØÐ8ßx‡£DÕ$Éd¾tÊjê»ÏÒÚÒ¯gœ>˜~(ø°c¦f.ÉÞ8²ø×tÎÛ£¹=y-Çn¯Î/9‘[~’zŠ|ÚíŒåYÍB™"þbÆâ¯çfÏ”ô—¶]¸QVZ~êbVEê¥èˤJª=W¬« ®J]ƒkc×ïß(ª¡Õ:ÞT©ÃÖ-Õ÷ߺz;·!úŽK£Ö]Üݦ±æÖ–Ò{i÷I­Ä¶íŒíÓÝU]™Ý¤†…®?è©ìÍí+yÜùdê)ý™ðs½ïÁÔ%/{_-¾vÉ}ót”w̼ùoÞwé“_§Îbç®Ì;}äþôz±öóÙ/«¥kõsßñ?ý¥ÑéÛ³£ Pà9 à\ ¿ Ï€«pâpÕ¤+ëY€˜åÿ3?ÌÀ â  Æà  
—àÌ"¬ˆ:âƒü…4!Pb(7ÔqÔcf+†\†A´0šŒ®GÓ/1~Ã8a®1¡™HL̒̇™çYìYêY…Y3YØ<غØÕÙK9˜8â8¦°ŽØVNEÎ".&®®In'îVEž"^FÞ8ÞY>¾N~9Ž—…û&!ðNÐKð¹Ðn¡á]Â"Ú"-¢F¢]bVbâžâ³4 :>OR\ò¶”µÔ¤tŠ ŸLý§+²…rFrÓòy z s;Ï(š+®*U)ûªª ¨æ,Ô@­U=]ÃL“A³K+KÛNG\¥»¤7¬ßep{W¥ái£,ã“0Ó@³}Ds+ 3KãÝ&V&Ö&6»mì¼íƒ¨Ž‡ ÷Ô;÷¹Lº!îröž ^ÞÃ>œûô}ýNûw‘VåȾA…Áý¡Êî°œðþH®(÷èrêgšiLnìßñš 9‰cÉÊ)GRçÓÌÓK3~r?Üœ%‘vämŽñÑ;y>ÇEó‡ ŠOœ‘>û±¨ö\Z‰ý޲ɋ— *ÉW¬®ª\ç»ñ£v©îý­™†‰Æù¦µ{ì­ÒíÑÝéË{Zú&ž2=# x¾8ñªwó†8š;>öNv*ufèýÎ>M/Ù,_]e[‹Ù˜ùnùó>¾ý“VàÐ;‚ƒpºáƒ!‘H2„â@Y¡²QX &ÑŠèdt£ c(c ‹¡`z˜˜r˜>1;17°ˆ³aùÂêÍÚ˦ÅVÁÎÃ~˜}#„ã-ÖÛÉ©ÅyKšë7÷î_<ñ<+¼ÞI¾½|¯ùøã,q†M‚:‚w…4„j…Õ„oЍ‰4‰‰v‹ÙŠ ‰û‰/I¤âYðE’;%;¤Ü¤¤³eÄešw¸ïX—-”3•ÏPSx¾3Q¯Ø«£,©üJ%SUCu†P¨f¥¶¥Þ¨¥)£9¢uBÛ^§³®;¥÷D¿ÑàÊ®s†9FÉÆ&~¦f{ˆVæ†:–š»5­´­µl m-íœì½‚“œòöT9·» »n¸‹y˜yFzy?õaÜGðõð;âß@š ä#[¥ׇÌQ¤ÃHáeQ¢Ñ~ÔÊýK1ê±Iq°‰îI%ɳ©Ú²Ó^dHŒ8ԜɑEÊ®=ò=Çþè­<ëc[ùÕ¾§„N?={¸H§xí|ciT™|ù|EÝå„*“jÞ«ï¯ß¯9s3¾>à¶íí»jÍ*÷4[ Û=;ÉݦÕz„ûØo=Ý|¶4ðáÅ×W?‡ÙÞjŽ{ü2Y1ýr=¯ù1|¡aiã‹áêÁµ› ßi?_ýé?€,è#„B&TB,"8„ˆÄ UÈJå†*DM0È2Ä2¯~­]Ë[§l˜mrmN|»ñ=ù‡õOŸã[¥¿ºét€í} X‰ÑÑ4¼Ñþ%2"î.`§ÄZ» Ó,À f¢€‘ бpùuƒ)–ÖÛ±ˆ 5ÖÉïäPWÀ ´0’Óoœá`8ä89ÈÜâwìeZœ³È áѶÎÀ€ŒE¹ýÃ5ïòÿ:™dn ¢(Lr(ÑaÛ%v@sÀC DCD (Ð@DÞˆX A$"€û!(@† ˆùq@K  B ”3üww˜Pþ !Úrˆy+8¾(:Iß=”pððð¿½mÿ0AÔŸLÛìÿèhY´:Z m†6D¡õÆ¡…A­‰ÖE›¢Ñh-´Þ³…» ²ÿT€ÿ“qhòµ*C0€ñ1s@ƒH?Ê¡ã`{WÀp”ø´o¦dü绊 JŒ FS“h”ÐX¼)•„'FGRãbƒhJxë¨@%¼: ð_ºLTÄl°Ã­gAMAØëõª cHRMm˜sŽâù†™xƒÔN3¼ycá,¯IDATxÚ¬U=K+Mž™=û‘ÍÆë*¨(ñ‚ BéÔB+K±°±Jg!bc¡? 
à°€lR)4XŠ¿£’ÍÎìîÜb¼{½z¯¯Â{Še`fŸsæœçy !Ðÿä[§¿Xù PEQ!„0ÆaÆqü9:ù ¢âáá¡T*Õj5àœGQÇñg¿}QµÛmJi½^™=88‚ ÕjA ó} ò.AÇqËBâ8Ã1F)}~~Îårù|~{{{gg' CÎ9çü¯õ’·]Ó_!·ã8æœAày^ŽãŒïíímmm===EQ$[ñ®ÅXÖ…º¼¼ÜÜÜYc†aE///”ÒééiÎ9ƸZ­Ú¶½¾¾>44„Ò4MQŒñ+h†a !ªÕêÊÊÊää$cì´#„E±, „c8==m6›kkkÅbQ¡iÈà /Èó<Ïqœžžž >©\pÎGGGÏÏÏ766J¥ÒÜÜcL„'(¥­V+ÔWΰ,«\.7¥¥%ü+ˆìšÅw%Ë9·m{jjjww·\.BäЈBòæ_·þ<E¹¸¸Èd2ÃÃÔR ¥8ç߂ëªZ«Õnnn–——óù¼ïûªª¾‚¾…ªªí)ÆXÁKÈ€ªT*¾ï/,,ôööÊ­×é#„!`Ûöõõõþþ¾´Œ·ž„1Žã¸¿¿¿X,2Æ Ýnš¦9??ﺮªª†a¨ª*“!DQBÈàààêêj£Ñh6›žç½• !äñññêê c¬iÚýý}¥RÉår®ëf³YÛ¶MÓ” „€la–eõõõ†áº.¥4±8) MÓîîît]¯×ëÕjµP( Ék‰›N§]!DÓ´T*eÛ6Æ8•Jù¾/H*8Æç\Q”ããã³³³™™™?~8ŽÓÝÝÝÕÕ•Íf-Ëz}H¥RRÂétZÚÔç\æàœŸœœX–µ¸¸è8N&“q]×uÝŽŽŽt:­ëz‚ˆ‰EI¶JûO@)¥···GGGcccº®ëºnÛvgg§l¥®ëR¿©’8«J¾‰f¤I{žE‘çy`š¦iš™LF'ñ‘?@?¾kr!_$)6ß÷¥u*Š"‹5 ãã%]îíà7‘% EUU iâ{¥ü§‰$ÌëûCˆ¬î_ˆ¡Ÿ wÛŸx­¶IEND®B`‚stacks-1.35/php/images/stacks_bg.png000644 000765 000024 00000002035 12335173442 020210 0ustar00catchenstaff000000 000000 ‰PNG  IHDR#Kú~>nîiCCPICC Profilex…TÏkAþ6n©Ð"Zk²x"IY«hEÔ6ýbk Û¶Ed3IÖn6ëî&µ¥ˆäâÑ*ÞEí¡ÿ€zðd/J…ZE(Þ«(b¡-ñÍnL¶¥êÀÎ~óÞ7ï}ovß rÒ4õ€ä ÇR¢il|BjüˆŽ¢ A4%UÛìN$Aƒsù{çØz[VÃ{ûw²w­šÒ¶š„ý@àGšÙ*°ïq Yˆ<ß¡)ÇtßãØòì9NyxÁµ+=ÄY"|@5-ÎM¸SÍ%Ó@ƒH8”õqR>œ×‹”×infÆÈ½O¦»Ìî«b¡œNö½ô~N³Þ>Â! ­?F¸žõŒÕ?âaá¤æÄ†=5ôø`·©ø5Â_M'¢TqÙ. 
ñ˜®ýVòJ‚p8Êda€sZHO×Lnøº‡}&ׯâwVQáygÞÔÝïEÚ¯0  š HPEa˜°P@†<14²r?#«“{2u$j»tbD±A{6Ü=·Q¤Ý<þ("q”Cµ’üAþ*¯ÉOåyùË\°ØV÷”­›šºòà;Å噹×ÓÈãsM^|•Ôv“WG–¬yz¼šì?ìW—1æ‚5Äs°ûñ-_•Ì—)ŒÅãUóêK„uZ17ߟl;=â.Ï.µÖs­‰‹7V›—gýjHû“æUùO^õñügÍÄcâ)1&vŠç!‰—Å.ñ’ØK« â`mÇ•†)Òm‘ú$Õ``š¼õ/]?[x½F õQ”ÌÒT‰÷Â*d4¹oúÛÇüä÷ŠçŸ(/làÈ™ºmSqï¡e¥ns®¿Ñ}ð¶nk£~8üX<«­R5Ÿ ¼v‡zè)˜Ó––Í9R‡,Ÿ“ºéÊbRÌPÛCRR×%×eK³™UbévØ™Ón¡9B÷ħJe“ú¯ñ°ý°Rùù¬RÙ~NÖ—úoÀ¼ýEÀx‹‰ pHYs  šœÕIDATh혱ƒ@ …G-9w'î¿ 08Ö¥ü2³„G€fï^z±½?ߣ Ï RÇ¿ ‹InHF2‰@ÒÍŒd¤›É$I73’I’nf$“$ÝÌH&Hº™‘L"t3#™D éfædºÌŸ×Be¦kãiK…ÊLsLª:mâ”C³ DfSš÷ë]¥M#ö³ÏHf&suà}¬r…H+Ìß„Ù4¹D¥‘&TŸ!íM½sFl;ð>º˜ÛAs]!Ò«ûumJÌa—+Ðpú¶«He5ñîæIEND®B`‚stacks-1.35/php/images/stacks_logo_rev_small.png000644 000765 000024 00000014166 12335173442 022634 0ustar00catchenstaff000000 000000 ‰PNG  IHDR|AúÀÍîiCCPICC Profilex…TÏkAþ6n©Ð"Zk²x"IY«hEÔ6ýbk Û¶Ed3IÖn6ëî&µ¥ˆäâÑ*ÞEí¡ÿ€zðd/J…ZE(Þ«(b¡-ñÍnL¶¥êÀÎ~óÞ7ï}ovß rÒ4õ€ä ÇR¢il|BjüˆŽ¢ A4%UÛìN$Aƒsù{çØz[VÃ{ûw²w­šÒ¶š„ý@àGšÙ*°ïq Yˆ<ß¡)ÇtßãØòì9NyxÁµ+=ÄY"|@5-ÎM¸SÍ%Ó@ƒH8”õqR>œ×‹”×infÆÈ½O¦»Ìî«b¡œNö½ô~N³Þ>Â! ­?F¸žõŒÕ?âaá¤æÄ†=5ôø`·©ø5Â_M'¢TqÙ. ñ˜®ýVòJ‚p8Êda€sZHO×Lnøº‡}&ׯâwVQáygÞÔÝïEÚ¯0  š HPEa˜°P@†<14²r?#«“{2u$j»tbD±A{6Ü=·Q¤Ý<þ("q”Cµ’üAþ*¯ÉOåyùË\°ØV÷”­›šºòà;Å噹×ÓÈãsM^|•Ôv“WG–¬yz¼šì?ìW—1æ‚5Äs°ûñ-_•Ì—)ŒÅãUóêK„uZ17ߟl;=â.Ï.µÖs­‰‹7V›—gýjHû“æUùO^õñügÍÄcâ)1&vŠç!‰—Å.ñ’ØK« â`mÇ•†)Òm‘ú$Õ``š¼õ/]?[x½F õQ”ÌÒT‰÷Â*d4¹oúÛÇüä÷ŠçŸ(/làÈ™ºmSqï¡e¥ns®¿Ñ}ð¶nk£~8üX<«­R5Ÿ ¼v‡zè)˜Ó––Í9R‡,Ÿ“ºéÊbRÌPÛCRR×%×eK³™UbévØ™Ón¡9B÷ħJe“ú¯ñ°ý°Rùù¬RÙ~NÖ—úoÀ¼ýEÀx‹‰ pHYsgŸÒR.IDATxí› ey€ÏžÝ„% „pÉXÂݨT" T*E…)hŠ„¡`§Ni±H™¡GEĩР"Bi(·JB…àÀ€ÐŠX.5á „p IövNŸç?ÿ»ùöÏž³›M¶2çežýîß÷~ïûÝÎ9¡­Z­–ÆPÚé[F#}4ªŒ¦a«M} ´±ÃëÜ*iŠÊMµ5hÓ,ÐrxÓLßœ[oŽÝ›6jËáM3}sn9¼9voÚ¨c<² JFóÙÏd£i7ÆSzgw?Ö·ÿq£4Ñ:Úõ²m«Y Œõçð6ƕш»»µÃGc¹mÆz‡·œÖÀøÍ(j=Úšaõ&ŽÙrxߌ¡ÇúHŸÀ¤dcîâ¸óߤ]O3Œ²%9ÖïÆx¾´7Æáao-kÉf¶ÀX¿Ò7³º­î6Õc½Ã·BÁNÍ_C»wò.÷jJßHÚ é¿ïµÃýÒek¨¾Uz«4‘ÿ†w¸×Á;ÙáÎåm÷ÅÑHŽôX©áˆÔgNêm±rS¥ÞFq¬Û€ ÷up‡kGmÖi´Ã=ŽÄU:ÜJÆ4T½¡ÊH&Û4£ŒD¹:ub®ê~\;‚v¼þüÊØtSŽ÷z×:/UÊ<ÏdCó½c{A GGY8k{Ê$íÇú$Œö•ÞjTñm^önô›—è¨í.„eð¶r¸N º:? 
*¾/LbQ¯Àø/¸ž†hÇ–Ÿ¥]Š‹À‰Úˆ ¡Þbx§Ïõ¯b“6ç¼ b#Dvø ‹;<œmx œïk Íµ²êï™ÔMÄ/ƒÅà„tº Àx¤unq²Ž•æÅOó¨òN•6yHÓç”:<œ½-Ú}¾k©’á„ c,Ç-µµµ•Ú;;³rëP×׿'D•>ê’R¬+Ø6(–mLÚþ]¡Å»×¾=½ª6·8–ã†8V£qÔÏú#Ò1uøî4rK9;á&--Eg»+‡’0ŠGýci…µÝ=­Ú¾õÌrµº¦¿gUwçnûoÛ¾ÝÔõUú{Æu/{b§êªÕÒV“&–ª•¾R¥ZS¾s××­ÅœT¶X](£‘øÕn8§Q½bYq¸1ì_{ §G±ÿziǵ?NÂn’zõµ—:Z'[4шtör6Tùp¸Ç¶ÒhB–E›ré†*¥ãŽËXyý¡óªåI‡—Ê«+o¬èŸ|Üu{M:èx†®¶±½K}+_™²òÖsŽè]z×[Ûï5®Òû|©m⸶ʊþꛥGf\Z}ÖsÀŒ¡®ö«B\ ® ÉÐÞÛÛÛ?nÜ8u~ £®môp¢œ{jÄíHOƒ‰ öëµå'ˆíŸ&u¥¨c1­nŽŸêùžÚ<™Õý xÜ„¡£©b§!áT;TxpõïT›ÿ #¦Vºá_ѳ£}iÒáÿ²´cÆž»–Êm¥þÞžŽ)ÓwÌšy–#åIS:'}ñ¥ÞžžR{G{©R©0šN•r­nma†l6†?„90 ü˜WÆÙDG8ÙßÀÏ`!h„Ø=D‡ ¢XW|Œ¾K3ûˆÐÇç2øo¸mœ“¨ÃÆŠ»PÇøð= üîC{Úßð¯`¿ÎÁÐñÔç8æTKU>ޤyðò*µ©ã-ð˜_fïT³P? !}y䇄Ö)-Y²d«ˆr‚×Ú~úa ¬‚nH%ÊV“ùZO9²±ópG¯¯!ô#ÚPÖRú}˜ öÕž‡i¿ÆS÷'}+ôÀpò&ìDÿÑ× yãJ.&Ü9©WÓáñêø<¯“ðz(Ê%d„¾y|?Â;Šë¤W:d}„’†{Wª•—óFá {fT&T¹˜P(Q Êû{»çS¿(a€‹åµtÿeD¢ïéÄG:Áè/uØÍdn“÷F‹¾]ð±èÿ˜øó2Ò…µÛƒ}Fÿ#qø¸¼í®€7ˆ²v´ß÷‘~ †çì&*ÊCdL»½´˜ãÀ#ðdð(ñHñùx\\Þ ŠGžG‹uŠb~v´•;ÆßKüsà‘e[,®‰¬­u|züzœ9–ÇŽa[©\¾›0d‘Ã#‘‡Ï>OÀkyžýŽÇ\ÞóGÃép1xf÷¡:ˆã¾®ïÃnêò4,‚*¥J¹\*Ï$~ x„|œˆãzÄ;ÿáÄ:qŒ[WÛžfÑ&×ÀI° ´·z«Ó·aО¦×µp?øÝ…ývÁÁpLß!ÞïhW[=qÔyT¼ Š»<]áw“>¢n¶ê,X¦£¿Ýí]°'LƒóAé­ÕG ç‚;xØö‚.HWÿq¤C â©cŸ§ ñÓÐݧ•W…òì–Gß¡»'×B@*^¹d§Lì°´ð38ÓöYò¬:©ƒ'Ùbâ;ƒuÒëñ+¤Cò±+ÎqW°®'FèyñTœÛŸAª[ÄcÜ?‚­aÀᡤ•O‚"¢£î„ ë€Ð6*”öa^“ƃ“‰+ѧKû‰zÆDü5 q²£‰0êÔ Ï¦N*Žù°~è:ŸW 瘼êõù³©óSp“¸pÍot¤Ç]¶:ú!¹³«Ú¤ ¢¯°éo€z^KÑ6 㣈§»÷˜w–ò#ðμï ï†nº÷.Q¼W¯„»à3 Ä½_KÕî3ïïh¥xç{—E™õìÛ´¨O*ÖUÔC½­cýÈ'š‰ý(ËáÙ,¶¾¯ü£^–ëÝ­¼üˆ§8Oå:ð³¬cy§:Žú¨è¨Þ­oÂ£à¸æõ!ˇ ŸI#ëJp<û÷~ý%ü)<öëXözxCÅû˜‰x;¶uE]Ã>ö%™X)•Ôa÷Sp|~•WÚ Åmœ:ñ Ò7Ãeà)íƒä …Så-Ke¸2ÇU_'â„Ç$‰fb~CCúhKe`âdF½ýˆO†X`¯¿”¨SK þk}ï&õoTŸâ쇨}©v5qRÚQg?C9›ìAòTž ]%ýùÏ€újÇ¢¨Côñ(ó¸„ôù`{ë{Bœ .š[áßàÐþáÛgó­çpʳɦ Œ{WØá-p ̇i c„»ÞÅðï°TÖ2ÛnŠ„³ícø, {ƒßh¬zâ÷êPO<Ε0ª;1aVaþØ»Ñ+O‰Åæ›bOÐñ‘Gt±Ì9η_Áæó¶û¶ÝøóWpü®›À¯‚•¬½† ÇkxÛx\^Ÿ‚§Ac™o=ã'’iR‹Žúoôëâ<—Ýzá|8ÜÙœMñ€óŒE]Ý!©t§‰ÍwœÅàNtA¹;Ý<~)ò=˜ÚÔ SOôƒö·ž'è©ð$ v? 
öýqø1üŽÅöåF;<«•ü±bï¬3†/Ãp{ÄX6¦ÂrØÑÙ™¢„—éCÍ.¸gàð”±Fqn]À´ùEq1¯(Û›D›FaèïÂü1è A{©ïl¸ŽßL:Ýü¡Ä ¥} áŸËù¡}+Ž©ŸÌ» ÜõWBeÐg4>»mL:>ÇÚæ?@ñ‹Å/ìß –¥õþ‚´Ÿ‹ýŽw*XÏÏéøégÐôK”ö'Â,(~þ~n¢LYW ²_¢,¨ӗ׹0ÊÒ϶‘×(Luô9œ>Ÿ…øÄ1”ønü›}§6‹¼4T/NJŰÑñ”UÊÿÔ×|w /Ú3óºž îÀ{áH¸^W|HìdÈGYƸ+ÒLâÓ`R!o¸dôÕ¨ž»vÛ¼ÂÙ„KÀ]ï®V¾'f±õ§ZžÜ ˆS)æhßwƒ;ùcp3(^Wkóвö0ˆ ‹ÆŒ<¢ÃŠQŠÖ*Q”¨ù¦‹y–…>Æ=–v7‚h û¯‚‹ÊÅ¥Îb»XtÖ5O)êg^”-5‘È.Ä»òtÔIŠ7ˆÆøCQ¬ ó·ü5¸€'@8ý[Ä÷:ÜØÚÍz†Ö =#îõp=PX {Ì%¹›U4»kíDC7 ÙN™] vš¿^y§¥hÓ‘}ÙÆx¤»ÌHd)q'¥¨«F4ŒÉG»uäÅ,òÈ7ú øÃÎk5²íµ“ã‹cDŸD‡uT¬çÝ:ߢÓÕw&ø±k;ÐöbutS¦ã8®ý†îjãÞÙkyó¸ðO—ÉNÈx4¸ºw‘8 8uÅxº{d…bBÿCåPIE%âøŒÇ’¡ÇRBáZª¦oŒy†ALï]Q0DFûeÏååq*OÚqÝ>®ì×úó7n}õÿ4xõ¨—ùõ$Êâqåã톼²óÖ>‡À×ò<û»è'Ë]JêƒZÎàE·šÌøHf¹ºU¨ï"±î€¿„@q@q ¬A×æÏ€o°Š\‘Ö çÜH\q‚©ÞÈr&QMzß…@C…шn Ï“³0ϵ®òa87‹Õî@ûµLŒ;çÕ?_˶©h;õvŽçÀã Žæ+gÁiY¬6žQwîQ°/¨Gè`»˜¿¡w¶¢\ô!^¯Å+ïÈõ¿¸e<Åú[ø økPԋЗµ¿öøRVüE(^ÂÿIÜ_Ȭ¯çxuHÞ*Pâ¥~ñø×(¶ñõù˜¦ßÑ&^è?#/~Í "ÜŸ²…¢nñ¯@¾J<ê©[¼ÂÕk (ñ²õÕ>âSD´34ïTxB¾H$­S|¥/¦|ç¼NÑ.‡‘sô7n Zy`DŸþ"ggát˜QV µ™ÿÖ@‰_=B|`å¯LN"w×sVþ–‚?*¸²¦‚«Ç:Š+Mñ˜z\±öáîs(q”-&þÌW³+òXð ˆ… ¼>_‡‹Á#÷øØŸ:øB¿®ût§¨×!p"ì !¶‰“,ò í'äA"ßóÀä.ñ(?>÷Â2Pì{ÄüÝñóž,J:oÓ1wã阦Ã>Îýà"˜ëòSó‰´«o‹‰àòÏpÜ ÀK`_~ëø8 º` ØF¯4¨­Cv§…¸º{"Q'LwµUž†ì³aúyÔ1b'? B\ͱû"/û¿G$ô;€ø‹yº¥Ÿ__"½ b%çÕª¿%b–…ü‘¬ÏÕ©»Íß½¯Š„ë ‘ œÈÃDºÀ¾£¿ã‰+ö£<Åný8ýŒ» ÝÝ= m”+Á²½ N¡tlO½×`mìBH”¥a6pž1™p6ìNx¼KA J,$ï*Ûy¯éTãZ«Aqë;†ý›ï¢ˆ>ˆn êÚÀùï ¶×kÁ>žËC‚Ll§mŠs×n¾Ot²Îómã<ŠõÈÊò -sNSÁ~]àn¦b»mÉÛ¦ƒsÔñúƺ¿2`óÔáÚ(&ÝÈ0Ñ‘¡Ž¶®©„3†šxô¡.EKã5ÛÉP°]Ñp楢n¶nÛÔ›ûpcØv(±RÏ.ê¥~#ÑÍ~ÔO; رèp+)lÇ©¡„ƒÆ„uFþƈ}¸¸¢/û7.ö+CILÚzN$Ú™o<=e¬“ޱ1ú†ãé"ë7ƱOe¸EžŽõÓÓ#ë¤Îuv>!¶pñÔêeæ;®yCεžÃ©ß’-Ñé*ÚçךSÁ-‡ ²¥'[ßÒ=\˜_Ëáƒléɖ÷tæ×rxÁ [zòÿBéuï#ðIEND®B`‚stacks-1.35/config/compile000755 000765 000024 00000016245 12540305552 016336 0ustar00catchenstaff000000 000000 #! /bin/sh # Wrapper for compilers which do not understand '-c -o'. scriptversion=2012-10-14.11; # UTC # Copyright (C) 1999-2013 Free Software Foundation, Inc. # Written by Tom Tromey . 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . nl=' ' # We need space, tab and new line, in precisely that order. Quoting is # there to prevent tools from complaining about whitespace usage. IFS=" "" $nl" file_conv= # func_file_conv build_file lazy # Convert a $build file to $host form and store it in $file # Currently only supports Windows hosts. If the determined conversion # type is listed in (the comma separated) LAZY, no conversion will # take place. 
func_file_conv () { file=$1 case $file in / | /[!/]*) # absolute file, and not a UNC file if test -z "$file_conv"; then # lazily determine how to convert abs files case `uname -s` in MINGW*) file_conv=mingw ;; CYGWIN*) file_conv=cygwin ;; *) file_conv=wine ;; esac fi case $file_conv/,$2, in *,$file_conv,*) ;; mingw/*) file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` ;; cygwin/*) file=`cygpath -m "$file" || echo "$file"` ;; wine/*) file=`winepath -w "$file" || echo "$file"` ;; esac ;; esac } # func_cl_dashL linkdir # Make cl look for libraries in LINKDIR func_cl_dashL () { func_file_conv "$1" if test -z "$lib_path"; then lib_path=$file else lib_path="$lib_path;$file" fi linker_opts="$linker_opts -LIBPATH:$file" } # func_cl_dashl library # Do a library search-path lookup for cl func_cl_dashl () { lib=$1 found=no save_IFS=$IFS IFS=';' for dir in $lib_path $LIB do IFS=$save_IFS if $shared && test -f "$dir/$lib.dll.lib"; then found=yes lib=$dir/$lib.dll.lib break fi if test -f "$dir/$lib.lib"; then found=yes lib=$dir/$lib.lib break fi if test -f "$dir/lib$lib.a"; then found=yes lib=$dir/lib$lib.a break fi done IFS=$save_IFS if test "$found" != yes; then lib=$lib.lib fi } # func_cl_wrapper cl arg... # Adjust compile command to suit cl func_cl_wrapper () { # Assume a capable shell lib_path= shared=: linker_opts= for arg do if test -n "$eat"; then eat= else case $1 in -o) # configure might choose to run compile as 'compile cc -o foo foo.c'. 
eat=1 case $2 in *.o | *.[oO][bB][jJ]) func_file_conv "$2" set x "$@" -Fo"$file" shift ;; *) func_file_conv "$2" set x "$@" -Fe"$file" shift ;; esac ;; -I) eat=1 func_file_conv "$2" mingw set x "$@" -I"$file" shift ;; -I*) func_file_conv "${1#-I}" mingw set x "$@" -I"$file" shift ;; -l) eat=1 func_cl_dashl "$2" set x "$@" "$lib" shift ;; -l*) func_cl_dashl "${1#-l}" set x "$@" "$lib" shift ;; -L) eat=1 func_cl_dashL "$2" ;; -L*) func_cl_dashL "${1#-L}" ;; -static) shared=false ;; -Wl,*) arg=${1#-Wl,} save_ifs="$IFS"; IFS=',' for flag in $arg; do IFS="$save_ifs" linker_opts="$linker_opts $flag" done IFS="$save_ifs" ;; -Xlinker) eat=1 linker_opts="$linker_opts $2" ;; -*) set x "$@" "$1" shift ;; *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) func_file_conv "$1" set x "$@" -Tp"$file" shift ;; *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) func_file_conv "$1" mingw set x "$@" "$file" shift ;; *) set x "$@" "$1" shift ;; esac fi shift done if test -n "$linker_opts"; then linker_opts="-link$linker_opts" fi exec "$@" $linker_opts exit 1 } eat= case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: compile [--help] [--version] PROGRAM [ARGS] Wrapper for compilers which do not understand '-c -o'. Remove '-o dest.o' from ARGS, run PROGRAM with the remaining arguments, and rename the output as expected. If you are trying to build a whole package this is not the right script to run: please start by reading the file 'INSTALL'. Report bugs to . EOF exit $? ;; -v | --v*) echo "compile $scriptversion" exit $? ;; cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) func_cl_wrapper "$@" # Doesn't return... ;; esac ofile= cfile= for arg do if test -n "$eat"; then eat= else case $1 in -o) # configure might choose to run compile as 'compile cc -o foo foo.c'. # So we strip '-o arg' only if arg is an object. 
eat=1 case $2 in *.o | *.obj) ofile=$2 ;; *) set x "$@" -o "$2" shift ;; esac ;; *.c) cfile=$1 set x "$@" "$1" shift ;; *) set x "$@" "$1" shift ;; esac fi shift done if test -z "$ofile" || test -z "$cfile"; then # If no '-o' option was seen then we might have been invoked from a # pattern rule where we don't need one. That is ok -- this is a # normal compilation that the losing compiler can handle. If no # '.c' file was seen then we are probably linking. That is also # ok. exec "$@" fi # Name of file we expect compiler to create. cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` # Create the lock directory. # Note: use '[/\\:.-]' here to ensure that we don't use the same name # that we are using for the .o file. Also, base the name on the expected # object file name, since that is what matters with a parallel build. lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d while true; do if mkdir "$lockdir" >/dev/null 2>&1; then break fi sleep 1 done # FIXME: race condition here if user kills between mkdir and trap. trap "rmdir '$lockdir'; exit 1" 1 2 15 # Run the compile. "$@" ret=$? if test -f "$cofile"; then test "$cofile" = "$ofile" || mv "$cofile" "$ofile" elif test -f "${cofile}bj"; then test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" fi rmdir "$lockdir" exit $ret # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: stacks-1.35/config/depcomp000755 000765 000024 00000056016 12540305553 016336 0ustar00catchenstaff000000 000000 #! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2013-05-30.07; # UTC # Copyright (C) 1999-2013 Free Software Foundation, Inc. 
# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. Environment variables: depmode Dependency tracking mode. source Source file read by 'PROGRAMS ARGS'. object Object file output by 'PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputting dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac # Get the directory component of the given path, and save it in the # global variables '$dir'. Note that this directory component will # be either empty or ending with a '/' character. This is deliberate. set_dir_from () { case $1 in */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; *) dir=;; esac } # Get the suffix-stripped basename of the given path, and save it the # global variable '$base'. 
set_base_from () { base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` } # If no dependency file was actually created by the compiler invocation, # we still have to create a dummy depfile, to avoid errors with the # Makefile "include basename.Plo" scheme. make_dummy_depfile () { echo "#dummy" > "$depfile" } # Factor out some common post-processing of the generated depfile. # Requires the auxiliary global variable '$tmpdepfile' to be set. aix_post_process_depfile () { # If the compiler actually managed to produce a dependency file, # post-process it. if test -f "$tmpdepfile"; then # Each line is of the form 'foo.o: dependency.h'. # Do two passes, one to just change these to # $object: dependency.h # and one to simply output # dependency.h: # which is needed to avoid the deleted-header problem. { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" } > "$depfile" rm -f "$tmpdepfile" else make_dummy_depfile fi } # A tabulation character. tab=' ' # A newline character. nl=' ' # Character ranges might be problematic outside the C locale. # These definitions help. upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ lower=abcdefghijklmnopqrstuvwxyz digits=0123456789 alpha=${upper}${lower} if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Avoid interferences from the environment. gccflag= dashmflag= # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. 
Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi cygpath_u="cygpath -u -f -" if test "$depmode" = msvcmsys; then # This is just like msvisualcpp but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvisualcpp fi if test "$depmode" = msvc7msys; then # This is just like msvc7 but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvc7 fi if test "$depmode" = xlc; then # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. gccflag=-qmakedep=gcc,-MF depmode=gcc fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. ## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. ## (see the conditional assignment to $gccflag above). ## There are various ways to get dependency output from gcc. 
Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). Also, it might not be ## supported by the other compilers which use the 'gcc' depmode. ## - Using -M directly means running the compiler twice (even worse ## than renaming). if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The second -e expression handles DOS-style file names with drive # letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the "deleted header file" problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. ## Some versions of gcc put a space before the ':'. On the theory ## that the space means something, we add a space to the output as ## well. hp depmode also adds that space, but also prefixes the VPATH ## to the object. Take care to not repeat it in the output. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. 
exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # the IRIX cc adds comments like '#:fec' to the end of the # dependency line. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ | tr "$nl" ' ' >> "$depfile" echo >> "$depfile" # The second pass generates a dummy entry for each header file. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" ;; xlc) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. Also, the AIX compiler puts '$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.u tmpdepfile2=$base.u tmpdepfile3=$dir.libs/$base.u "$@" -Wc,-M else tmpdepfile1=$dir$base.u tmpdepfile2=$dir$base.u tmpdepfile3=$dir$base.u "$@" -M fi stat=$? 
if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done aix_post_process_depfile ;; tcc) # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 # FIXME: That version still under development at the moment of writing. # Make that this statement remains true also for stable, released # versions. # It will wrap lines (doesn't matter whether long or short) with a # trailing '\', as in: # # foo.o : \ # foo.c \ # foo.h \ # # It will put a trailing '\' even on the last line, and will use leading # spaces rather than leading tabs (at least since its commit 0394caf7 # "Emit spaces for -MD"). "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. # We have to change lines of the first kind to '$object: \'. sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" # And for each line of the second kind, we have to emit a 'dep.h:' # dummy dependency, to avoid the deleted-header problem. sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" rm -f "$tmpdepfile" ;; ## The order of this option in the case statement is important, since the ## shell code in configure will try each of these formats in the order ## listed in this file. A plain '-MD' option would be understood by many ## compilers, so we must ensure this comes after the gcc and icc options. pgcc) # Portland's C compiler understands '-MD'. # Will always output deps to 'file.d' where file is the root name of the # source file under compilation, even if file resides in a subdirectory. # The object file name does not affect the name of the '.d' file. # pgcc 10.2 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using '\' : # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... 
set_dir_from "$object" # Use the source, not the object, to determine the base name, since # that's sadly what pgcc will do too. set_base_from "$source" tmpdepfile=$base.d # For projects that build the same source file twice into different object # files, the pgcc approach of using the *source* file root name can cause # problems in parallel builds. Use a locking strategy to avoid stomping on # the same $tmpdepfile. lockdir=$base.d-lock trap " echo '$0: caught signal, cleaning up...' >&2 rmdir '$lockdir' exit 1 " 1 2 13 15 numtries=100 i=$numtries while test $i -gt 0; do # mkdir is a portable test-and-set. if mkdir "$lockdir" 2>/dev/null; then # This process acquired the lock. "$@" -MD stat=$? # Release the lock. rmdir "$lockdir" break else # If the lock is being held by a different process, wait # until the winning process is done or we timeout. while test -d "$lockdir" && test $i -gt 0; do sleep 1 i=`expr $i - 1` done fi i=`expr $i - 1` done trap - 1 2 13 15 if test $i -le 0; then echo "$0: failed to acquire lock after $numtries attempts" >&2 echo "$0: check lockdir '$lockdir'" >&2 exit 1 fi if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form `foo.o: dependent.h', # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this invocation # correctly. Breaking it into two sed invocations is a workaround. sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. 
# Much of this is similar to the tru64 case; see comments there. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" # Add 'dependent.h:' lines. sed -ne '2,${ s/^ *// s/ \\*$// s/$/:/ p }' "$tmpdepfile" >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in 'foo.d' instead, so we check for that too. # Subdirectories are respected. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then # Libtool generates 2 separate objects for the 2 libraries. These # two compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. tmpdepfile1=$dir$base.o.d # libtool 1.5 tmpdepfile2=$dir.libs/$base.o.d # Likewise. tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d "$@" -MD fi stat=$? 
if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done # Same post-processing that is required for AIX mode. aix_post_process_depfile ;; msvc7) if test "$libtool" = yes; then showIncludes=-Wc,-showIncludes else showIncludes=-showIncludes fi "$@" $showIncludes > "$tmpdepfile" stat=$? grep -v '^Note: including file: ' "$tmpdepfile" if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The first sed program below extracts the file names and escapes # backslashes for cygpath. The second sed program outputs the file # name when reading, but also accumulates all include files in the # hold buffer in order to output them again at the end. This only # works with sed implementations that can handle large buffers. sed < "$tmpdepfile" -n ' /^Note: including file: *\(.*\)/ { s//\1/ s/\\/\\\\/g p }' | $cygpath_u | sort -u | sed -n ' s/ /\\ /g s/\(.*\)/'"$tab"'\1 \\/p s/.\(.*\) \\/\1:/ H $ { s/.*/'"$tab"'/ G p }' >> "$depfile" echo >> "$depfile" # make sure the fragment doesn't end with a backslash rm -f "$tmpdepfile" ;; msvc7msys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. 
IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for ':' # in the target name. This is to cope with DOS-style filenames: # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. "$@" $dashmflag | sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this sed invocation # correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? # Remove any Libtool call if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # X makedepend shift cleared=no eat=no for arg do case $cleared in no) set ""; shift cleared=yes ;; esac if test $eat = yes; then eat=no continue fi case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. -arch) eat=yes ;; -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix=`echo "$object" | sed 's/^.*\././'` touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" # makedepend may prepend the VPATH from the source file name to the object. # No need to regex-escape $object, excess matching of '.' is harmless. sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process the last invocation # correctly. Breaking it into two sed invocations is a workaround. 
sed '1,2d' "$tmpdepfile" \ | tr ' ' "$nl" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E \ | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi IFS=" " for arg do case "$arg" in -o) shift ;; $object) shift ;; "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E 2>/dev/null | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" echo "$tab" >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; msvcmsys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. 
exit 1 ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: stacks-1.35/config/install-sh000755 000765 000024 00000033255 12540305552 016764 0ustar00catchenstaff000000 000000 #!/bin/sh # install - install a program, script, or datafile scriptversion=2011-11-20.07; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # 'make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit=${DOITPROG-} if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_glob='?' initialize_posix_glob=' test "$posix_glob" != "?" || { if (set -f) 2>/dev/null; then posix_glob= else posix_glob=: fi } ' posix_mkdir= # Desired mode of installed file. mode=0755 chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false no_target_directory= usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve the last data modification time) -d create directories instead of installing files. 
-g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -s) stripcmd=$stripprog;; -t) dst_arg=$2 # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) no_target_directory=true;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call 'install-sh -d' without argument. # This can happen when creating conditional directories. 
exit 0 fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for 'test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? 
fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. *[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. 
Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac eval "$initialize_posix_glob" oIFS=$IFS IFS=/ $posix_glob set -f set fnord $dstdir shift $posix_glob set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. (umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. 
# { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && eval "$initialize_posix_glob" && $posix_glob set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && $posix_glob set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. { # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd -f "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: stacks-1.35/config/missing000755 000765 000024 00000015330 12540305552 016351 0ustar00catchenstaff000000 000000 #! /bin/sh # Common wrapper for a few potentially missing GNU programs. 
scriptversion=2013-10-28.13; # UTC # Copyright (C) 1996-2013 Free Software Foundation, Inc. # Originally written by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. if test $# -eq 0; then echo 1>&2 "Try '$0 --help' for more information" exit 1 fi case $1 in --is-lightweight) # Used by our autoconf macros to check whether the available missing # script is modern enough. exit 0 ;; --run) # Back-compat with the calling convention used by older automake. shift ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due to PROGRAM being missing or too old. Options: -h, --help display this help and exit -v, --version output version information and exit Supported PROGRAM values: aclocal autoconf autoheader autom4te automake makeinfo bison yacc flex lex help2man Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and 'g' are ignored when checking the name. Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? 
;; -*) echo 1>&2 "$0: unknown '$1' option" echo 1>&2 "Try '$0 --help' for more information" exit 1 ;; esac # Run the given program, remember its exit status. "$@"; st=$? # If it succeeded, we are done. test $st -eq 0 && exit 0 # Also exit now if we it failed (or wasn't found), and '--version' was # passed; such an option is passed most likely to detect whether the # program is present and works. case $2 in --version|--help) exit $st;; esac # Exit code 63 means version mismatch. This often happens when the user # tries to use an ancient version of a tool on a file that requires a # minimum version. if test $st -eq 63; then msg="probably too old" elif test $st -eq 127; then # Program was missing. msg="missing on your system" else # Program was found and executed, but failed. Give up. exit $st fi perl_URL=http://www.perl.org/ flex_URL=http://flex.sourceforge.net/ gnu_software_URL=http://www.gnu.org/software program_details () { case $1 in aclocal|automake) echo "The '$1' program is part of the GNU Automake package:" echo "<$gnu_software_URL/automake>" echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" echo "<$gnu_software_URL/autoconf>" echo "<$gnu_software_URL/m4/>" echo "<$perl_URL>" ;; autoconf|autom4te|autoheader) echo "The '$1' program is part of the GNU Autoconf package:" echo "<$gnu_software_URL/autoconf/>" echo "It also requires GNU m4 and Perl in order to run:" echo "<$gnu_software_URL/m4/>" echo "<$perl_URL>" ;; esac } give_advice () { # Normalize program name to check for. normalized_program=`echo "$1" | sed ' s/^gnu-//; t s/^gnu//; t s/^g//; t'` printf '%s\n' "'$1' is $msg." configure_deps="'configure.ac' or m4 files included by 'configure.ac'" case $normalized_program in autoconf*) echo "You should only need it if you modified 'configure.ac'," echo "or m4 files included by it." program_details 'autoconf' ;; autoheader*) echo "You should only need it if you modified 'acconfig.h' or" echo "$configure_deps." 
program_details 'autoheader' ;; automake*) echo "You should only need it if you modified 'Makefile.am' or" echo "$configure_deps." program_details 'automake' ;; aclocal*) echo "You should only need it if you modified 'acinclude.m4' or" echo "$configure_deps." program_details 'aclocal' ;; autom4te*) echo "You might have modified some maintainer files that require" echo "the 'autom4te' program to be rebuilt." program_details 'autom4te' ;; bison*|yacc*) echo "You should only need it if you modified a '.y' file." echo "You may want to install the GNU Bison package:" echo "<$gnu_software_URL/bison/>" ;; lex*|flex*) echo "You should only need it if you modified a '.l' file." echo "You may want to install the Fast Lexical Analyzer package:" echo "<$flex_URL>" ;; help2man*) echo "You should only need it if you modified a dependency" \ "of a man page." echo "You may want to install the GNU Help2man package:" echo "<$gnu_software_URL/help2man/>" ;; makeinfo*) echo "You should only need it if you modified a '.texi' file, or" echo "any other file indirectly affecting the aspect of the manual." echo "You might want to install the Texinfo package:" echo "<$gnu_software_URL/texinfo/>" echo "The spurious makeinfo call might also be the consequence of" echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" echo "want to install GNU make:" echo "<$gnu_software_URL/make/>" ;; *) echo "You might have modified some files without having the proper" echo "tools for further handling them. Check the 'README' file, it" echo "often tells you about the needed prerequisites for installing" echo "this package. You may also peek at any GNU archive site, in" echo "case some other package contains this missing '$1' program." ;; esac } give_advice "$1" | sed -e '1s/^/WARNING: /' \ -e '2,$s/^/ /' >&2 # Propagate the correct exit status (expected to be 127 for a program # not found, 63 for a program that failed due to version mismatch). 
exit $st # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: stacks-1.35/config/test-driver000755 000765 000024 00000010277 12540305553 017156 0ustar00catchenstaff000000 000000 #! /bin/sh # test-driver - basic testsuite driver script. scriptversion=2013-07-13.22; # UTC # Copyright (C) 2011-2013 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . # Make unconditional expansion of undefined variables an error. This # helps a lot in preventing typo-related bugs. set -u usage_error () { echo "$0: $*" >&2 print_usage >&2 exit 2 } print_usage () { cat <$log_file 2>&1 estatus=$? 
if test $enable_hard_errors = no && test $estatus -eq 99; then estatus=1 fi case $estatus:$expect_failure in 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; 0:*) col=$grn res=PASS recheck=no gcopy=no;; 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; *:*) col=$red res=FAIL recheck=yes gcopy=yes;; esac # Report outcome to console. echo "${col}${res}${std}: $test_name" # Register the test result, and other relevant metadata. echo ":test-result: $res" > $trs_file echo ":global-test-result: $res" >> $trs_file echo ":recheck: $recheck" >> $trs_file echo ":copy-in-global-log: $gcopy" >> $trs_file # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: