e-PCR-2.3.12/ 0000755 0011377 0001062 00000000000 11745334032 012401 5 ustar rotmistr contig e-PCR-2.3.12/stand/ 0000755 0011377 0001062 00000000000 11745334032 013512 5 ustar rotmistr contig e-PCR-2.3.12/stand/Makefile 0000644 0011377 0001062 00000005451 11745334032 015157 0 ustar rotmistr contig ## $Id: Makefile,v 1.7 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Top-level dispatcher: every goal listed below is forwarded, unchanged, to
# the per-component makefiles stand/Makefile.lib<name> (one per entry in LIBS)
# and stand/Makefile.<name> (one per entry in BINS), in that order.
LIBS = epcr
BINS = cmd_epcr cmd_famap cmd_fahash cmd_rpcr
srcdir = .

# These goals are commands, not files; without this declaration a stray file
# called e.g. "install" or "clean" would make the goal look up to date.
.PHONY: all links dirs clean dist clean-all install install-lib dist-clean depend

# -e  lets environment variables override assignments in the sub-makefile,
# -f  selects the sub-makefile to run.
# The loop stops at the first failing sub-make and propagates its exit status;
# a plain "cmd ; done" loop would only report the status of the last component.
all links dirs clean dist clean-all install install-lib dist-clean depend:
	for i in $(LIBS:%=lib%) $(BINS) ; do \
		$(MAKE) -ef $(srcdir)/stand/Makefile.$$i $@ || exit $$? ; \
	done
#
########################################################################
## $Log: Makefile,v $
## Revision 1.7 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.6 2004/05/27 20:36:03 rotmistr
## Version 2.1.0 with appropriate changes (see Changes) is ready for tests.
##
## Revision 1.5 2004/04/06 16:44:57 rotmistr
## *** empty log message ***
##
## Revision 1.4 2004/03/29 03:16:47 rotmistr
## *** empty log message ***
##
## Revision 1.3 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.2 2003/12/23 21:30:57 rotmistr
## - gaps/mismatches reporting
## - lo/hi fixup
## - reverse sts in re-PCR_main
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.3 2003/11/20 05:56:02 rotmistr
## Loading looks working
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.cmd_epcr 0000644 0011377 0001062 00000005723 11745334032 016734 0 ustar rotmistr contig ## $Id: Makefile.cmd_epcr,v 1.4 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Standalone build description for the e-PCR command-line binary.
# Directories, compiler flags, the generic all/links/dirs/clean rules and the
# $(objdir)/%.o compile rule are pulled in from config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Source base names (no extension); SRC and OBJ are derived from this list.
SRC_FILES = e-PCR_main
SRC = $(SRC_FILES:%=$(srcdir)/%.cpp)
OBJ = $(SRC_FILES:%=$(objdir)/%.o)
HDR =
# Libraries to link: each name becomes the prerequisite $(tgtdir)/lib<name>.a
# and the linker argument -l<name>.
LIB = epcr
# LIBNAME stays unset for this binary, so the "links" rule in config.mk (which
# tests for a non-empty LIBNAME) does not create a symlink.
#LIBNAME = epcr
TARGET = $(tgtdir)/e-PCR

# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# Copy the linked binary into $(BINDIR); the binary must already be built.
install:
	cp $(TARGET) $(BINDIR)

# Nothing to install on the library side for a binary.
install-lib:

# Relink from scratch: the old binary is removed first, then objects and the
# static library are linked with the C++ driver.
$(TARGET): $(OBJ) $(LIB:%=$(tgtdir)/lib%.a)
	-rm -f $@
	$(CXX) $(LDFLAGS) -o $@ $(OBJ) $(LIB:%=-l%)

# Explicit prerequisite only; no recipe is attached here.
$(objdir)/e-PCR_main.o: $(srcdir)/e-PCR_main.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/cmdepcr_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/cmdepcr_depend.mk
########################################################################
## $Log: Makefile.cmd_epcr,v $
## Revision 1.4 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.3 2004/03/30 21:39:30 rotmistr
## Fixed build arguments usage
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.cmd_famap 0000644 0011377 0001062 00000005727 11745334032 017073 0 ustar rotmistr contig ## $Id: Makefile.cmd_famap,v 1.4 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Standalone build description for the famap command-line binary.
# Directories, compiler flags, the generic all/links/dirs/clean rules and the
# $(objdir)/%.o compile rule are pulled in from config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Source base names (no extension); SRC and OBJ are derived from this list.
SRC_FILES = famap_main
SRC = $(SRC_FILES:%=$(srcdir)/%.cpp)
OBJ = $(SRC_FILES:%=$(objdir)/%.o)
HDR =
# Libraries to link: each name becomes the prerequisite $(tgtdir)/lib<name>.a
# and the linker argument -l<name>.
LIB = epcr
# LIBNAME stays unset for this binary, so the "links" rule in config.mk (which
# tests for a non-empty LIBNAME) does not create a symlink.
#LIBNAME = epcr
TARGET = $(tgtdir)/famap

# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# Copy the linked binary into $(BINDIR); the binary must already be built.
install:
	cp $(TARGET) $(BINDIR)

# Nothing to install on the library side for a binary.
install-lib:

# Relink from scratch: the old binary is removed first, then objects and the
# static library are linked with the C++ driver.
$(TARGET): $(OBJ) $(LIB:%=$(tgtdir)/lib%.a)
	-rm -f $@
	$(CXX) $(LDFLAGS) -o $@ $(OBJ) $(LIB:%=-l%)

# Explicit prerequisite only; no recipe is attached here.
$(objdir)/famap_main.o: $(srcdir)/famap_main.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/cmdfamap_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/cmdfamap_depend.mk
########################################################################
## $Log: Makefile.cmd_famap,v $
## Revision 1.4 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.3 2004/03/30 21:39:30 rotmistr
## Fixed build arguments usage
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.cmd_fahash 0000644 0011377 0001062 00000006243 11745334032 017233 0 ustar rotmistr contig ## $Id: Makefile.cmd_fahash,v 1.2 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Standalone build description for the fahash command-line binary.
# Directories, compiler flags, the generic all/links/dirs/clean rules and the
# $(objdir)/%.o compile rule are pulled in from config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Source base names (no extension); SRC and OBJ are derived from this list.
SRC_FILES = fahash_main
SRC = $(SRC_FILES:%=$(srcdir)/%.cpp)
OBJ = $(SRC_FILES:%=$(objdir)/%.o)
HDR =
# Libraries to link: each name becomes the prerequisite $(tgtdir)/lib<name>.a
# and the linker argument -l<name>.
LIB = epcr
# LIBNAME stays unset for this binary, so the "links" rule in config.mk (which
# tests for a non-empty LIBNAME) does not create a symlink.
#LIBNAME = epcr
TARGET = $(tgtdir)/fahash

# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# Copy the linked binary into $(BINDIR); the binary must already be built.
install:
	cp $(TARGET) $(BINDIR)

# Nothing to install on the library side for a binary.
install-lib:

# Relink from scratch: the old binary is removed first, then objects and the
# static library are linked with the C++ driver.
$(TARGET): $(OBJ) $(LIB:%=$(tgtdir)/lib%.a)
	-rm -f $@
	$(CXX) $(LDFLAGS) -o $@ $(OBJ) $(LIB:%=-l%)

# Explicit prerequisite only; no recipe is attached here.
$(objdir)/fahash_main.o: $(srcdir)/fahash_main.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/cmdfahash_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/cmdfahash_depend.mk
########################################################################
## $Log: Makefile.cmd_fahash,v $
## Revision 1.2 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.1 2004/05/27 20:36:03 rotmistr
## Version 2.1.0 with appropriate changes (see Changes) is ready for tests.
##
## Revision 1.1 2004/04/27 00:11:57 rotmistr
## Forgot to add...
##
## Revision 1.3 2004/03/30 21:39:30 rotmistr
## Fixed build arguments usage
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.cmd_rpcr 0000644 0011377 0001062 00000005727 11745334032 016755 0 ustar rotmistr contig ## $Id: Makefile.cmd_rpcr,v 1.4 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Standalone build description for the re-PCR command-line binary.
# Directories, compiler flags, the generic all/links/dirs/clean rules and the
# $(objdir)/%.o compile rule are pulled in from config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Source base names (no extension); SRC and OBJ are derived from this list.
SRC_FILES = re-PCR_main
SRC = $(SRC_FILES:%=$(srcdir)/%.cpp)
OBJ = $(SRC_FILES:%=$(objdir)/%.o)
HDR =
# Libraries to link: each name becomes the prerequisite $(tgtdir)/lib<name>.a
# and the linker argument -l<name>.
LIB = epcr
# LIBNAME stays unset for this binary, so the "links" rule in config.mk (which
# tests for a non-empty LIBNAME) does not create a symlink.
#LIBNAME = epcr
TARGET = $(tgtdir)/re-PCR

# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# Copy the linked binary into $(BINDIR); the binary must already be built.
install:
	cp $(TARGET) $(BINDIR)

# Nothing to install on the library side for a binary.
install-lib:

# Relink from scratch: the old binary is removed first, then objects and the
# static library are linked with the C++ driver.
$(TARGET): $(OBJ) $(LIB:%=$(tgtdir)/lib%.a)
	-rm -f $@
	$(CXX) $(LDFLAGS) -o $@ $(OBJ) $(LIB:%=-l%)

# Explicit prerequisite only; no recipe is attached here.
$(objdir)/re-PCR_main.o: $(srcdir)/re-PCR_main.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/cmdrpcr_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/cmdrpcr_depend.mk
########################################################################
## $Log: Makefile.cmd_rpcr,v $
## Revision 1.4 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.3 2004/03/30 21:39:30 rotmistr
## Fixed build arguments usage
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.cmd_seqcmp 0000644 0011377 0001062 00000006021 11745334032 017263 0 ustar rotmistr contig ## $Id: Makefile.cmd_seqcmp,v 1.3 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Standalone build description for the seqcmp command-line binary.
# Directories, compiler flags, the generic all/links/dirs/clean rules and the
# $(objdir)/%.o compile rule are pulled in from config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Source base names (no extension); SRC and OBJ are derived from this list.
SRC_FILES = seqcmp_main
SRC = $(SRC_FILES:%=$(srcdir)/%.cpp)
OBJ = $(SRC_FILES:%=$(objdir)/%.o)
HDR =
# Libraries to link: each name becomes the prerequisite $(tgtdir)/lib<name>.a
# and the linker argument -l<name>.
LIB = epcr
# LIBNAME stays unset for this binary, so the "links" rule in config.mk (which
# tests for a non-empty LIBNAME) does not create a symlink.
#LIBNAME = epcr
TARGET = $(tgtdir)/seqcmp

# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# Both install goals have no recipe here: seqcmp is built but never copied
# to $(BINDIR).
install:

install-lib:

# Relink from scratch: the old binary is removed first, then objects and the
# static library are linked with the C++ driver.
$(TARGET): $(OBJ) $(LIB:%=$(tgtdir)/lib%.a)
	-rm -f $@
	$(CXX) $(LDFLAGS) -o $@ $(OBJ) $(LIB:%=-l%)

# Explicit prerequisite only; no recipe is attached here.
$(objdir)/seqcmp_main.o: $(srcdir)/seqcmp_main.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/cmdseqcmp_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/cmdseqcmp_depend.mk
########################################################################
## $Log: Makefile.cmd_seqcmp,v $
## Revision 1.3 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.2 2004/03/30 21:39:30 rotmistr
## Fixed build arguments usage
##
## Revision 1.1 2004/02/12 21:38:55 rotmistr
## Added seqcmp binary
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.libepcr 0000644 0011377 0001062 00000010512 11745334032 016570 0 ustar rotmistr contig ## $Id: Makefile.libepcr,v 1.7 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Build description for the static library libepcr.a: file lists and names.
# Shared directories, flags and generic rules are provided by config.mk.
srcdir = .
include $(srcdir)/stand/config.mk

# Library sources, given as base names without extension.
SRC_FILES = faread fast_seqio_read fast_seqio_write hashset mmap align \
	stsmatch_i stsmatch_m stsfilter stsmatch \
	fahash_create fahash_create1 fahash_create2 fahash_lookup

# Full source and object paths derived from the base names above.
SRC = $(patsubst %,$(srcdir)/%.cpp,$(SRC_FILES))
OBJ = $(patsubst %,$(objdir)/%.o,$(SRC_FILES))

# Public headers copied by install-lib.
# NOTE(review): the Windows makefiles in this package also list mmap.hpp in
# HDR; confirm whether it should be installed from here as well.
HDR = defaults.h build_cfg.h bin-io.hpp strref.hpp faread.hpp \
	fast_seqio.hpp hashset.hpp minilcs.hpp align.hpp sts_i.hpp \
	stsmatch_i.hpp stsmatch_m.hpp stsmatch.hpp fahash.hpp sts.hpp

# LIBNAME names the include sub-directory used by install-lib and, through
# config.mk, the symlink created by "links".
LIBNAME = epcr
TARGET = $(tgtdir)/libepcr.a
# Command-style goals defined in this file; none of them names a real file.
.PHONY: target install install-lib depend

target: $(TARGET)

# The library installs nothing under the binary "install" goal.
install:

# Install the archive and the public headers.
# mkdir -p is required: the "dirs" goal in config.mk already creates
# $(INCDIR)/$(LIBNAME), and a plain mkdir would then fail and abort the
# install before any header is copied (the same happens on a re-install).
install-lib:
	cp $(TARGET) $(LIBDIR)
	mkdir -p $(INCDIR)/$(LIBNAME)
	cp $(HDR) $(INCDIR)/$(LIBNAME)

# Rebuild the archive from scratch so members of removed objects do not
# linger, then refresh its symbol index.
$(tgtdir)/libepcr.a: $(OBJ)
	-rm -f $@
	ar cru $@ $(OBJ)
	ranlib $@

# Explicit object -> source prerequisites; no recipes are attached here.
$(objdir)/fahash_lookup.o: $(srcdir)/fahash_lookup.cpp
$(objdir)/fahash_create.o: $(srcdir)/fahash_create.cpp
$(objdir)/fahash_create1.o: $(srcdir)/fahash_create1.cpp
$(objdir)/fahash_create2.o: $(srcdir)/fahash_create2.cpp
$(objdir)/faread.o: $(srcdir)/faread.cpp
$(objdir)/fast_seqio_read.o: $(srcdir)/fast_seqio_read.cpp
$(objdir)/fast_seqio_write.o: $(srcdir)/fast_seqio_write.cpp
$(objdir)/hashset.o: $(srcdir)/hashset.cpp
$(objdir)/mmap.o: $(srcdir)/mmap.cpp
$(objdir)/align.o: $(srcdir)/align.cpp
$(objdir)/stsmatch_i.o: $(srcdir)/stsmatch_i.cpp
$(objdir)/stsmatch_m.o: $(srcdir)/stsmatch_m.cpp
$(objdir)/stsfilter.o: $(srcdir)/stsfilter.cpp
$(objdir)/stsmatch.o: $(srcdir)/stsmatch.cpp

# Regenerate the header dependency fragment from the compiler's -M output,
# filtered through $(FIXCMD).
depend:
	$(CXX) $(CXXFLAGS) -M $(SRC) | $(FIXCMD) > $(tgtdir)/libepcr_depend.mk

# Optional: silently skipped until "depend" has been run once.
-include $(tgtdir)/libepcr_depend.mk
########################################################################
## $Log: Makefile.libepcr,v $
## Revision 1.7 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.6 2004/06/03 23:37:29 rotmistr
## New aligner added.
##
## Revision 1.5 2004/05/27 20:36:04 rotmistr
## Version 2.1.0 with appropriate changes (see Changes) is ready for tests.
##
## Revision 1.4 2004/03/30 21:06:58 rotmistr
## Fixes for setting default STS size range.
##
## Revision 1.3 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.2 2004/01/28 23:27:09 rotmistr
## "Best of overlapping" hit selection postprocessor added.
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.bcc55-w32 0000644 0011377 0001062 00000010754 11745334032 016472 0 ustar rotmistr contig ## $Id: Makefile.bcc55-w32,v 1.7 2004/09/03 21:28:56 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# Borland C++ 5.5 / Win32 build: tool settings, inference rules, file lists.
srcdir = .
!include $(srcdir)/stand/version.mk

# Compiler command line shared by C and C++ sources; VERSION comes from
# version.mk. The -w-8026 / -w-8027 switches turn those two warnings off.
BCC = bcc32 -I./ -DSTANDALONE -DUSE_WIN=1 -DVERSION=\"$(VERSION)\" -O2 -w-8026 -w-8027
LINK = ilink32

# Inference rules: compile a .c or .cpp file into the matching .obj.
.c.obj:
	$(BCC) -c $< -o $@

.cpp.obj:
	$(BCC) -c $< -o $@

# C sources.
C_FILES = getopt.c

# C++ sources shared by all four executables.
SRC_FILES = faread.cpp fast_seqio_read.cpp fast_seqio_write.cpp \
	hashset.cpp mmap.cpp align.cpp \
	stsmatch_i.cpp stsmatch_m.cpp stsfilter.cpp stsmatch.cpp \
	fahash_create.cpp fahash_create1.cpp fahash_create2.cpp \
	fahash_lookup.cpp

# Object list: every .cpp and .c entry above with its suffix swapped to .obj.
OBJ = $(SRC_FILES:.cpp=.obj) $(C_FILES:.c=.obj)

# Headers that the "includes" goal depends on.
HDR = defaults.h build_cfg.h bin-io.hpp strref.hpp mmap.hpp faread.hpp \
	fast_seqio.hpp hashset.hpp sts_i.hpp align.hpp minilcs.hpp \
	stsmatch_i.hpp stsmatch_m.hpp stsmatch.hpp fahash.hpp sts.hpp \
	mswin.h
# Default goal: stage the headers, compile the shared objects, then link the
# four executables.
all: includes objects e-PCR.exe famap.exe fahash.exe re-PCR.exe

# Remove objects and the staged header directory; every step is allowed to
# fail (leading '-') so a partially clean tree is not an error.
clean:
	-del *.obj
	-del epcr/*
	-rmdir epcr

# Also remove the linked executables. Uses "del", like "clean" above, so the
# goal does not depend on a Unix-style rm being installed.
clean-all: clean
	-del e-PCR.exe
	-del re-PCR.exe
	-del famap.exe
	-del fahash.exe

# Each executable links its own *_main.obj together with the shared objects;
# $** expands to the full list of dependents of the rule.
e-PCR.exe: e-PCR_main.obj $(OBJ)
	$(LINK) c0x32.obj $** , e-PCR.exe, , cw32.lib import32.lib , ,

famap.exe: famap_main.obj $(OBJ)
	$(LINK) c0x32.obj $** , famap.exe, , cw32.lib import32.lib , ,

fahash.exe: fahash_main.obj $(OBJ)
	$(LINK) c0x32.obj $** , fahash.exe, , cw32.lib import32.lib , ,

re-PCR.exe: re-PCR_main.obj $(OBJ)
	$(LINK) c0x32.obj $** , re-PCR.exe, , cw32.lib import32.lib , ,
# $(BCC) $(srcdir)/e-PCR_main.cpp $(OBJ) -tWC

# Stage the headers in an epcr/ sub-directory. The '&' prefix makes MAKE run
# the copy command once per dependent in $**, i.e. once per header.
includes: $(HDR)
	-mkdir epcr
	&copy $** epcr

objects: $(OBJ)
########################################################################
## $Log: Makefile.bcc55-w32,v $
## Revision 1.7 2004/09/03 21:28:56 rotmistr
## Fixes to compile with Borland C++ 5.5
##
## Revision 1.6 2004/09/03 19:07:06 rotmistr
## List of files changed
##
## Revision 1.5 2004/05/27 21:18:54 rotmistr
## Changes from ../Changes v.2.1.0 added
##
## Revision 1.4 2004/04/02 15:44:01 rotmistr
## *** empty log message ***
##
## Revision 1.3 2004/04/01 17:24:46 rotmistr
## *** empty log message ***
##
## Revision 1.2 2004/04/01 16:37:54 rotmistr
## Added getopt
##
## Revision 1.1 2004/04/01 05:59:11 rotmistr
## Compilable with Bcc5.5/win32
##
## Revision 1.4 2004/03/30 21:06:58 rotmistr
## Fixes for setting default STS size range.
##
## Revision 1.3 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.2 2004/01/28 23:27:09 rotmistr
## "Best of overlapping" hit selection postprocessor added.
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/Makefile.vc8 0000644 0011377 0001062 00000011030 11745334032 015644 0 ustar rotmistr contig ## $Id: Makefile.vc8,v 1.1 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
# MS Visual C++ 8.0 (NMAKE) build: tool settings, inference rules, file lists.
srcdir = .
!include $(srcdir)/stand/version.mk

# Compiler command line shared by C and C++ sources; VERSION comes from
# version.mk. The same cl driver is also used for linking.
CC = cl -I../ -D_WIN32 -D_CONSOLE -DSTANDALONE -DUSE_WIN=1 -DVERSION=\"$(VERSION)\" -O2 /EHsc /GR
LINK = cl

# Inference rules: compile a .c or .cpp file into the matching .obj.
.c.obj:
	$(CC) -c $< -o $@

.cpp.obj:
	$(CC) -c $< -o $@

# C sources.
C_FILES = getopt.c

# C++ sources shared by all four executables.
SRC_FILES = faread.cpp fast_seqio_read.cpp fast_seqio_write.cpp \
	hashset.cpp mmap.cpp align.cpp \
	stsmatch_i.cpp stsmatch_m.cpp stsfilter.cpp stsmatch.cpp \
	fahash_create.cpp fahash_create1.cpp fahash_create2.cpp \
	fahash_lookup.cpp

# Object list: every .cpp and .c entry above with its suffix swapped to .obj.
OBJ = $(SRC_FILES:.cpp=.obj) $(C_FILES:.c=.obj)

# Headers that the "includes" goal depends on.
HDR = defaults.h build_cfg.h bin-io.hpp strref.hpp mmap.hpp faread.hpp \
	fast_seqio.hpp hashset.hpp sts_i.hpp align.hpp minilcs.hpp \
	stsmatch_i.hpp stsmatch_m.hpp stsmatch.hpp fahash.hpp sts.hpp \
	mswin.h
# Default goal: check the headers, compile the shared objects, then link the
# four executables.
all: includes objects e-PCR.exe famap.exe fahash.exe re-PCR.exe

# Remove objects and any epcr/ staging directory; every step is allowed to
# fail (leading '-') so a partially clean tree is not an error.
clean:
	-del *.obj
	-del epcr/*
	-rmdir epcr

# Also remove the linked executables. Uses "del", like "clean" above, so the
# goal does not depend on a Unix-style rm being installed.
clean-all: clean
	-del e-PCR.exe
	-del re-PCR.exe
	-del famap.exe
	-del fahash.exe

# Each executable is linked through the cl driver: its own *_main.obj first,
# /Fe naming the output, and the shared objects passed after /link.
# NOTE(review): /ML is documented as removed in Visual C++ 2005; confirm
# whether it should be dropped or replaced by /MT.
e-PCR.exe: e-PCR_main.obj $(OBJ)
	$(LINK) e-PCR_main.obj /Fee-PCR.exe /ML /link /SUBSYSTEM:CONSOLE $(OBJ)

famap.exe: famap_main.obj $(OBJ)
	$(LINK) famap_main.obj /Fefamap.exe /ML /link /SUBSYSTEM:CONSOLE $(OBJ)

fahash.exe: fahash_main.obj $(OBJ)
	$(LINK) fahash_main.obj /Fefahash.exe /ML /link /SUBSYSTEM:CONSOLE $(OBJ)

re-PCR.exe: re-PCR_main.obj $(OBJ)
	$(LINK) re-PCR_main.obj /Fere-PCR.exe /ML /link /SUBSYSTEM:CONSOLE $(OBJ)

# No commands here: this goal only requires that every header exists.
includes: $(HDR)

objects: $(OBJ)
########################################################################
## $Log: Makefile.vc8,v $
## Revision 1.1 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.7 2004/09/03 21:28:56 rotmistr
## Fixes to compile with Borland C++ 5.5
##
## Revision 1.6 2004/09/03 19:07:06 rotmistr
## List of files changed
##
## Revision 1.5 2004/05/27 21:18:54 rotmistr
## Changes from ../Changes v.2.1.0 added
##
## Revision 1.4 2004/04/02 15:44:01 rotmistr
## *** empty log message ***
##
## Revision 1.3 2004/04/01 17:24:46 rotmistr
## *** empty log message ***
##
## Revision 1.2 2004/04/01 16:37:54 rotmistr
## Added getopt
##
## Revision 1.1 2004/04/01 05:59:11 rotmistr
## Compilable with Bcc5.5/win32
##
## Revision 1.4 2004/03/30 21:06:58 rotmistr
## Fixes for setting default STS size range.
##
## Revision 1.3 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.2 2004/01/28 23:27:09 rotmistr
## "Best of overlapping" hit selection postprocessor added.
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.5 2003/12/01 12:29:36 rotmistr
## Reverse PCR wordhash builder in progress
##
## Revision 1.4 2003/11/21 23:26:36 rotmistr
## Almost compilable
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/config.mk 0000644 0011377 0001062 00000011517 11745334032 015315 0 ustar rotmistr contig ## $Id: config.mk,v 1.7 2007/07/05 16:06:04 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
tgtdir = .
objdir = .
srcdir = .
COMMON_CC_FLAGS =
ifdef OPTIMIZE
CC_FLAGS = $(COMMON_CC_FLAGS) -O$(OPTIMIZE)
LD_FLAGS =
else
ifdef PROFILE
CC_FLAGS = $(COMMON_CC_FLAGS) -g -pg
LD_FLAGS = -g -pg
else
CC_FLAGS = $(COMMON_CC_FLAGS) -g2
LD_FLAGS = -g2
endif
endif
## Disabled: would compute a "<kernel name>-<machine>" string from uname.
#arch = $(shell echo `uname -s`-`uname -m`)
## Installation root; the install directories below are derived from it.
prefix = /usr/local/
#$(arch)
## Install locations for executables, headers and libraries.
BINDIR = $(prefix)/bin/
INCDIR = $(prefix)/include/
LIBDIR = $(prefix)/lib/
## Perl filter that prints every input line; when a line starts with a token
## containing neither whitespace nor '#', that token is first prefixed with
## the literal text "$(objdir)/" (make reduces "$$" to "$", so perl receives
## an escaped dollar sign followed by "(objdir)" and its own $1).
## NOTE(review): presumably used to post-process generated dependency lists
## for the "depend" goal -- confirm in the per-target makefiles.
FIXCMD = perl -ne's/^([^\s\#]+)/\$$(objdir)\/$$1/;print'
## Pulls in VER_MAJOR, VER_MINOR, VER_BUILD and VERSION.
include $(srcdir)/stand/version.mk
#########################################################################
# GNU compiler flags
CC = gcc
CXX = g++
## C++ compile flags: include paths for the source tree and the install
## include directory, the build-flavour flags (CC_FLAGS), optional
## PART_CXXFLAGS (not defined in this file), large-file flags, version
## defines, and the fixed defines DEALLOCATE=0 and STANDALONE=1.
CXXFLAGS = -I$(srcdir) -I$(INCDIR) $(CC_FLAGS) $(PART_CXXFLAGS) \
-DDEALLOCATE=0 $(LF64CCFLAGS) $(VERSION_FLAGS) -DSTANDALONE=1
## Link flags: build-flavour and large-file link flags, library search paths
## for the target and install library directories, optional PART_LDFLAGS
## (not defined in this file).
LDFLAGS = $(LD_FLAGS) $(LF64LDFLAGS) -L$(tgtdir) -L$(LIBDIR) $(PART_LDFLAGS)
# $(PART_PRELIBS) $(LIBS:%=-l%) $(PART_POSTLIBS)
## Large-file support flags are queried from getconf. The backquotes are
## stored literally in the variable, so getconf is run by the shell for each
## command line in which these flags appear.
LF64CCFLAGS = `getconf LFS_CFLAGS`
LF64LDFLAGS = `getconf LFS_LDFLAGS` `getconf LFS_LIBS`
## Use following lines if you don't have getconf but need to
## explicitly turn on largefile support
## (the feature-test macro is spelled _FILE_OFFSET_BITS, with a leading
## underscore)
# LF64CCFLAGS = -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
# LF64LDFLAGS =
## Use following lines for Mac OS X and other systems that lack *64 functions
## NOTE(review): the BUILD instructions spell this macro NATIVE_LARGEFILES;
## confirm which spelling the sources actually test.
# LF64CCFLAGS = -DNATIVE_LARGEFILE
# LF64LDFLAGS =
## Version macros passed to the compiler. The escaped double quotes reach
## the compiler literally, so VERSION is defined as a quoted string.
VERSION_FLAGS = -DVERSION=\"$(VERSION)\" \
-DVER_MAJOR=$(VER_MAJOR) \
-DVER_MINOR=$(VER_MINOR) \
-DVER_BUILD=$(VER_BUILD)
## Library names used by this build.
LIBS = seq epcr
## SRC and HDR with every entry prefixed by "$(srcdir)/".
src = $(addprefix $(srcdir)/,$(SRC))
hdr = $(addprefix $(srcdir)/,$(HDR))
## These goals name actions, not files; declaring them phony keeps a stray
## file called e.g. "clean" or "all" from silently disabling the rule.
.PHONY: all links dirs clean clean-all dist-clean

## Default goal: set up the source symlink, then build "target", which is
## not defined in this file (presumably supplied by the including makefile).
all: links target

## When LIBNAME is non-empty, make sure a symlink of that name exists and
## points at $(srcdir); an existing symlink is left untouched.
links:
	if test -n "$(LIBNAME)" ; then \
		test -L $(LIBNAME) || ln -s $(srcdir) $(LIBNAME) ; \
	fi

## Create the header, binary and library install directories if missing.
dirs:
	for i in $(INCDIR)/$(LIBNAME) $(BINDIR) $(LIBDIR) ; do \
		test -d $$i || mkdir -p $$i ; \
	done

## Remove object files and editor backups ("~" files) of the listed headers
## and sources. -f keeps rm quiet about files that were never created; the
## leading "-" keeps cleanup best-effort if rm fails for another reason.
clean:
	-rm -f $(OBJ) $(HDR:%=%~) $(SRC:%=%~)

## Additionally remove the built target(s).
clean-all: clean
	-rm -f $(TARGET)

## Additionally remove all editor backups in the current directory and the
## LIBNAME symlink. The guard skips the removal when LIBNAME is empty or is
## not a symlink, so rm is never run without an operand.
dist-clean: clean-all
	-rm -f *~
	-if test -n "$(LIBNAME)" && test -L "$(LIBNAME)" ; then \
		rm -f "$(LIBNAME)" ; \
	fi

## Compile one C++ source from the source tree into the object directory.
$(objdir)/%.o: $(srcdir)/%.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@
########################################################################
## $Log: config.mk,v $
## Revision 1.7 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.6 2004/09/08 18:30:59 rotmistr
## Fixed typo
##
## Revision 1.5 2004/09/03 15:54:48 rotmistr
## Compilation for Mac OS/X
##
## Revision 1.4 2004/06/03 23:37:29 rotmistr
## New aligner added.
##
## Revision 1.3 2004/03/31 05:04:11 rotmistr
## Search range fix
##
## Revision 1.2 2004/02/04 21:23:46 rotmistr
## - gcc-3.3.2 compatible
## - better postfiltering for reverse-e-PCR for discontiguos words
## - cgi added, that supports:
## -- contig to chromosome mapping
## -- simple mapviewer links
## -- unists links
## -- discontiguos words
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.8 2003/12/10 19:55:48 rotmistr
## Plain fasta interface is about to be substituted to blastdb interface
##
## Revision 1.7 2003/12/04 21:29:34 rotmistr
## Looks like faindex branch works better!
##
## Revision 1.6 2003/11/24 19:33:40 rotmistr
## Optimised. Added OneTimeRun flag.
##
## Revision 1.5 2003/11/23 03:40:53 rotmistr
## Looks like working, requires optimisation.
##
## Revision 1.4 2003/11/20 23:05:58 rotmistr
## Contiguos words work.
## Discontiguos need to be modified.
##
## Revision 1.3 2003/11/20 18:27:32 rotmistr
## Sample files updated
## Program does not crush
##
## Revision 1.2 2003/11/20 02:12:28 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/stand/version.mk 0000644 0011377 0001062 00000010221 11745334032 015524 0 ustar rotmistr contig ## $Id: version.mk,v 1.26 2008/06/18 14:48:20 rotmistr Exp $
########################################################################
##
## PUBLIC DOMAIN NOTICE
## National Center for Biotechnology Information
##
## This software/database is a "United States Government Work" under the
## terms of the United States Copyright Act. It was written as part of
## the author's official duties as a United States Government employee and
## thus cannot be copyrighted. This software/database is freely available
## to the public for use. The National Library of Medicine and the U.S.
## Government have not placed any restriction on its use or reproduction.
##
## Although all reasonable efforts have been taken to ensure the accuracy
## and reliability of the software and data, the NLM and the U.S.
## Government do not and cannot warrant the performance or results that
## may be obtained by using this software or data. The NLM and the U.S.
## Government disclaim all warranties, express or implied, including
## warranties of performance, merchantability or fitness for any particular
## purpose.
##
## Please cite the author in any work or product based on this material.
##
########################################################################
#set -x
#echo "HELLO!!!"
## Numeric components of the release number.
## NOTE(review): the space-free NAME=value form may be relied on by tools
## other than make -- confirm before reformatting these lines.
VER_MAJOR=2
VER_MINOR=3
VER_BUILD=12
## Dotted release string assembled from the three components above; it is
## expanded at use, so later overrides of the components are honoured.
VERSION = $(VER_MAJOR).$(VER_MINOR).$(VER_BUILD)
########################################################################
## $Log: version.mk,v $
## Revision 1.26 2008/06/18 14:48:20 rotmistr
## *** empty log message ***
##
## Revision 1.25 2008/04/28 16:39:19 rotmistr
## Applied patch to build with gcc-4.3
##
## Revision 1.24 2008/03/26 16:04:35 rotmistr
## Added support for blastdb files
##
## Revision 1.23 2007/07/11 20:49:33 rotmistr
## Made 64bit-compatible
##
## Revision 1.22 2007/07/05 16:06:04 rotmistr
## Made things compileable by MS Visual C++ 8.0
##
## Revision 1.21 2005/06/14 16:46:51 rotmistr
## Changed report format for floppy tails
##
## Revision 1.20 2005/02/11 20:42:59 rotmistr
## Fixed "margin" bug, added primer search from file
##
## Revision 1.19 2004/10/26 17:16:41 rotmistr
## Added 5'-end masking for primers
##
## Revision 1.18 2004/09/03 15:54:48 rotmistr
## Compilation for Mac OS/X
##
## Revision 1.17 2004/06/08 16:14:59 rotmistr
## *** empty log message ***
##
## Revision 1.16 2004/06/07 16:25:03 rotmistr
## Bug fixes to previos version.
##
## Revision 1.15 2004/06/03 23:37:29 rotmistr
## New aligner added.
##
## Revision 1.14 2004/04/27 00:01:55 rotmistr
## Second version of reverse hash file started
##
## Revision 1.13 2004/04/06 04:53:18 rotmistr
## All is compileable with BCC5.5 and runnable on WIndows
##
## Revision 1.12 2004/04/01 17:24:46 rotmistr
## *** empty log message ***
##
## Revision 1.11 2004/03/30 19:08:08 rotmistr
## default STS size is tunnable now
##
## Revision 1.10 2004/03/26 17:02:18 rotmistr
## Compat-options are now allowed everywhere, and multiple fasta files can be used.
##
## Revision 1.9 2004/03/25 19:36:52 rotmistr
## API: separate left and right primers mism/gaps in forward API
##
## Revision 1.8 2004/03/23 22:36:02 rotmistr
## 2.0 release
##
## Revision 1.7 2004/02/18 05:44:40 rotmistr
## Changes in CGI: sort order, separate misalignments for l and r primers, reload button
##
## Revision 1.6 2004/02/12 21:39:29 rotmistr
## New version
##
## Revision 1.5 2004/01/28 23:27:09 rotmistr
## "Best of overlapping" hit selection postprocessor added.
##
## Revision 1.4 2004/01/08 23:22:47 rotmistr
## Fixed init error in faread,
## Adjusted output to standard,
## Added output format style and output file to parameters.
##
## Revision 1.3 2004/01/07 16:57:48 rotmistr
## Fragment size is now configurable.
##
## Revision 1.2 2004/01/06 21:54:28 rotmistr
## Statistics for word repetitions API added
##
## Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
## Package that includes e-PCR, reverse e-PCR, and sequence data preparation
## program for reverse e-PCR looks ready
##
## Revision 1.2 2003/11/20 02:12:32 rotmistr
## Fixed id, log tags and copyright notice
##
########################################################################
e-PCR-2.3.12/BUILD.html 0000644 0011377 0001062 00000022067 11745334031 014134 0 ustar rotmistr contig
Electronic PCR commandline tools: build instructions
Electronic PCR commandline tools: build instructions
Version: 2.3.12
-
Build
-
Unix/gcc instructions
- Make arguments
- Mac OS/X with gcc
- Windows/BorlandC++ instructions
- Windows/MS-VisualC++8.0 instructions
-
Files
- Sources common to forward and reverse e-PCR binaries
- Forward e-PCR source files
- Reverse e-PCR source files
- Extra files
- Build files
Build e-PCR and reverse e-PCR (re-PCR, famap) binaries with GNU
make and GCC.
Build
Unix/gcc instructions
- Unpack archive
- Edit stand/config.mk if necessary
- run gmake links depend all OPTIMIZE=6
Make arguments
One can use following arguments to make:
- OPTIMIZE=[0-9]
- to pass with -O argument to
compiler (default is build debug version)
- srcdir={path-to-src}
- to set path to sources
- objdir={path-to-obj}
- to set path where to
place .o
- tgtdir={target-path}
- to set path where to put
targets (libepcr.a and executable files)
Mac OS/X with gcc
Use LF64LDFLAGS= LF64CCFLAGS=-DNATIVE_LARGEFILES (yes,
space after first "=") arguments
with gmake since Mac OS/X does not have (and does not need) *64 file
functions and off64_t
Windows/BorlandC++ instructions
- Unpack archive
- Edit stand/Makefile.bcc55-w32 if necessary
- run make all -f stand/Makefile.bcc55-w32
Windows/MS-VisualC++8.0 instructions
- Unpack archive
- Edit stand/Makefile.vc8 if necessary
- run nmake all -f stand/Makefile.vc8
Files
e-PCR package includes two tool sets: forward
e-PCR (e-PCR) and reverse e-PCR (re-PCR, fahash and famap). These
binaries share some source files, that are compiled as
libepcr.a library.
Directory stand/ contains makefiles to use
with GCC. Change stand/config.mk to update compiler
and compiling options. Makefiles should be OK for GNU
make.
Sources common to forward and reverse e-PCR binaries
- build_cfg.h
- macros that control compilation
with/without NCBI toolkit
- defaults.h
- defaults for e-PCR program (used also in library)
- mswin.h
- windows compatibility defines and declarations
- native64.h
- native 64bit file access compatibility defines and declarations
- strref.hpp
- class for passing reference to string data
- sts_i.hpp
- generic STS interface class
- hashset.hpp
- hash calculating class
declaration (allows set of discontiguous words)
- hashset.cpp
- hash calculating class
implementation
- align.hpp
- align or compare two sequences allowing
mismatches and gaps, declarations
- align.cpp
- align or compare two sequences allowing
mismatches and gaps, implementation
- minilcs.hpp
- align two sequences template class
- faread.hpp
- read fasta files, declarations
- faread.cpp
- read fasta files, implementation
- mmap.hpp
- mmap(2) wrapper for huge files and no
page boundary restriction, declarations
- mmap.cpp
- mmap(2) wrapper for huge files and no
page boundary restriction, implementation
- getopt.c
- getopt implementation -- to compile for
windows
Forward e-PCR source files
- e-PCR_main.cpp
- main for e-PCR commandline program
- stsmatch_i.hpp
- STS lookup algorithm declarations
- stsmatch_i.cpp
- STS lookup algorithm implementation
- stsfilter.cpp
- Postprocessor for STS lookup
- stsmatch_m.hpp
- STS and STS hash table
implementation for mmapable UniSTS file, declarations
- stsmatch_m.cpp
- STS and STS hash table
implementation for mmapable UniSTS file, implementation
- stsmatch.hpp
- Some useful callbacks
declarations
- stsmatch.cpp
- Some useful callbacks,
implementation
Reverse e-PCR source files
- bin-io.hpp
- Generic read/write integers and strings
- fahash_defines.h
- Internal defines for fahash
- fahash_internal.hpp
- Internal defines for fahash
- fahash.hpp
- Hash sequence words in file, declarations
- fahash_create.cpp
- Hash sequence words in file,
creating hash file, abstract
- fahash_create1.cpp
- Hash sequence words in file,
creating hash file version 1 implementation
- fahash_create2.cpp
- Hash sequence words in file,
creating hash file version 2 implementation
- fahash_lookup.cpp
- Hash sequence words in file,
lookup algorithm implementation
- fast_seqio.hpp
- Fast access to regions of
sequences, declarations
- fast_seqio_read.cpp
- Fast access to regions of
sequences, implementation
- fast_seqio_write.cpp
- Fast access to regions of
sequences, create sequence file implementation
- famap_main.cpp
- main for commandline tool to
create/dump mmapable file
- fahash_main.cpp
- main for commandline tool to
create hash file
- sts.hpp
- simple implementation for STS class
- re-PCR_main.cpp
- main for reverse e-PCR commandline tool
Extra files
- seqcmp_main.cpp
- Main file for align.?pp
test
Build files
- Makefile
- Master makefile
- stand/version.mk
- Version definitions
- stand/config.mk
- Compiler options
- stand/Makefile.libepcr
- libepcr makefile
- stand/Makefile.cmd_epcr
- e-PCR makefile
- stand/Makefile.cmd_rpcr
- re-PCR makefile
- stand/Makefile.cmd_famap
- famap makefile
- stand/Makefile.cmd_fahash
- fahash makefile
- stand/Makefile.cmd_seqcmp
- seqcmp makefile
- stand/Makefile.bcc55-w32
- makefile for BorlandC++/win32
- stand/Makefile.vc8
- makefile for MS Visual C++ 8.0
e-PCR-2.3.12/BUILD.txt 0000644 0011377 0001062 00000013355 11745334031 014007 0 ustar rotmistr contig
Electronic PCR commandline tools: build instructions
Version: 2.3.12
_________________________________________________________________
Build e-PCR and reverse e-PCR (re-PCR, famap) binaries with GNU make
and GCC.
_________________________________________________________________
Build
Unix/gcc instructions
1. Unpack archive
2. Edit stand/config.mk if necessary
3. run gmake links depend all OPTIMIZE=6
Make arguments
One can use following arguments to make:
OPTIMIZE=[0-9]
to pass with -O argument to compiler (default is build debug
version)
srcdir={path-to-src}
to set path to sources
objdir={path-to-obj}
to set path where to place .o
tgtdir={target-path}
to set path where to put targets (libepcr.a and executable
files)
Mac OS/X with gcc
Use LF64LDFLAGS= LF64CCFLAGS=-DNATIVE_LARGEFILES (yes, space after
first "=") arguments with gmake since Mac OS/X does not have (and does
not need) *64 file functions and off64_t
Windows/BorlandC++ instructions
1. Unpack archive
2. Edit stand/Makefile.bcc55-w32 if necessary
3. run make all -f stand/Makefile.bcc55-w32
Windows/MS-VisualC++8.0 instructions
1. Unpack archive
2. Edit stand/Makefile.vc8 if necessary
3. run nmake all -f stand/Makefile.vc8
_________________________________________________________________
Files
e-PCR package includes two tool sets: forward e-PCR (e-PCR) and
reverse e-PCR (re-PCR, fahash and famap). These binaries share some
source files, that are compiled as libepcr.a library.
Directory stand/ contains makefiles to use with GCC. Change
stand/config.mk to update compiler and compiling options. Makefiles
should be OK for GNU make.
Sources common to forward and reverse e-PCR binaries
build_cfg.h
macros that control compilation with/without NCBI toolkit
defaults.h
defaults for e-PCR program (used also in library)
mswin.h
windows compatibility defines and declarations
native64.h
native 64bit file access compatibility defines and declarations
strref.hpp
class for passing reference to string data
sts_i.hpp
generic STS interface class
hashset.hpp
hash calculating class declaration (allows set of discontiguous
words)
hashset.cpp
hash calculating class implementation
align.hpp
align or compare two sequences allowing mismatches and gaps,
declarations
align.cpp
align or compare two sequences allowing mismatches and gaps,
implementation
minilcs.hpp
align two sequences template class
faread.hpp
read fasta files, declarations
faread.cpp
read fasta files, implementation
mmap.hpp
mmap(2) wrapper for huge files and no page boundary
restriction, declarations
mmap.cpp
mmap(2) wrapper for huge files and no page boundary
restriction, implementation
getopt.c
getopt implementation -- to compile for windows
Forward e-PCR source files
e-PCR_main.cpp
main for e-PCR commandline program
stsmatch_i.hpp
STS lookup algorithm declarations
stsmatch_i.cpp
STS lookup algorithm implementation
stsfilter.cpp
Postprocessor for STS lookup
stsmatch_m.hpp
STS and STS hash table implementation for mmapable UniSTS file,
declarations
stsmatch_m.cpp
STS and STS hash table implementation for mmapable UniSTS file,
implementation
stsmatch.hpp
Some useful callbacks declarations
stsmatch.cpp
Some useful callbacks, implementation
Reverse e-PCR source files
bin-io.hpp
Generic read/write integers and strings
fahash_defines.h
Internal defines for fahash
fahash_internal.hpp
Internal defines for fahash
fahash.hpp
Hash sequence words in file, declarations
fahash_create.cpp
Hash sequence words in file, creating hash file, abstract
fahash_create1.cpp
Hash sequence words in file, creating hash file version 1
implementation
fahash_create2.cpp
Hash sequence words in file, creating hash file version 2
implementation
fahash_lookup.cpp
Hash sequence words in file, lookup algorithm implementation
fast_seqio.hpp
Fast access to regions of sequences, declarations
fast_seqio_read.cpp
Fast access to regions of sequences, implementation
fast_seqio_write.cpp
Fast access to regions of sequences, create sequence file
implementation
famap_main.cpp
main for commandline tool to create/dump mmapable file
fahash_main.cpp
main for commandline tool to create hash file
sts.hpp
simple implementation for STS class
re-PCR_main.cpp
main for reverse e-PCR commandline tool
Extra files
seqcmp_main.cpp
Main file for align.?pp test
Build files
Makefile
Master makefile
stand/version.mk
Version definitions
stand/config.mk
Compiler options
stand/Makefile.libepcr
libepcr makefile
stand/Makefile.cmd_epcr
e-PCR makefile
stand/Makefile.cmd_rpcr
re-PCR makefile
stand/Makefile.cmd_famap
famap makefile
stand/Makefile.cmd_fahash
fahash makefile
stand/Makefile.cmd_seqcmp
seqcmp makefile
stand/Makefile.bcc55-w32
makefile for BorlandC++/win32
stand/Makefile.vc8
makefile for MS Visual C++ 8.0
_________________________________________________________________
e-PCR-2.3.12/README.html 0000644 0011377 0001062 00000051250 11745334031 014226 0 ustar rotmistr contig
Electronic PCR commandline tools: operating instructions
Electronic PCR commandline tools: operating instructions
Version: 2.3.12
-
Forward e-PCR
- Example
- Synopsis
- Description
-
Options
- General options
- Hash building options
- Hit quality options
- Alignment algorithms options
- Report options
- Output formats
- Exit codes
-
Reverse e-PCR
- Example
- Synopsis
- Description
-
Options
- Common options
- famap options
- Fahash
- Commands
- Search options
-
Output format
- For primer lookup
- For STS lookup
- Exit codes
- Bugs and features
- File formats
Use e-PCR to map sequences using STS
database
Use re-PCR to map STSes or short primers in sequence
database
Use famap and fahash to prepare
sequence database for re-PCR searches.
Forward e-PCR
Example
work> e-PCR -w9 -f 1 -m100 mystsdb.sts D=100-400 myfastafile.fa N=1 G=1 T=3
Synopsis
e-PCR [-hV] [posix-options] stsfile [fasta ...] [compat-options]
where posix-options are:
-m ## Margin (default 50)
-w ## Wordsize (default 7)
-n ## Max mismatches allowed (default 0)
-g ## Max indels allowed (default 0)
-f ## Use ## discontiguos words
-o ## Set output file
-t ## Set output format:
1 - classic, range (pos1..pos2)
2 - classic, midpoint
3 - tabular
4 - tabular with alignment in comments (slow)
-d ##-## Set default sts size
-p +- Turn hits postprocess on/off
-v +- Verbose on/Off
-a a|f Use presize alignmens (only if gaps>0), slow
a - Allways or f - as Fallback
-x +- Use 5'-end lowercase masking of primers (default -)
-u +- Uppercase all primers (default -)
and compat-options (duplicate posix-options) are:
M=## Margin (default 50)
W=## Wordsize (default 7)
N=## Max mismatches allowed (default 0)
G=## Max indels allowed (default 0)
F=## Use ## discontinuos words
O=## Set output file to ##
T=## Set output format (1..4)
D=##-## Set default sts size
P=+- Postprocess hits on/off
V=+- Verbose on/Off
A=a|f Use presize alignmens (only if gaps>0), slow
a - Allways or f - as Fallback
X=+- Use 5'-end lowercase masking of primers (default -)
U=+- Uppercase all primers (default -)
-mid Same as T=2
Description
e-PCR parses stsfile in unists
format, then reads nucleotide sequence data in
FASTA format from files listed in
commandline if any, or from stdin otherwise. For input
sequences e-PCR finds matches and prints output in one of
four formats.
Options
Two sets of options are used: POSIX-compatible and
old-style provided for compatibility with previous versions of
e-PCR.
Posix-style options can appear only before first
parameter not starting with '-'. Argument '--' explicitly stops
parsing arguments as posix options.
Compatibility options can appear anywhere in commandline.
'-mid' can appear anywhere and does not stop posix options
recognition.
General options
- -V
- Print version, exit after parsing
commandline
- -h
- Print help, exit after parsing
commandline
Hash building options
- -w wordsize | W=wordsize
- Set word size for
primers hash (nucleotide positions). Longer word size decreases
hash collision rate, but increases memory usage. Also no
mismatches are allowed within word size near "inner" boundary of
primers unless one uses discontiguous words, and no
gaps are ever allowed in that region.
- -f wordcnt | F=wordcnt
- Set discontiguous word
count for primers hash (1 means "use contiguous
words"). Discontiguous words increase number of hash
tables and decrease "effective" word size (thus increasing
hash collision rate), so make search significantly slower,
but increase sensitivity by allowing mismatches within
word size. Reasonable values are 1 (contiguous words)
and 3.
- -d lo-hi | D=lo-hi
- Set default STS size
range - values used for STSs that have no size associated
in file.
Hit quality options
- -m margin | M=margin
- Set maximal allowed
deviation of hit product size from expected STS size.
- -n mism | N=mism
- Set maximal number of
mismatches allowed in primer-to-sequence alignment
(per primer!).
- -g gaps | G=gaps
- Set maximal number of
gaps allowed in primer-to-sequence alignment (per primer!).
Alignment algorithms options
- -a a|f | A=a|f
- Use NW algorithm to align
primers to sequence: a - always, f - as fallback if fast
algorithm gives no hit at this position.
- -x +|- | X=+|-
- Turn on/off recognising of
lowercase characters at 5'-ends of primers as nucleotides
that don't need to be aligned to sequence (floppy tails).
- -u +|- | U=+|-
- Uppercase primers. To use
with files prepared for ``-x=+'' mode, but requiring full
primer alignment.
If STS file contains primers with lowercase characters, you have
to use either -x+ or -u+ flag.
Report options
- -o output | O=output
- Set output file.
- -t 1|2|3|4 | T=1|2|3|4
- Set output format.
- -p +|- | P=+|-
-
Set hit grouping on/off: when using discontiguous words
and gaps, some hits may be reported multiple times with
little different quality. This option controls reporting
only best hit of group of overlapping hits. Default
depends on F and G values.
- -v +|- | V=+|-
-
Report sequence ids to stderr on/off.
Output formats
- 1: Traditional: reports whitespace-separated
-
- Sequence FASTA identifier
- POS1..POS2 -- start and end positions of hit
(includes length of floppy tail)
- STS identifier (col. 1 from STS file)
- STS description (columns 5..last from STS file)
In this format product size equals to POS2-POS1+1
- 2: Traditional midpoint: reports whitespace-separated
-
- Sequence FASTA identifier
- POS -- middle point position of hit
- STS identifier (col. 1 from STS file)
- STS description (columns 5..last from STS file)
- 3: Tab-separated detailed
-
- Sequence FASTA identifier
- STS identifier (col. 1 from STS file)
- +|- -- strand of hit (order of primers in hit)
- POS1 -- start position of hit (does not include
floppy tail if any)
- POS2 -- end position of hit (does not include
floppy tail)
- SIZE/MIN..MAX -- observed size of hit/expected
size range of STS
- MISM -- Total number of mismatches for two primers
- GAPS -- Total number of gaps for two primers
- STS description (columns 5..last from STS file)
In this format product size may be greater than
POS2-POS1+1 for probes with floppy tails
- 4: Tab-separated detailed with alignment
-
Is same as format 3, but also containing visualisations of
alignments in comment lines (lines starting with ``#'')
Exit codes
Zero on success, nonzero on fail
Reverse e-PCR
Example
work> famap -tN -b genome.famap org/chr_*.fa
work> fahash -b genome.hash -w 12 -f3 ${PWD}/genome.famap
work> re-PCR -s genome.hash -n1 -g1 ACTATTGATGATGA AGGTAGATGTTTTT 120-200
Synopsis
famap [-hV]
famap -b mmapped-file [-t cvt] [fasta-file ...]
famap -d mmapped-file [ord ...]
famap -l mmapped-file [ord ...]
where cvt is one of: off n N nx NX
fahash [-hV]
fahash -b hash-file [build-options] mmapped-file ...
fahash -T hash-file [-o output]
where:
-b hash-file Build hash tables (hash-file) from sequence files,
-T hash-file Print word usage statistics for hash-file
-o outfile Set output file name for -T
build-options:
-w wordsize Set word size when building hash tables
-f period Set discontiguity when building hash tables
-k Skip repeats when building indexfile
-F min,max Set watermarks for fragment size (in Mb) for -v1
-v 1|2 Build file of format version 1 or 2
-c cachesize Use cache size cachesize (for -v2)
re-PCR [-hV]
re-PCR -p hash-file [-g gaps] [-n mism] [primer ...]
re-PCR -P hash-file [-g gaps] [-n mism] [primer-file ...]
re-PCR -s hash-file [search-options] [-O output] [left right lo hi [...]]
re-PCR -S hash-file [search-options] [-O output] [-C bcnt] [stsfile ...]
where:
-p hash-file Perform primer lookup using hash-file
-P hash-file Perform primer lookup using hash-file
-s hash-file Perform STS lookup using hash-file, STSs in cmdline
-S hash-file Perform STS lookup using hash-file, STSs in file
search-options:
-n mism Set max allowed mismatches per primer for lookup
-g gaps Set max allowed indels per primer for lookup
-m margin Set variability for STS size for lookup
-d min-max Set default STS size (for STSs without size set)
-r +|- Enable/disable reverse STS lookup
-O +|- Enable/disable syscall optimisation
-C batchcnt Set number of STSes per batch
-o outfile Set output file name
Description
Reverse e-PCR (re-PCR) performs STS or
primer lookup against sequence database. Two files are
required for database: mmapped-file with sequence data in fast
random-accessible format and hash-file, that keeps
precalculated positions of all words of sequence
database
Use famap to build mmapped-file from FASTA
files.
Use fahash to build hash-file, and output
word usage statistics.
Use re-PCR to perform STS and primer searches.
Discontiguous words are supported by re-PCR as well as
contiguous.
Options
Common options
- -V
- Print version, exit after parsing
commandline
- -h
- Print help, exit after parsing
commandline
famap options
- -b mmapped-file
- Build famap-file from input fasta
file(s). If no fasta files are set in commandline, use
stdin as input.
- -d mmapped-file
- Dump famap-file contents in
fasta format. If ord number(s) are set, print only
sequences with given ordinals.
- -l mmapped-file
- List famap-file sequence
identifiers. If ord number(s) are set, print only
sequences with given ordinals.
- -t cvt-table
- Use compiled-in table to
convert input.
- n
- Nucleotides. Allowed characters are
[actgACTGnN]. Other letters are converted to n or N.
Rest of symbols are ignored. Case is preserved.
- nx
- Nucleotides with extended ambiguity
codes iupac_na, lowercase are allowed. Other letters
are converted to n or N.
Rest of symbols are ignored. Case is preserved.
- N
- Nucleotides. Allowed characters are
[ACTGN]. [actgn] are converted to uppercase.
Other letters are converted to N.
Rest of symbols are ignored.
- NX
- Nucleotides with extended ambiguity
codes iupac_na, lowercase are converted to uppercase.
Other letters are converted to N.
Rest of symbols are ignored.
Fahash
- -b hash-file
- Build hash-file for
mmapped-file(s).
- -T hash-file
- Dump word usage statistics for
hash-file.
- -v version
- Build hash-file of version 1 or 2
(2 is default).
- -w wordsize
- Build hash-file for word
wordsize nucleotides long.
- -f wordcnt
- Build hash-file for
wordcnt discontiguous words. 1 stands for
contiguous words.
- -F min,max
- Use memory watermarks (Mbytes)
for hash table size (for -v 1).
- -c cachesize
- Set cache size for -v 2.
- -o output-file
- Use output-file for output
result of -T.
Commands
- -p hash-file
- Perform lookup for primers
given in commandline.
- -s hash-file
- Perform lookup for STSes
given in commandline.
- -S hash-file
- Perform lookup for STSes
taken from unists file(s) given in commandline.
Search options
- -n mism
- Number of mismatches allowed per
primer.
- -g gaps
- Number of gaps allowed per
primer.
- -m margin
- Maximal deviation of observed
product size to expected STS size.
- -d lo-hi
- Set default STS size
range - values used for STSs that have no size associated
in file.
- -r +|-
- Enable|disable flipped STS lookup
(default is "enabled").
- -O +|-
- Enable|disable syscall optimisation.
Since lookup is i/o expensive, enabling this parameter may
improve search performance diskwise. On the other hand, it
takes significantly more memory and CPU.
- -C batchcount
- How many STSs from input file
to look at in one pass. May affect performance, especially
when used with -O +.
- -o output-file
- Use output-file for output.
Output format
Is tab-separated file with following fields:
For primer lookup
- Primer ID
- Sequence ID
- Strand
- Hit start
- Hit end
- Mismatches
- Gaps
- Size
For STS lookup
- STS ID
- Sequence ID
- Strand
- Hit start
- Hit end
- Mismatches
- Gaps
- Observed Size/Expected size range
Exit codes
Zero on success, non-zero on errors
Bugs and features
- Mmapped-file path is hardcoded to hash-file as it is
in commandline when hash-file is being built, which means
that when one performs searches mmapped-file should be
accessible with same name from current directory, as it is
hardcoded.
- Mmapped-file is a proprietary format, that could be
substituted with megablast database format, but is not
(yet?) for performance reasons.
- If sequence sizes are large, it may be tricky to
create database with discontiguous words because of memory
usage requirements. Changing parameter -F (for -v 1) or -c
(for -v 2) may help.
File formats
- STS database
- Is single-tab (i.e. two tabs in a
row mean "empty field") separated file with following
fields:
- STS id (required).
- First (left) primer (required).
- Second (right) primer (required).
- Product size (optional): can be number for strict
size, or two numbers separated by dash for size
range.
- Additional info, that can be used by applications
(optional).
Primers should be in iupac_na encoding, everything that is not
ACTG or actg is translated to N or n. Primers sequences should
be uppercase,
unless you want to use file with e-PCR -x+ flag - then several
first nucleotides of primers may be lowercase-masked. If
primers are not fully uppercase and you don't use -x+ flag,
you have to use -u+ flag with e-PCR.
- Primers file
- Is single-tab (i.e. two tabs in a
row mean "empty field") separated file with following
fields:
- Primer id (required).
- Primer sequence.
e-PCR-2.3.12/README.txt 0000644 0011377 0001062 00000036120 11745334031 014100 0 ustar rotmistr contig
Electronic PCR commandline tools: operating instructions
Version: 2.3.12
_________________________________________________________________
Use e-PCR to map sequences using STS database
Use re-PCR to map STSes or short primers in sequence database
Use famap and fahash to prepare sequence database for re-PCR searches.
_________________________________________________________________
Forward e-PCR
Example
work> e-PCR -w9 -f 1 -m100 mystsdb.sts D=100-400 myfastafile.fa N=1 G=1 T=3
Synopsis
e-PCR [-hV] [posix-options] stsfile [fasta ...] [compat-options]
where posix-options are:
-m ## Margin (default 50)
-w ## Wordsize (default 7)
-n ## Max mismatches allowed (default 0)
-g ## Max indels allowed (default 0)
-f ## Use ## discontiguos words
-o ## Set output file
-t ## Set output format:
1 - classic, range (pos1..pos2)
2 - classic, midpoint
3 - tabular
4 - tabular with alignment in comments (slow)
-d ##-## Set default sts size
-p +- Turn hits postprocess on/off
-v +- Verbose on/Off
-a a|f Use presize alignmens (only if gaps>0), slow
a - Allways or f - as Fallback
-x +- Use 5'-end lowercase masking of primers (default -)
-u +- Uppercase all primers (default -)
and compat-options (duplicate posix-options) are:
M=## Margin (default 50)
W=## Wordsize (default 7)
N=## Max mismatches allowed (default 0)
G=## Max indels allowed (default 0)
F=## Use ## discontinuos words
O=## Set output file to ##
T=## Set output format (1..4)
D=##-## Set default sts size
P=+- Postprocess hits on/off
V=+- Verbose on/Off
A=a|f Use presize alignmens (only if gaps>0), slow
a - Allways or f - as Fallback
X=+- Use 5'-end lowercase masking of primers (default -)
U=+- Uppercase all primers (default -)
-mid Same as T=2
Description
e-PCR parses stsfile in unists format, then reads nucleotide sequence
data in FASTA format from files listed in commandline if any, or from
stdin otherwise. For input sequences e-PCR finds matches and prints
output in one of four formats.
Options
Two sets of options are used: POSIX-compatible and old-style provided
for compatibility with previous versions of e-PCR.
Posix-style options can appear only before first parameter not
starting with '-'. Argument '--' explicitly stops parsing arguments
as posix options.
Compatibility options can appear anywhere in commandline. '-mid' can
appear anywhere and does not stop posix options recognition.
General options
-V
Print version, exit after parsing commandline
-h
Print help, exit after parsing commandline
Hash building options
-w wordsize | W=wordsize
Set word size for primers hash (nucleotide positions). Longer
word size decreases hash collision rate, but increases memory
usage. Also no mismatches are allowed within word size near
"inner" boundary of primers unless one uses discontiguous
words, and no gaps are ever allowed in that region.
-f wordcnt | F=wordcnt
Set discontiguous word count for primers hash (1 means "use
contiguous words"). Discontiguous words increase number of hash
tables and decrease "effective" word size (thus increasing hash
collision rate), so make search significantly slower, but
increase sensitivity by allowing mismatches within word size.
Reasonable values are 1 (contiguous words) and 3.
-d lo-hi | D=lo-hi
Set default STS size range - values used for STSs that have no
size associated in file.
Hit quality options
-m margin | M=margin
Set maximal allowed deviation of hit product size from expected
STS size.
-n mism | N=mism
Set maximal number of mismatches allowed in primer-to-sequence
alignment (per primer!).
-g gaps | G=gaps
Set maximal number of gaps allowed in primer-to-sequence
alignment (per primer!).
Alignment algorithms options
-a a|f | A=a|f
Use NW algorithm to align primers to sequence: a - always, f -
as fallback if fast algorithm gives no hit at this position.
-x +|- | X=+|-
Turn on/off recognising of lowercase characters at 5'-ends of
primers as nucleotides that don't need to be aligned to
sequence (floppy tails).
-u +|- | U=+|-
Uppercase primers. To use with files prepared for ``-x=+''
mode, but requiring full primer alignment.
If STS file contains primers with lowercase characters, you have to
use either -x+ or -u+ flag.
Report options
-o output | O=output
Set output file.
-t 1|2|3|4 | T=1|2|3|4
Set output format.
-p +|- | P=+|-
Set hit grouping on/off: when using discontiguous words and
gaps, some hits may be reported multiple times with little
different quality. This option controls reporting only best hit
of group of overlapping hits. Default depends on F and G
values.
-v +|- | V=+|-
Report sequence ids to stderr on/off.
Output formats
1: Traditional: reports whitespace-separated
+ Sequence FASTA identifier
+ POS1..POS2 -- start and end positions of hit (includes length
floppy tail)
+ STS identifier (col. 1 from STS file)
+ STS description (columns 5..last from STS file)
In this format product size equals to POS2-POS1+1
2: Traditional midpoint: reports whitespace-separated
+ Sequence FASTA identifier
+ POS -- middle point position of hit
+ STS identifier (col. 1 from STS file)
+ STS description (columns 5..last from STS file)
3: Tab-separated detailed
+ Sequence FASTA identifier
+ STS identifier (col. 1 from STS file)
+ +|- -- strand of hit (order of primers in hit)
+ POS1 -- start position of hit (does not include floppy tail
if any)
+ POS2 -- end position of hit (does not include floppy tail)
+ SIZE/MIN..MAX -- observed size of hit/expected size range of
STS
+ MISM -- Total number of mismatches for two primers
+ GAPS -- Total number of gaps for two primers
+ STS description (columns 5..last from STS file)
In this format product size may be greater than POS2-POS1+1 for
probes with floppy tails
4: Tab-separated detailed with alignment
Is same as format 3, but also containing visualisations of
alignments in comment lines (lines starting with ``#'')
Exit codes
Zero on success, nonzero on fail
_________________________________________________________________
Reverse e-PCR
Example
work> famap -tN -b genome.famap org/chr_*.fa
work> fahash -b genome.hash -w 12 -f3 ${PWD}/genome.famap
work> re-PCR -s genome.hash -n1 -g1 ACTATTGATGATGA AGGTAGATGTTTTT 120-200
Synopsis
famap [-hV]
famap -b mmapped-file [-t cvt] [fasta-file ...]
famap -d mmapped-file [ord ...]
famap -l mmapped-file [ord ...]
where cvt is one of: off n N nx NX
fahash [-hV]
fahash -b hash-file [build-options] mmapped-file ...
fahash -T hash-file [-o output]
where:
-b hash-file Build hash tables (hash-file) from sequence files,
-T hash-file Print word usage statistics for hash-file
-o outfile Set output file name for -T
build-options:
-w wordsize Set word size when building hash tables
-f period Set discontiguity when building hash tables
-k Skip repeats when building indexfile
-F min,max Set watermarks for fragment size (in Mb) for -v1
-v 1|2 Build file of format version 1 or 2
-c cachesize Use cache size cachesize (for -v2)
re-PCR [-hV]
re-PCR -p hash-file [-g gaps] [-n mism] [primer ...]
re-PCR -P hash-file [-g gaps] [-n mism] [primer-file ...]
re-PCR -s hash-file [search-options] [-o output] [left right lo hi [...]]
re-PCR -S hash-file [search-options] [-o output] [-C bcnt] [stsfile ...]
where:
-p hash-file Perform primer lookup using hash-file, primers in cmdline
-P hash-file Perform primer lookup using hash-file, primers in file
-s hash-file Perform STS lookup using hash-file, STSs in cmdline
-S hash-file Perform STS lookup using hash-file, STSs in file
search-options:
-n mism Set max allowed mismatches per primer for lookup
-g gaps Set max allowed indels per primer for lookup
-m margin Set variability for STS size for lookup
-d min-max Set default STS size (for STSs without size set)
-r +|- Enable/disable reverse STS lookup
-O +|- Enable/disable syscall optimisation
-C batchcnt Set number of STSes per batch
-o outfile Set output file name
Description
Reverse e-PCR (re-PCR) performs STS or primer lookup against sequence
database. Two files are required for database: mmapped-file with
sequence data in fast random-accessible format and hash-file, that
keeps precalculated positions of all words of sequence database.
Use famap to build mmapped-file from FASTA files.
Use fahash to build hash-file, and output word usage statistics.
Use re-PCR to perform STS and primer searches.
Discontiguous words are supported by re-PCR as well as contiguous.
Options
Common options
-V
Print version, exit after parsing commandline
-h
Print help, exit after parsing commandline
famap options
-b mmapped-file
Build famap-file from input fasta file(s). If no fasta files
are set in commandline, use stdin as input.
-d mmapped-file
Dump famap-file contents in fasta format. If ord number(s) are
set, print only sequences with given ordinals.
-l mmapped-file
List famap-file sequence identifiers. If ord number(s) are set,
print only sequences with given ordinals.
-t cvt-table
Use compiled-in table to convert input.
n
Nucleotides. Allowed characters are [actgACTGnN]. Other
letters are converted to n or N. Rest of symbols are
ignored. Case is preserved.
nx
Nucleotides with extended ambiguity codes iupac_na,
lowercase are allowed. Other letters are converted to n
or N. Rest of symbols are ignored. Case is preserved.
N
Nucleotides. Allowed characters are [ACTGN]. [actgn] are
converted to uppercase. Other letters are converted to N.
Rest of symbols are ignored.
NX
Nucleotides with extended ambiguity codes iupac_na,
lowercase are converted to uppercase. Other letters are
converted to N. Rest of symbols are ignored.
Fahash
-b hash-file
Build hash-file for mmapped-file(s).
-T hash-file
Dump word usage statistics for hash-file.
-v version
Build hash-file of version 1 or 2 (2 is default).
-w wordsize
Build hash-file for word wordsize nucleotides long.
-f wordcnt
Build hash-file for wordcnt discontiguous words. 1 stands for
contiguous words.
-F min,max
Use memory watermarks (Mbytes) for hash table size (for -v 1).
-c cachesize
Set cache size for -v 2.
-o output-file
Use output-file for output result of -T.
Commands
-p hash-file
Perform lookup for primers given in commandline.
-s hash-file
Perform lookup for STSes given in commandline.
-S hash-file
Perform lookup for STSes taken from unists file(s) given in
commandline.
Search options
-n mism
Number of mismatches allowed per primer.
-g gaps
Number of gaps allowed per primer.
-m margin
Maximal deviation of observed product size to expected STS
size.
-d lo-hi
Set default STS size range - values used for STSs that have no
size associated in file.
-r +|-
Enable|disable flipped STS lookup (default is "enabled").
-O +|-
Enable|disable syscall optimisation. Since lookup is i/o
expensive, enabling this parameter may improve search
performance diskwise. On the other hand, it takes significantly
more memory and CPU.
-C batchcount
How many STSs from input file to look at in one pass. May affect
performance, especially when used with -O +.
-o output-file
Use output-file for output.
Output format
Is tab-separated file with following fields:
For primer lookup
* Primer ID
* Sequence ID
* Strand
* Hit start
* Hit end
* Mismatches
* Gaps
* Size
For STS lookup
* STS ID
* Sequence ID
* Strand
* Hit start
* Hit end
* Mismatches
* Gaps
* Observed Size/Expected size range
Exit codes
Zero on success, non-zero on errors
Bugs and features
* Mmapped-file path is hardcoded into hash-file exactly as it is given
in commandline when hash-file is being built, which means that when
one performs searches mmapped-file should be accessible with same
name from current directory.
* Mmapped-file is a proprietary format, that could be substituted
with megablast database format, but is not (yet?) for performance
reasons.
* If sequence sizes are large, it may be tricky to create database
with discontiguous words because of memory usage requirements.
Changing parameter -F (for -v 1) or -c (for -v 2) may help.
_________________________________________________________________
File formats
STS database
Is single-tab (i.e. two tabs in a row mean "empty field")
separated file with following fields:
+ STS id (required).
+ First (left) primer (required).
+ Second (right) primer (required).
+ Product size (optional): can be number for strict size, or
two numbers separated by dash for size range.
+ Additional info, that can be used by applications (optional).
Primers should be in iupac_na encoding, everything that is not
ACTG or actg is translated to N or n. Primers sequences should
be uppercase, unless you want to use file with e-PCR -x+ flag -
then several first nucleotides of primers may be
lowercase-masked. If primers are not fully uppercase and you
don't use -x+ flag, you have to use -u+ flag with e-PCR.
Primers file
Is single-tab (i.e. two tabs in a row mean "empty field")
separated file with following fields:
+ Primer id (required).
+ Primer sequence.
_________________________________________________________________
e-PCR-2.3.12/Changes 0000644 0011377 0001062 00000007505 11745334031 013702 0 ustar rotmistr contig version 2.3.12
affected: few *.cpp files
description:
- Added include of cstdio to make it compilable with gcc 4.4 (thanks to Ubuntu and
Debian-med teams)
version 2.3.11
affected: stsmatch_i.?pp
description:
- When parsing commandline if -d was used before -w or -f default size was reset to
program's default
version 2.3.10
affected: includes in a few files
description:
- Applied user-provided patch to add includes for gcc-4.3 compatibility
version 2.3.9
affected: fahash_lookup.cpp
description:
- Fixed a bug leading to false negatives when run without optimization
version 2.3.8
affected: e-PCR_main.cpp build_cfg.h
added: ncbi/Makefile ncbi/Makefile.INCLUDE
description:
- Added possibility to compile with NCBI C++ toolkit and read blastdb instead of fasta
version 2.3.7
affected: build_cfg.h fast_seqio*.?pp fahash*.?pp stsmatch_m.cpp
description:
- Fixed reverse e-pcr code to work on Linux 64 bit architecture
version 2.3.6
added: stand/Makefile.vc8
affected: mswin.h *_main.cpp
description:
- Fixed some warnings and made it compileable by MS Visual C++ 8.0
affected:
version 2.3.5
affected: fahash_create2.cpp
description:
- Made more details to report seek failure
version 2.3.4
affected: re-PCR_main.cpp, fahash_main.cpp
description:
- Minor changes to progress reporting
version 2.3.3
affected: e-PCR_main.cpp (forward e-PCR) output
description:
- Attention: Hit positions do not include floppy tails anymore
- TODO: same with reverse e-PCR
- Fixed typo in BUILD.*
version 2.3.2
affected: stsmatch_i.?pp, e-PCR_main.cpp (forward e-PCR)
description:
- Progress indication is more detailed and controlled
version 2.3.1
affected: sts_i.hpp, stsmatch_m.?pp (forward e-PCR)
description:
- Bugfix: some hits were lost with large margin
- Bugfix: re-PCR for win32 now works again
- Can read primers from file
version 2.3.0
affected: sts_i.hpp, stsmatch_m.?pp (forward e-PCR)
description:
- Support for overhang nucleotides in primers as masked characters
version 2.2.3
affected: Makefiles
description:
- Compiling under Mac OS X
version 2.2.2
affected: API, re-PCR, e-PCR, commandline
description:
- Better consistency for misalignments found and alignments shown
version 2.2.1
affected: API, re-PCR, commandline
description:
- New output for re-PCR is available (with alignments)
version 2.2.0
affected: Building, API, data format, e-PCR, re-PCR, commandline
description:
- API: Alignment now is performed by IAlign interface
- seqcmp class is obsolete
- Added new alignment method, more precise but slow (fixes bug)
- New output is available, with graphic representation of alignments
version 2.1.0
affected: Building, API, data format, e-PCR, commandline
description:
- e-PCR, re-PCR: fixed bug that forbade overlapping primers
- e-PCR : fixed `empty description is shown as to "?"'
- re-PCR : hash file version 2, new build algorithm
new "fahash" program generates hash files
re-PCR is for lookup only
version 2.0.5
affected: Building, e-PCR, famap, commandline
description: All tools are working for windows. Fasta conversion is added.
version 2.0.4
affected: Building
description: Now compileable with Borland C++ Builder 5.5 for win32
version 2.0.3
affected: commandline parsing, API, defaults
description: Changed defaults for M (Margin); now can be changed from
commandline
version 2.0.2
affected: e-PCR commandline parsing
description: Fixed commandline processing -- compat options are allowed
anywhere
version 2.0.1
affected: API
description: independent report for left and right primer misalignments
in forward e-pcr API
e-PCR-2.3.12/defaults.h 0000644 0011377 0001062 00000004355 11745334031 014367 0 ustar rotmistr contig // $Id: defaults.h,v 1.2 2004/03/30 18:52:19 rotmistr Exp $
/* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ========================================================================= */
//// Compile-time defaults and limits for e-PCR parameters.
//// Each group provides a *_DEFAULT value together with *_MIN / *_MAX bounds.
#ifndef EPCR_DEFAULTS__H
#define EPCR_DEFAULTS__H
//// Word size used for the primers hash
#define ePCR_WDSIZE_DEFAULT 7
#define ePCR_WDSIZE_MIN 3
#define ePCR_WDSIZE_MAX 8
//// Number of mismatches allowed
#define ePCR_MMATCH_DEFAULT 0
#define ePCR_MMATCH_MIN 0
#define ePCR_MMATCH_MAX 10
//// Number of gaps (indels) allowed
#define ePCR_GAPS_DEFAULT 0
#define ePCR_GAPS_MIN 0
#define ePCR_GAPS_MAX 5
//// Margin (allowed deviation in product size)
#define ePCR_MARGIN_DEFAULT 50
#define ePCR_MARGIN_MIN 0
#define ePCR_MARGIN_MAX 10000
//// Default STS size range (low and high bound)
#define ePCR_DEFAULT_size_lo 100
#define ePCR_DEFAULT_size_hi 350
#endif
/*
* $Log: defaults.h,v $
* Revision 1.2 2004/03/30 18:52:19 rotmistr
* Updated default STS size
*
* Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
* Package that includes e-PCR, reverse e-PCR, and sequence data preparation
* program for reverse e-PCR looks ready
*
* Revision 1.2 2003/11/20 02:12:29 rotmistr
* Fixed id, log tags and copyright notice
*
*/
e-PCR-2.3.12/fahash_defines.h 0000644 0011377 0001062 00000003643 11745334031 015506 0 ustar rotmistr contig // $Id: fahash_defines.h,v 1.2 2004/04/27 00:01:54 rotmistr Exp $
/* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ========================================================================= */
// Constants describing the reverse e-PCR hash (index) file.
// NOTE(review): how each value is laid out in the file is not visible here -
// confirm against the code that reads and writes the header.
#ifndef FAHASH_DEFINES__HPP
#define FAHASH_DEFINES__HPP
// Signature text; the literal carries an explicit trailing "\0" in addition
// to the terminator the compiler appends.
#define SIGNATURE "Rev e-PCR index\0"
// Version codes for the first and the second hash file format
#define FILE_VERSION 0x00010000U
#define FILE_VERSION2 0x00020000U
// Byte order marker word (presumably used to detect endianness - TODO confirm)
#define BYTE_ORDER_WORD 0x01234567U
// Header size (presumably in bytes - TODO confirm)
#define HEADER_SIZE 4096
#endif
/*
* $Log: fahash_defines.h,v $
* Revision 1.2 2004/04/27 00:01:54 rotmistr
* Second version of reverse hash file started
*
* Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
* Package that includes e-PCR, reverse e-PCR, and sequence data preparation
* program for reverse e-PCR looks ready
*
* Revision 1.2 2003/11/20 02:12:29 rotmistr
* Fixed id, log tags and copyright notice
*
*/
e-PCR-2.3.12/fahash_internal.hpp 0000644 0011377 0001062 00000004360 11745334031 016242 0 ustar rotmistr contig /* $Id: fahash_internal.hpp,v 1.3 2007/07/11 20:49:29 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
#ifndef FAHASH_INTERNAL__HPP
#define FAHASH_INTERNAL__HPP
// Throw a runtime_error whose message is `a`, followed by ": " and the
// strerror() text for the current errno.
// The argument is parenthesised so that compound expressions (conditional
// operator, comma, etc.) are evaluated as a whole before ": " is appended.
// `a` must be something that supports operator+ with a C string
// (presumably a std::string - verify at call sites).
#define SYSERROR(a) throw runtime_error((a)+": "+strerror(errno))
// Element type of the hash tables, taken from the indexer base class
typedef AFaIndexerBase::THashElement THashElement;
// Mask with only the most significant bit of THashElement set
static const THashElement kHighBit = THashElement( 1 ) << ( 8 * sizeof(THashElement) - 1 );
// Size units
static const unsigned kKilobyte = 1024;
static const unsigned kMegabyte = 1024 * kKilobyte;
static const unsigned kGigabyte = 1024 * kMegabyte;
// Fragment size limits: 3/16 and 6/16 of kGigabyte
// (201326592 and 402653184 respectively).
// NOTE(review): presumably the default watermarks for fragment size used
// when building version 1 hash files - confirm against the builder.
static const unsigned kMinFragSize = kGigabyte / 4 / 4 * 3;
static const unsigned kMaxFragSize = kGigabyte / 4 / 4 * 6;
//static const unsigned min_frag_size=100*Megabyte/4*3;
//static const unsigned max_frag_size=100*Megabyte/4*6;
#endif
/*
* $Log: fahash_internal.hpp,v $
* Revision 1.3 2007/07/11 20:49:29 rotmistr
* Made 64bit-compatible
*
* Revision 1.2 2004/09/03 19:06:41 rotmistr
* Code formatting changes
*
*/
e-PCR-2.3.12/build_cfg.h 0000644 0011377 0001062 00000011156 11745334032 014474 0 ustar rotmistr contig /* $Id: build_cfg.h,v 1.13 2008/03/26 16:04:29 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
/*
 * Build configuration for the e-PCR sources.
 *
 * With STANDALONE defined this header itself supplies the scope macros
 * (BEGIN_SCOPE, END_SCOPE, BEGIN_NCBI_SCOPE, ...), the fixed-size integer
 * typedefs Int1..Int8 / Uint1..Uint8 and the madvise() glue, separately for
 * _WIN32 and for other platforms. Without STANDALONE only headers are
 * included here. In both cases EPCR_SCOPE, FILE_BINARY and FILE_TEXT are
 * defined by the end of the header.
 */
#ifndef EPCR_BUILD__HPP
#define EPCR_BUILD__HPP
#ifdef STANDALONE
#include
/* Make sure namespace std is declared, so that the "using namespace std"
 * produced by BEGIN_NCBI_SCOPE is always valid. */
namespace std {}
/* Open / close a namespace called a */
#define BEGIN_SCOPE(a) namespace a {
#define END_SCOPE(a) }
/* Open namespace ncbi and pull namespace std into it */
#define BEGIN_NCBI_SCOPE BEGIN_SCOPE(ncbi) USING_SCOPE(std);
#define END_NCBI_SCOPE END_SCOPE(ncbi)
/* Expands to a using-directive without the trailing semicolon */
#define USING_SCOPE(a) using namespace a
#define USING_NCBI_SCOPE USING_SCOPE(ncbi)
#ifdef _WIN32
#include
/* Mode suffixes for binary and text files */
#define FILE_BINARY "b"
#define FILE_TEXT "t"
//#warning "Using Borland C/C++ Builder config"
/* madvise() calls expand to nothing here; the advice constants are
 * defined as 0 so that code naming them still compiles. */
#define madvise(a,b,c) // no madvise
#define MADV_SEQUENTIAL 0
#define MADV_DONTNEED 0
#ifdef __cplusplus
BEGIN_NCBI_SCOPE
#endif
/* Fixed-size integer types built from the native types */
typedef char Int1;
typedef short Int2;
typedef int Int4;
typedef long long Int8;
typedef unsigned char Uint1;
typedef unsigned short Uint2;
typedef unsigned int Uint4;
typedef unsigned long long Uint8;
#ifdef __cplusplus
END_NCBI_SCOPE
#endif
#else // _WIN32
#include
#include
#include
#ifdef NATIVE_LARGEFILES
#include
#endif // NATIVE_LARGEFILES
#include
#ifdef __cplusplus
BEGIN_NCBI_SCOPE
#endif
/* Fixed-size integer types built from the intN_t / uintN_t types */
typedef int8_t Int1;
typedef int16_t Int2;
typedef int32_t Int4;
typedef int64_t Int8;
typedef uint8_t Uint1;
typedef uint16_t Uint2;
typedef uint32_t Uint4;
typedef uint64_t Uint8;
#ifdef __cplusplus
END_NCBI_SCOPE
#endif
/* Explicit C-linkage prototype for madvise() */
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
int madvise(void* addr, size_t len, int advice);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _WIN32
#else // STANDALONE
#include
#include
#include
#include
#endif // STANDALONE
/* Name of the namespace used for e-PCR code (see BEGIN_SCOPE) */
#define EPCR_SCOPE pcr_tools
/* Where no mode suffix was defined above, define both as empty so that
 * they can still be written next to a mode string. */
#ifndef FILE_BINARY
#define FILE_BINARY
#endif
#ifndef FILE_TEXT
#define FILE_TEXT
#endif
#endif
/*
* $Log: build_cfg.h,v $
* Revision 1.13 2008/03/26 16:04:29 rotmistr
* Added support for blastdb files
*
* Revision 1.12 2007/07/11 20:49:29 rotmistr
* Made 64bit-compatible
*
* Revision 1.11 2007/07/05 16:23:08 rotmistr
* Forgot two changes
*
* Revision 1.10 2007/07/05 16:05:58 rotmistr
* Made things compileable by MS Visual C++ 8.0
*
* Revision 1.9 2004/09/03 21:28:49 rotmistr
* Fixes to compile with Borland C++ 5.5
*
* Revision 1.8 2004/09/03 15:54:43 rotmistr
* Compilation for Mac OS/X
*
* Revision 1.7 2004/05/27 20:35:46 rotmistr
* Version 2.1.0 with appropriate changes (see Changes) is ready for tests.
*
* Revision 1.6 2004/04/15 14:18:22 rotmistr
* Fix to compile with NCBI toolkit (CGI)
*
* Revision 1.5 2004/04/06 04:53:17 rotmistr
* All is compileable with BCC5.5 and runnable on WIndows
*
* Revision 1.4 2004/04/01 16:37:40 rotmistr
* Cleaned after adding windows capabilities
*
* Revision 1.3 2004/04/01 05:57:52 rotmistr
* Compilable with borland C++
*
* Revision 1.2 2004/02/04 21:23:21 rotmistr
* - gcc-3.3.2 compatible
* - better postfiltering for reverse-e-PCR for discontiguos words
* - cgi added, that supports:
* -- contig to chromosome mapping
* -- simple mapviewer links
* -- unists links
* -- discontiguos words
*
* Revision 1.1.1.1 2003/12/23 18:17:27 rotmistr
* Package that includes e-PCR, reverse e-PCR, and sequence data preparation
* program for reverse e-PCR looks ready
*
*/
e-PCR-2.3.12/getopt.c 0000644 0011377 0001062 00000007475 11745334032 014064 0 ustar rotmistr contig /* $Id: getopt.c,v 1.2 2004/09/03 19:59:25 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
#include
#include
/*
 * Local implementation of getopt() and of its global state variables.
 */
/* Index of the argv element being examined; starts at 1 */
int optind=1;
/* Option character being processed; -1 when there is none */
int optopt=-1;
/* Incremented each time an error is reported by getopt() below */
int opterr=0;
/* Argument of the last option that takes one, 0 otherwise */
const char* optarg=0;
/* Scan position kept between calls
 * (presumably points into the argv word being scanned - TODO confirm) */
static char * nextarg=0;
/*
 * Returns the option character (optopt) for a recognised option and -1
 * when scanning stops or when an option is missing its argument.
 * Diagnostics are printed to stderr and counted in opterr.
 */
int getopt(int argc, char ** argv, const char* optstring)
{
while(optind=argc) {
fprintf(stderr,
"getopt: need argument for %c\n",
optopt);
++opterr;
return -1;
}
optarg=argv[optind];
}
/* this argv word is finished: forget the scan position, go to next word */
nextarg=0;
++optind;
}
else {
/* no argument for this option */
optarg=0;
/* continue with the next character if there is one,
 * otherwise move on to the next argv word */
if(nextarg[1]) ++nextarg;
else { nextarg=0; ++optind; }
}
return optopt;
} else {
fprintf(stderr,
"getopt: invalid option %c\n",
optopt);
/* skip the offending character and count the error */
if(nextarg[1]) ++nextarg;
else { nextarg=0; ++optind; }
++opterr;
}
} while(0);
break;
case ':': // ':' should not be used as option
fprintf(stderr,"getopt: bad option %c\n",optopt);
if(nextarg[1]) ++nextarg;
else { nextarg=0; ++optind; }
++opterr;
break;
}
}
/* nothing more to report: reset optopt and return -1 */
return optopt=-1;
}
/*
* $Log: getopt.c,v $
* Revision 1.2 2004/09/03 19:59:25 rotmistr
* *** empty log message ***
*
* Revision 1.1 2004/04/02 15:43:55 rotmistr
* *** empty log message ***
*
*/
e-PCR-2.3.12/bin-io.hpp 0000644 0011377 0001062 00000011175 11745334032 014274 0 ustar rotmistr contig /* $Id: bin-io.hpp,v 1.5 2008/04/28 16:38:45 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
#ifndef EPCR_BIN_IO__HPP
#define EPCR_BIN_IO__HPP
#include
#include
#include
#include
#include
#include
#include
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(EPCR_SCOPE)
// This code is being used INTERNALLY by e-PCR library
// Byte order tags. The two values are byte-reversed images of each other
// (0x12345678 <-> 0x78563412).
// NOTE(review): presumably compared with a marker word read from a file to
// decide whether values need byte swapping - confirm against callers.
enum EByteOrder { eHiEndian = 0x78563412, eLoEndian = 0x12345678 };
// Byte order conversion.
// Returns x with the order of its bytes reversed when do_swap is true,
// and x unchanged otherwise. x is taken by value, so the caller's object
// is never modified.
template <class T>
inline T BoCvt( T x, bool do_swap )
{
    if( do_swap ) {
        unsigned char * a = (unsigned char*) &x;
        unsigned char * b = a + sizeof( x ) - 1;
        // walk from both ends towards the middle exchanging bytes
        for( ; a < b ; ++a, --b ) std::swap( *a, *b );
    }
    return x;
}
// Write sz objects of type T, starting at t, to file descriptor fd as raw
// bytes.
// Throws runtime_error ("write failed: " + strerror text) unless write()
// reports that exactly sz * sizeof(T) bytes were written.
template <class T>
inline void Write( int fd, const T& t, unsigned sz = 1 )
{
    const size_t bytes = sz * sizeof( T );
    if( (size_t) write( fd, &t, bytes ) != bytes )
        throw std::runtime_error( "write failed: " + std::string( std::strerror( errno ) ) );
}
// String flavour of Write() for file descriptors: the length is written
// first as a Uint4, the characters follow without a terminator.
// The third parameter is not used.
// Throws runtime_error if write() does not report the whole string written.
template<>
inline void Write( int fd, const string& t, unsigned )
{
    const size_t len = t.length();
    Write( fd, Uint4( len ) );
    const size_t written = (size_t)write( fd, t.data(), len );
    if( written != len )
        throw runtime_error( "write failed: " + string( strerror( errno ) ) );
}
// Write sz objects of type T, starting at t, to stdio stream f as raw bytes.
// Throws runtime_error ("write failed: " + strerror text) unless fwrite()
// reports all sz items written.
template <class T>
inline void Write( FILE* f, const T& t, unsigned sz = 1 )
{
    if( (size_t)fwrite( &t, sizeof( T ), sz, f ) != sz )
        throw std::runtime_error( "write failed: " + std::string( std::strerror( errno ) ) );
}
// String flavour of Write() for stdio streams: the length is written first
// as a Uint4, the characters follow without a terminator.
// The third parameter is not used.
// Throws runtime_error if fwrite() does not report the whole string written.
template<>
inline void Write(FILE* f, const string& t, unsigned )
{
    const size_t len = t.length();
    Write( f, Uint4( len ) );
    const size_t written = (size_t)fwrite( t.data(), 1, len, f );
    if( written != len )
        throw runtime_error( "write failed: " + string( strerror( errno ) ) );
}
// Read one object of type T from file descriptor fd as raw bytes.
// The result starts out as T(-1); when read() delivers fewer than
// sizeof(T) bytes without an error (e.g. at end of file) that value,
// possibly partly overwritten, is returned.
// Throws runtime_error ("read failed: " + strerror text) when the read
// comes up short and errno is set.
template <class T>
inline T Read( int fd )
{
    T t( -1 );
    // read() leaves errno untouched on a short read or end of file, so clear
    // it first: a stale value from an earlier call must not be reported here
    errno = 0;
    if( (size_t)read( fd, &t, sizeof( T ) ) != sizeof( T ) && errno )
        throw std::runtime_error( "read failed: " + std::string( std::strerror( errno ) ) );
    return t;
}
template<>
inline string Read( int fd )
{
vector t( Read( fd ) );
if( t.size() && (size_t)read( fd, &t[0], t.size() ) != t.size() && errno )
throw runtime_error( "read failed: " + string( strerror( errno ) ) );
return string( &t[0], t.size() );
}
// Advance the position of file descriptor fd to the next multiple of page,
// unless it is already on one.
// Returns the position reported by the last lseek64() call made.
// Results of lseek64() are not checked for errors.
inline off64_t SeekAlign( int fd, unsigned page = 4096 )
{
    off64_t pos = lseek64( fd, 0, SEEK_CUR );
    const off64_t rest = pos % page;
    if( rest != 0 ) {
        // skip the remainder of the current page
        pos = lseek64( fd, page - rest, SEEK_CUR );
    }
    return pos;
}
// Advance the position of stdio stream f to the next multiple of page,
// unless it is already on one.
// Returns the stream position as reported by ftello64() afterwards.
// Results of fseeko64() / ftello64() are not checked for errors.
inline off64_t SeekAlign( FILE* f, unsigned page = 4096 )
{
    const off64_t start = ftello64( f );
    const off64_t rest = start % page;
    if( rest == 0 ) return start;
    // skip the remainder of the current page and report where we ended up
    fseeko64( f, page - rest, SEEK_CUR );
    return ftello64( f );
}
END_SCOPE(EPCR_SCOPE)
END_NCBI_SCOPE
#endif
/*
* $Log: bin-io.hpp,v $
* Revision 1.5 2008/04/28 16:38:45 rotmistr
* Applied patch to build with gcc-4.3
*
* Revision 1.4 2007/07/11 20:49:29 rotmistr
* Made 64bit-compatible
*
* Revision 1.3 2004/04/01 05:57:52 rotmistr
* Compilable with borland C++
*
* Revision 1.2 2004/02/04 21:23:21 rotmistr
* - gcc-3.3.2 compatible
* - better postfiltering for reverse-e-PCR for discontiguos words
* - cgi added, that supports:
* -- contig to chromosome mapping
* -- simple mapviewer links
* -- unists links
* -- discontiguos words
*
* Revision 1.1.1.1 2003/12/23 18:17:27 rotmistr
* Package that includes e-PCR, reverse e-PCR, and sequence data preparation
* program for reverse e-PCR looks ready
*
*/
e-PCR-2.3.12/e-PCR_main.cpp 0000644 0011377 0001062 00000074210 11745334032 014763 0 ustar rotmistr contig /* $Id: e-PCR_main.cpp,v 1.25 2008/06/18 14:45:33 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifndef STANDALONE
//#include
//#include
#include
#include
#endif
USING_NCBI_SCOPE;
USING_SCOPE(EPCR_SCOPE);
// Application object of the e-PCR command line program: keeps the command
// line, the option values and the objects used to run the search.
class CMain
{
public:
// When to use the alignment objects (m_AlignL / m_AlignR)
enum EAlignMode { eNever, eAlways, eFallback };
// Stores the command line and sets the initial option values: output
// format 1, verbosity 0, default mismatch and gap limits, no aligners,
// hash with the default word size and the default margin.
CMain(int c, char ** v):
argc(c),argv(v),done(false),ofmt(1),
postprocess(true),have_postprocess(false),verbose(0),
m_MaxMismatch(ePCR_MMATCH_DEFAULT),
m_MaxGaps(ePCR_GAPS_DEFAULT),
m_AlignL(0), m_AlignR(0), m_AlignMode(eNever)
#ifndef STANDALONE
, m_blastdbs( false )
#endif
{
stsFileHash.SetHash(CHashSet(ePCR_WDSIZE_DEFAULT,0));
pcrMachine.SetMargin(ePCR_MARGIN_DEFAULT);
}
// Entry point (defined outside the class)
int Run();
protected:
int Execute();
int ParseCmdline();
// Print usage text to the given stream (stdout by default)
int Help(FILE* = stdout);
int Version();
void ParseVerbose(const char * opt);
protected:
// Command line as passed to the constructor
int argc;
char ** argv;
// Initially false; set to true by Help()
bool done;
protected:
// CPcrMachineCompat pcrMachine;
CPcrMachine pcrMachine;
CStsFileHash stsFileHash;
string stsfile;
list fafiles;
// Output format number (initially 1) and output file name
int ofmt;
string ofile;
// postprocess is initially true, have_postprocess initially false
// (presumably the latter records an explicit -p option - TODO confirm)
bool postprocess, have_postprocess;
int verbose;
// Limits on mismatches and gaps, initialised from the ePCR_*_DEFAULT macros
int m_MaxMismatch, m_MaxGaps;
// Alignment objects, both null until set up elsewhere
// (presumably for left and right primer - TODO confirm)
IAlign * m_AlignL, * m_AlignR;
EAlignMode m_AlignMode;
#ifndef STANDALONE
// Only in builds without STANDALONE: blastdb input flag and gi list name
bool m_blastdbs;
string m_gilist;
#endif
};
/* Prints the usage text to `out` and marks the run as finished.
 *
 * out: destination stream (the declaration defaults it to stdout).
 * Returns 0 always.  Sets `done`, so Run() will not call Execute().
 *
 * The texts differ from the previous revision only in corrected spelling
 * ("precise alignments", "Always", "discontiguous").
 */
int CMain::Help(FILE* out)
{
    done=true;
    fprintf(out,
            "usage: [-hV] [posix-options] stsfile [fasta ...] "
            "[compat-options]\n"
            "where posix-options are:\n");
    fprintf(out,"\t-m ##\tMargin (default %d)\n",ePCR_MARGIN_DEFAULT);
    fprintf(out,"\t-w ##\tWordsize (default %d)\n",ePCR_WDSIZE_DEFAULT);
    fprintf(out,"\t-n ##\tMax mismatches allowed (default %d)\n",
            ePCR_MMATCH_DEFAULT);
    fprintf(out,"\t-g ##\tMax indels allowed (default %d)\n",
            ePCR_GAPS_DEFAULT);
    fprintf(out,"\t-f ##\tUse ## discontiguous words, slow if ##>1\n");
    fprintf(out,"\t-o ##\tSet output file\n");
    fprintf(out,"\t-t ##\tSet output format:\n"
            "\t\t1 - classic, range (pos1..pos2)\n"
            "\t\t2 - classic, midpoint\n"
            "\t\t3 - tabular\n"
            "\t\t4 - tabular with alignment in comments (slow)\n"
            );
    fprintf(out,"\t-d##-##\tSet default size range (default %d-%d)\n",
            ePCR_DEFAULT_size_lo,ePCR_DEFAULT_size_hi);
    fprintf(out,"\t-p +-\tTurn hits postprocess on/off\n");
    fprintf(out,"\t-v ##\tVerbosity flags\n");
    fprintf(out,"\t-a a|f\tUse precise alignments (only if gaps>0), slow\n"
            "\t\t a - Always or f - as Fallback\n");
    // The two defaults below are read from the live hash object, so the text
    // reflects options already parsed before -h was seen.
    fprintf(out,"\t-x +-\tUse 5'-end lowercase masking of primers "
            "(default %s)\n",stsFileHash.AllowOverhang()?"+":"-");
    fprintf(out,"\t-u +-\tUppercase all primers "
            "(default %s)\n",stsFileHash.UnmaskPrimers()?"+":"-");
#ifndef STANDALONE
    fprintf(out,"\t-b +-\tInput sequences are in blastdb\n");
    fprintf(out,"\t-l file\tLimit blastdb sequences to list of gis from the file\n");
#endif
    fprintf(out,"and compat-options (duplicate posix-options) are:\n");
    fprintf(out,"\tM=##\tMargin (default %d)\n",ePCR_MARGIN_DEFAULT);
    fprintf(out,"\tW=##\tWordsize (default %d)\n",ePCR_WDSIZE_DEFAULT);
    fprintf(out,"\tN=##\tMax mismatches allowed (default %d)\n",
            ePCR_MMATCH_DEFAULT);
    fprintf(out,"\tG=##\tMax indels allowed (default %d)\n",
            ePCR_GAPS_DEFAULT);
    fprintf(out,"\tF=##\tUse ## discontiguous words\n");
    fprintf(out,"\tO=##\tSet output file to ##\n");
    fprintf(out,"\tT=##\tSet output format (1..3)\n");
    fprintf(out,"\tD=##-##\tSet default size range\n");
    fprintf(out,"\tP=+-\tPostprocess hits on/off\n");
    fprintf(out,"\tV=##\tVerbosity flags\n");
    fprintf(out,"\tA=a|f\tUse precise alignments (only if gaps>0), slow\n"
            "\t\t a - Always or f - as Fallback\n");
    fprintf(out,"\tX=+-\tUse 5'-end lowercase masking of primers "
            "(default %s)\n",stsFileHash.AllowOverhang()?"+":"-");
    fprintf(out,"\tU=+-\tUppercase all primers "
            "(default %s)\n",stsFileHash.UnmaskPrimers()?"+":"-");
#ifndef STANDALONE
    fprintf(out,"\tB=+-\tInput sequences are in blastdb\n");
    fprintf(out,"\tL=file\tLimit blastdb sequences to list of gis from the file\n");
#endif
    fprintf(out,"\t-mid\tSame as T=2\n");
    fprintf(out,"verbosity flags are (flags may be changed in future):\n"
            "\t- set all progress reporting off (default)\n"
            "\t+ switch error reporting to basic (same as Sl)\n"
            "\tt display time\n"
            "\tl display fasta identifiers\n"
            "\to display sequence offset (currently: 3' position of first primer)\n"
            "\tp display percent of sequence processed\n"
            "\ts report every sequence start\n"
            "\te report every sequence end\n"
            "\tS newline after sequence start report\n"
            "\tE newline after sequence end report\n"
            "\tP newline after sequence progress report\n");
    return 0;
}
/* Parses a size range given as "LO-HI" or a single number "N" (meaning N-N)
 * and installs it as the default size range of `stsFileHash`.
 *
 * Throws runtime_error("bad range: <str>") unless LO > 0 and HI >= LO.
 */
void SetDefaultSize(CStsFileHash& stsFileHash, const char * str)
{
    char * tail = 0;
    const int lower = strtol(str, &tail, 10);

    // Without a '-' right after the first number the range collapses to one value.
    int upper = lower;
    if(tail != 0 && *tail == '-')
        upper = atoi(tail + 1);

    if(!(lower > 0 && upper >= lower))
        throw runtime_error("bad range: "+string(str));

    stsFileHash.SetDefaultSize(lower, upper);
}
// Parses the command line held in argc/argv into the members of CMain.
// The visible part handles the getopt-style ("posix") options; it returns 0
// after help/version output and 1 (after printing help to stderr) when no
// positional argument follows the options.
// NOTE(review): this block is damaged.  On the line that starts the
// "Parse compat options" loop the text between `optind` and
// `GetOverhangChars` is missing, so the rest of ParseCmdline and the beginning
// of another function (a CbkMatch that prints the "classic" output format)
// are lost.  Restore the span from the upstream e-PCR sources before relying
// on anything below that point.
int CMain::ParseCmdline()
{
int optchar;
// NOTE(review): the leading '+' in the option string is the GNU getopt request
// to stop at the first non-option argument -- confirm for the target platform.
while((optchar=getopt(argc,argv,"+hVf:m:n:w:g:o:t:p:v:d:a:x:u:b:l:"))!=-1) {
switch(optchar) {
// Help() and Version() both set `done`; parsing still continues.
case 'h': Help(); break;
case 'V': Version(); break;
case 'a':
// First letter selects the mode: 'a' -> eAlways, 'f' -> eFallback.
// Any other argument stores eNever (also replacing a mode chosen by an
// earlier -a) and prints a warning.
m_AlignMode=(optarg[0]=='a'?eAlways:
optarg[0]=='f'?eFallback:eNever);
if(m_AlignMode==eNever) {
fprintf(stderr,"? Unknown alignment mode `%s' ignored\n",
optarg);
}
break;
case 'm':
// "-mid" reaches this case as option 'm' with argument "id" and selects
// output format 2; any other argument is the margin.
if(strcmp(optarg,"id")==0) ofmt=2;
else pcrMachine.SetMargin(atoi(optarg));
break;
// -w replaces the word size and keeps the current word count;
// -f keeps the word size and replaces the word count.
case 'w': stsFileHash.SetHash(CHashSet(atoi(optarg), stsFileHash.GetWordCount())); break;
case 'n': m_MaxMismatch=atoi(optarg); break;
case 'g': m_MaxGaps=atoi(optarg); break;
case 'f': stsFileHash.SetHash(CHashSet(stsFileHash.GetWordSize(), atoi(optarg))); break;
case 'd': SetDefaultSize(stsFileHash,optarg); break;
case 'o': ofile=optarg; break;
case 't': ofmt=atoi(optarg); break;
// have_postprocess records that -p was given at all; an argument that starts
// with neither '+' nor '-' leaves `postprocess` unchanged.
case 'p': have_postprocess=true;
postprocess=*optarg=='+'?true:*optarg=='-'?false:postprocess;
break;
#ifndef STANDALONE
case 'b': m_blastdbs=*optarg=='+'?true:*optarg=='-'?false:m_blastdbs;
break;
case 'l': m_gilist = optarg; break;
#endif
case 'v':
ParseVerbose(optarg);
break;
// -x and -u switch a CStsFileHash flag on for '+', off for '-', and ignore
// any other argument.
case 'x':
if(*optarg=='+')
stsFileHash.SetFlags(CStsFileHash::fAllowOverhang,true);
else if(*optarg=='-')
stsFileHash.SetFlags(CStsFileHash::fAllowOverhang,false);
break;
case 'u':
if(*optarg=='+')
stsFileHash.SetFlags(CStsFileHash::fUnmaskPrimers,true);
else if(*optarg=='-')
stsFileHash.SetFlags(CStsFileHash::fUnmaskPrimers,false);
break;
}
}
// Help or version was printed: nothing else to parse.
if(done) return 0;
// No positional argument left (the usage text names the STS file as the
// first one): show usage on stderr and report failure.
if(optind >= argc) { Help(stderr); return 1; }
// Parse compat options
// NOTE(review): the next line is where the source text is truncated; the
// lines after it are the tail of a different function.
for(; optindGetOverhangChars(ISts::eLeft);
//int ovhg2 = sts->GetOverhangChars(ISts::eRight);
// pos1 += ovhg1;
// pos2 -= ovhg2;
// Surviving tail: formats the hit position either as a midpoint or as a
// "pos1+1..pos2" range and prints one line with sequence label, position,
// STS name and STS description.
// NOTE(review): `position`, `seq_label`, `show_midpt` and `m_Out` are
// declared in the lost part.  The "%.*s" precisions are passed the raw
// length() results; printf expects int there -- check the return type.
if (show_midpt)
sprintf(position,"%d", 1 + (pos1+pos2-1)/2);
else
sprintf(position,"%d..%d",pos1+1,pos2);
fprintf(m_Out,"%-10s %-16s %-14.*s %.*s\n",
seq_label,position,
sts->GetName().length(),sts->GetName().data(),
sts->GetDescription().length(),
sts->GetDescription().data());
return 1;
}
/* PCR-machine callback that reports every hit as one tab-separated line and,
 * when `showalign` is set, precedes it with a commented alignment of both
 * primers against the sequence (see CbkMatch).
 */
class CPcrMachineCallbackTabular:public CPcrMachineCallback
{
public:
    /* out: output stream handed to the base class; showalign: print the
     * alignment block as well; gaps: second argument of the LCS matrix. */
    CPcrMachineCallbackTabular(FILE * out, bool showalign, int gaps):
        CPcrMachineCallback(out),
        m_ShowAlign(showalign),
        m_Matrix(127,gaps),
        m_SeqData(0),
        m_SeqLength(0)
    {}

    virtual ~CPcrMachineCallbackTabular() throw () {}

    virtual void CbkMatch(const ISts * sts, unsigned pos1, unsigned pos2,
                          const SScore* score) ;

    /* Remembers where the current sequence lives.  Only the pointer is kept,
     * no copy is made, so `data` has to stay valid for as long as CbkMatch
     * may be called for this sequence. */
    virtual void CbkSequenceData(const char * data, unsigned size)
    {
        m_SeqLength=size;
        m_SeqData=data;
    }

protected:
    bool m_ShowAlign;
    CLcsMatrix m_Matrix;
    const char * m_SeqData;     // not owned
    unsigned m_SeqLength;
};
// Reports one hit in tabular form.
//   sts        - the STS that matched
//   pos1, pos2 - hit coordinates on the current sequence as delivered by the
//                caller; they are narrowed by the primers' overhang characters
//                before the tabular line is printed
//   score      - mismatch/gap counts per primer and the actual product length
// When alignment display is on, sequence data has been supplied through
// CbkSequenceData and pos2 does not exceed the sequence length, both primers
// are re-aligned with m_Matrix, a '#'-prefixed alignment block is printed and
// the mismatch/gap totals come from the matrix instead of from `score`.
// NOTE(review): `vector left, right;` and the `Build >(`, `Graph >(`,
// `Stat >(` calls have evidently lost their template arguments in this copy
// of the file; restore them from the upstream sources.
void CPcrMachineCallbackTabular::CbkMatch (
const ISts * sts,
unsigned pos1, unsigned pos2,
const SScore* score)
{
// Defaults taken from the score; replaced below when the alignment is rebuilt.
int mism=score->mism_l+score->mism_r;
int gaps=score->gaps_l+score->gaps_r;
int len1=sts->GetPrimerLength(ISts::eLeft);
int len2=sts->GetPrimerLength(ISts::eRight);
int ovhg1=sts->GetOverhangChars(ISts::eLeft);
int ovhg2=sts->GetOverhangChars(ISts::eRight);
const char * data1=sts->GetPrimerData(ISts::eLeft);
const char * data2=sts->GetPrimerData(ISts::eRight);
if(m_ShowAlign && m_SeqData && pos2<=m_SeqLength) {
vector left, right;
// Right primer: sequence range starting at pos2-len2-ovhg2, primer data
// starting at data2.
// NOTE(review): pos2-len2-ovhg2 is evaluated in unsigned arithmetic; nothing
// here guards against pos2 being smaller than len2+ovhg2.
m_Matrix.Build(
m_SeqData+pos2-len2-ovhg2,m_SeqData+m_SeqLength-ovhg2,data2,len2);
m_Matrix.Graph(
m_SeqData+pos2-len2-ovhg2,m_SeqData+m_SeqLength-ovhg2,data2,len2,
right);
m_Matrix.Stat(
m_SeqData+pos2-len2-ovhg2,m_SeqData+m_SeqLength-ovhg2,data2,len2);
mism = m_Matrix.GetMismatches();
gaps = m_Matrix.GetGaps();
// Left primer: both ranges are passed starting from their last character
// (pos1+len1+ovhg1-1 and data1+len1-1).
// NOTE(review): presumably iterated backwards through the lost template
// argument -- confirm.
m_Matrix.Build >(
m_SeqData+pos1+len1+ovhg1-1,m_SeqData,data1+len1-1,len1);
m_Matrix.Graph >(
m_SeqData+pos1+len1+ovhg1-1,m_SeqData,data1+len1-1,len1,left);
m_Matrix.Stat >(
m_SeqData+pos1+len1+ovhg1-1,m_SeqData,data1+len1-1,len1);
mism += m_Matrix.GetMismatches();
gaps += m_Matrix.GetGaps();
// l: common label width so the "STS" and "Seq" rows line up.
// d: product length not covered by the two primers.
int l=max(int(m_SeqId.length()),int(sts->GetName().length()));
int d=score->actlen-len1-len2;
string stsname(sts->GetName().data(),sts->GetName().length());
// Graph() output is used as three strings per side: [0] on the STS row,
// [2] on the middle row, [1] on the Seq row.
fprintf(m_Out,
"#####################################"
"#####################################\n"
"# STS %*s %s...%d...%s\n"
"# %.*s %s %d %s\n"
"# Seq %*s %s...%d...%s\n",
l,stsname.c_str(),
left[0].c_str(),d,right[0].c_str(),
l," ",
left[2].c_str(),d,right[2].c_str(),
l,m_SeqId.c_str(),
left[1].c_str(),d,right[1].c_str());
}
// Exclude the overhang characters from the reported coordinates.
pos1 += ovhg1;
pos2 -= ovhg2;
// Columns: sequence id, STS name, direction, start (pos1+1), end (pos2),
// actual length/size range, mismatches, gaps, description.
// NOTE(review): "%.*s" expects an int precision and "%d" an int, while
// length() and pos1/pos2 are passed as is (pos1/pos2 are unsigned) -- check
// the types and add casts if they are wider than int.
fprintf(m_Out,"%s\t%.*s\t%c\t%d\t%d\t%d/%d-%d\t%d\t%d\t%.*s\n",
m_SeqId.c_str(),
sts->GetName().length(),sts->GetName().data(),
sts->GetDirection(),
pos1+1,pos2,
score->actlen,
sts->GetSizeLo(),sts->GetSizeHi(),
mism, gaps,
sts->GetDescription().length(),
sts->GetDescription().data());
}
/* FASTA-reader callback that collects one entry at a time and hands the
 * complete sequence to a CPcrMachine.
 *
 * Two storage modes exist:
 *  - copying (default): sequence lines are appended to a malloc-family buffer
 *    owned by this object;
 *  - no-copy: the pointer passed to CbkSeqline is kept as is, the buffer is
 *    never allocated here and therefore never freed here.
 */
class CPcrFastaProcessor:public IFastaReaderCallback
{
public:
    /* pmachine: machine that receives every finished entry (not owned);
     * noCopySeq: select the no-copy storage mode. */
    CPcrFastaProcessor(CPcrMachine* pmachine, bool noCopySeq = false ):
        m_PcrMachine(pmachine),
        m_Sequence(0),
        m_Size(0),
        m_Capacity(0),
        m_NoCopySeq( noCopySeq )
    {}

    virtual ~CPcrFastaProcessor() throw ()
    {
        // In no-copy mode m_Sequence is borrowed and must not be released.
        if( m_NoCopySeq ) return;
        free(m_Sequence);
    }

    virtual void CbkDefline(const char * , unsigned ) {}
    virtual void CbkIdent(const char * ident, unsigned length);
    virtual void CbkSeqline(const char * data, unsigned length);
    virtual void CbkEntryEnd();

protected:
    CPcrMachine * m_PcrMachine;
    string m_Ident;             // identifier of the entry being collected
    char * m_Sequence;          // owned buffer, or borrowed pointer in no-copy mode
    unsigned m_Size;            // number of sequence characters collected so far
    unsigned m_Capacity;        // allocated size of m_Sequence (copying mode)
    bool m_NoCopySeq;
};
void CPcrFastaProcessor::CbkIdent(const char * ident, unsigned length)
{
m_Ident.assign(ident,length);
}
void CPcrFastaProcessor::CbkSeqline(const char * seq, unsigned length)
{
if( m_NoCopySeq ) {
assert( m_Size == 0 && m_Sequence == 0 );
m_Size = length;
m_Sequence = const_cast(seq);
} else {
while(m_Size+length>=m_Capacity)
m_Sequence=(char*)realloc(m_Sequence,m_Capacity+=16192);
memcpy(m_Sequence+m_Size,seq,length);
m_Size+=length;
}
}
/* Called when an entry is complete: passes identifier, sequence pointer and
 * sequence length to the PCR machine, then resets the per-entry state.
 * In copying mode the buffer is NUL-terminated first and kept for reuse by
 * the next entry; in no-copy mode the borrowed pointer is dropped. */
void CPcrFastaProcessor::CbkEntryEnd()
{
    const bool ownsBuffer = !m_NoCopySeq;

    if( ownsBuffer && m_Sequence != 0 )
        m_Sequence[m_Size]=0;

    m_PcrMachine->ProcessSequence(m_Ident.c_str(),m_Sequence,m_Size);

    if( !ownsBuffer )
        m_Sequence = 0;
    m_Size=0;
    m_Ident.clear();
}
// Runs the search with the settings collected by ParseCmdline():
//   1. reads the STS file into the hash,
//   2. opens the output and creates the result formatter for `ofmt`,
//   3. installs formatter, optional post-processing filter, optional progress
//      callback, the hash and the primer aligners on the PCR machine,
//   4. feeds every input (blastdb volumes, FASTA files or stdin) through a
//      CPcrFastaProcessor.
// Returns 0; failures are reported by throwing.
// NOTE(review): several template arguments (auto_ptr, list, vector) have been
// lost in this copy of the file; restore them from the upstream sources.
// NOTE(review): `out`, m_AlignL and m_AlignR are released only on the normal
// path at the end of the function; an exception thrown in between skips the
// fclose/delete calls.
int CMain::Execute()
{
stsFileHash.SetOneTimeRun(true);
// The do { } while(0) blocks serve only as scopes for the locals inside them.
do {
CStsFileCallbackDefault cbk;
stsFileHash.ReadStsFile(stsfile, &cbk);
} while(0);
do {
// An empty output file name selects stdout.
FILE * out=ofile.length()?fopen64(ofile.c_str(),"w"):stdout;
if(out==0)
throw runtime_error(ofile+": "+strerror(errno));
auto_ptr cbk(0);
// Formats 3 and 4 are tabular (4 also shows alignments); every other value
// falls back to the classic format (2 = midpoint positions).
switch(ofmt) {
case 4:
case 3:
cbk.reset(new CPcrMachineCallbackTabular(out,ofmt==4,m_MaxGaps));
break;
case 2:
case 1:
default:
cbk.reset(new CPcrMachineCallbackClassic(out,ofmt==2));
break;
}
// if(!have_postprocess) {
// if(pcrMachine.GetMaxIndels() ||
// stsFileHash.GetHash().GetWordCount()>1) {
// postprocess=true;
// } else {
// postprocess=false;
// }
// }
// `post` wraps the formatter; it is installed instead of the formatter when
// post-processing is enabled.
CPcrMachinePostprocess post(cbk.get());
if(postprocess)
pcrMachine.SetCallback(&post);
else
pcrMachine.SetCallback(cbk.get());
CPcrProgressCallback pgscbk(verbose);
if(verbose) pcrMachine.SetProgressCallback(&pgscbk);
pcrMachine.SetStsHash(&stsFileHash);
// Aligner choice: gaps allowed -> by alignment mode; otherwise mismatches
// only -> CAlignNoGaps; otherwise exact matching.
if(m_MaxGaps) {
switch(m_AlignMode) {
case eNever:
m_AlignL=new CAlignFast(m_MaxMismatch,m_MaxGaps);
m_AlignR=new CAlignFast(m_MaxMismatch,m_MaxGaps);
break;
case eAlways:
m_AlignL=new CAlignLCS(m_MaxMismatch,m_MaxGaps);
m_AlignR=new CAlignLCS(m_MaxMismatch,m_MaxGaps);
break;
case eFallback:
m_AlignL=new CAlignCompromise(m_MaxMismatch,m_MaxGaps);
m_AlignR=new CAlignCompromise(m_MaxMismatch,m_MaxGaps);
break;
default:
throw logic_error("Invalig align mode");
}
}
else if(m_MaxMismatch) {
m_AlignL=new CAlignNoGaps(m_MaxMismatch);
m_AlignR=new CAlignNoGaps(m_MaxMismatch);
}
else {
m_AlignL=new CAlignExact();
m_AlignR=new CAlignExact();
}
pcrMachine.SetAligner(m_AlignL,m_AlignR);
// With blastdb input the processor runs in no-copy mode (second constructor
// argument), i.e. it borrows the sequence buffer passed to CbkSeqline.
CPcrFastaProcessor processor(&pcrMachine
#ifndef STANDALONE
, m_blastdbs
#endif
);
if(fafiles.size()) {
for(list::const_iterator f=fafiles.begin();
f!=fafiles.end(); ++f) {
#ifndef STANDALONE
if( m_blastdbs ) {
// Expand the database name into its volumes; when that fails, warn and
// treat the name itself as the single volume.
vector volumes;
try {
CSeqDB::FindVolumePaths( *f, CSeqDB::eNucleotide, volumes, 0, true );
} catch(exception& e) {
cerr << "? Warning: CSeqDB::FindVolumePaths( \"" << *f << "\", CSeqDB::eNucleotide, volumes, 0, true ); failed with error: " << e.what() << "\n";
volumes.clear();
volumes.push_back( *f );
} catch(...) {
cerr << "? Warning: CSeqDB::FindVolumePaths( \"" << *f << "\", CSeqDB::eNucleotide, volumes, 0, true ); failed with unknown exception\n";
volumes.clear();
volumes.push_back( *f );
}
for( vector::const_iterator v = volumes.begin(); v != volumes.end(); ++v ) {
auto_ptr seqDB( 0 );
try {
// NOTE(review): `lst` is not deleted here; presumably CSeqDB takes
// ownership, but if its constructor throws the list may leak -- confirm.
CSeqDBGiList * lst = (m_gilist.length() ? new CSeqDBFileGiList( m_gilist ) : 0 );
seqDB.reset( new CSeqDB( *v, CSeqDB::eNucleotide, lst ) );
} catch(exception& e) {
throw runtime_error( "Failed to open blastdb volume " + *v + ": " + e.what() );
} catch(...) {
throw runtime_error( "Failed to open blastdb volume " + *v + ": unknown error" );
}
processor.CbkFileBegin();
// Drive the processor by hand, one database sequence at a time, with the
// same callback order a FASTA reader would produce.
for( CSeqDBIter i = seqDB->Begin(); i; ++i ) {
list > ids = seqDB->GetSeqIDs( i.GetOID() );
if( ids.size() == 0 ) {
ostringstream err;
err << "Bad entry in " << *f << " (" << *v << ") " << " oid " << i.GetOID() << ": no seqids\n";
throw runtime_error( err.str() );
//cerr << "? Warning: " << err.str();
//continue;
}
// Identifier: all seq-ids in FASTA notation joined with '|'.
string ident;
for( list >::const_iterator x = ids.begin(); x != ids.end(); ++x ) {
if( x != ids.begin() ) ident += "|";
ident += (*x)->AsFastaString();
}
processor.CbkEntryBegin();
processor.CbkIdent( ident.c_str(), ident.length() );
// `seq` has to outlive CbkEntryEnd(): in no-copy mode the processor keeps
// only the pointer returned by c_str().
string seq;
seqDB->GetSequenceAsString( i.GetOID(), seq );
processor.CbkSeqline( seq.c_str(), seq.length() );
processor.CbkEntryEnd();
}
}
// NOTE(review): CbkFileBegin() is called once per volume but CbkFileEnd()
// only once per database name, after the volume loop -- confirm intended.
processor.CbkFileEnd();
} else {
#endif
// Plain FASTA input; "-" means standard input.
if(*f=="-") {
CFastaReader reader("/dev/stdin");
reader.SetCvtTable(CFastaReader::sm_NucleotidesUc);
reader.ReadFile(&processor);
} else {
CFastaReader reader(*f);
reader.SetCvtTable(CFastaReader::sm_NucleotidesUc);
reader.ReadFile(&processor);
}
#ifndef STANDALONE
}
#endif
}
}
else {
// No input named on the command line: read FASTA from standard input.
CFastaReader reader("/dev/stdin");
reader.SetCvtTable(CFastaReader::sm_NucleotidesUc);
reader.ReadFile(&processor);
}
delete m_AlignR;
delete m_AlignL;
// NOTE(review): this also closes stdout when no output file was given.
fclose(out);
} while(0);
return 0;
}
/* Prints the tool name and version on stdout and marks the run as finished,
 * so that Run() skips Execute().  Returns 0 always. */
int CMain::Version()
{
    puts("e-PCR cmdline tool version " VERSION);
    done=true;
    return 0;
}
/* Entry point of the application object.
 * Returns the non-zero code of ParseCmdline() when parsing failed, 0 when
 * help or version output already completed the run, and the result of
 * Execute() otherwise. */
int CMain::Run()
{
    const int parseResult = ParseCmdline();
    if(parseResult != 0)
        return parseResult;
    return done ? 0 : Execute();
}
int main(int argc, char ** argv)
{
// try {
CMain app(argc,argv);
return app.Run();
/*
}
catch(logic_error& e) {
fprintf(stderr,"! Fatal: Internal error %s\n",e.what());
}
catch(exception& e) {
fprintf(stderr,"! Fatal: %s\n",e.what());
}
catch(...) {
fprintf(stderr,"! Fatal: Unknown error\n");
}
return 100;
*/
}
/*
* $Log: e-PCR_main.cpp,v $
* Revision 1.25 2008/06/18 14:45:33 rotmistr
* Fixed problem with -d x-X parameter being reset if -w or some others are used after it.
*
* Revision 1.24 2008/06/16 16:02:40 rotmistr
* *** empty log message ***
*
* Revision 1.23 2008/04/28 16:38:45 rotmistr
* Applied patch to build with gcc-4.3
*
* Revision 1.22 2008/03/27 14:36:58 rotmistr
* Added assert.h to make it compiling with VC8
*
* Revision 1.21 2008/03/26 16:04:29 rotmistr
* Added support for blastdb files
*
* Revision 1.20 2007/07/05 16:05:58 rotmistr
* Made things compileable by MS Visual C++ 8.0
*
* Revision 1.19 2005/06/14 16:46:41 rotmistr
* Changed report format for floppy tails
*
* Revision 1.18 2004/10/26 17:16:33 rotmistr
* Added 5'-end masking for primers
*
* Revision 1.17 2004/06/08 20:32:51 rotmistr
* Fixup for gap+insert special case
*
* Revision 1.16 2004/06/08 16:14:55 rotmistr
* *** empty log message ***
*
* Revision 1.15 2004/06/03 23:37:19 rotmistr
* New aligner added.
*
* Revision 1.14 2004/04/06 04:53:17 rotmistr
* All is compileable with BCC5.5 and runnable on WIndows
*
* Revision 1.13 2004/04/01 16:37:41 rotmistr
* Cleaned after adding windows capabilities
*
* Revision 1.12 2004/04/01 05:57:52 rotmistr
* Compilable with borland C++
*
* Revision 1.11 2004/03/30 21:06:53 rotmistr
* Fixes for setting default STS size range.
*
* Revision 1.10 2004/03/30 19:11:18 rotmistr
* STS default size
*
* Revision 1.9 2004/03/30 19:08:03 rotmistr
* default STS size is tunnable now
*
* Revision 1.8 2004/03/26 17:02:13 rotmistr
* Compat-options are now allowed everywhere, and multiple fasta files can be used.
*
* Revision 1.7 2004/03/25 19:36:52 rotmistr
* API: separate left and right primers mism/gaps in forward API
*
* Revision 1.6 2004/03/23 22:35:25 rotmistr
* Fixed processing of -mid flag in cmdline
* Fixed destructor for fasta reader
* Removed cgi
*
* Revision 1.5 2004/03/07 06:35:59 rotmistr
* Many bugfixes and optimisations -- cgi is to go to production
*
* Revision 1.4 2004/02/04 21:23:22 rotmistr
* - gcc-3.3.2 compatible
* - better postfiltering for reverse-e-PCR for discontiguos words
* - cgi added, that supports:
* -- contig to chromosome mapping
* -- simple mapviewer links
* -- unists links
* -- discontiguos words
*
* Revision 1.3 2004/01/28 23:27:02 rotmistr
* "Best of overlapping" hit selection postprocessor added.
*
* Revision 1.2 2004/01/08 23:22:41 rotmistr
* Fixed init error in faread,
* Adjusted output to standard,
* Added output format style and output file to parameters.
*
* Revision 1.1.1.1 2003/12/23 18:17:28 rotmistr
* Package that includes e-PCR, reverse e-PCR, and sequence data preparation
* program for reverse e-PCR looks ready
*
*/
e-PCR-2.3.12/fahash.hpp 0000644 0011377 0001062 00000024327 11745334032 014354 0 ustar rotmistr contig /* $Id: fahash.hpp,v 1.15 2007/07/11 20:49:29 rotmistr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* =========================================================================
*
* Author: Kirill Rotmistrovsky
*
* ========================================================================= */
#ifndef EPCR_HASH__HPP
#define EPCR_HASH__HPP
#include
#include
#include
#include
#include
#include