clustalx-2.1/version.h0000644000175000017500000000104111470725216014650 0ustar ddineenddineen/* * clustal version strings, maybe dependent on build state * as controlled by compile.h. * * Created: 17-01-07,Nigel Brown(EMBL). * * Changes: * * 31-01-07,Nigel Brown(EMBL): fixed _CLU_VERSION_STRING_ typo. * 27-06-07,Nigel Brown(EMBL): public release 2.0. */ #ifndef VERSION_H #define VERSION_H #include "clustalW/clustalw_version.h" //AW: not needed anymore #include "compile.h" #define CLUSTALX_NAME "ClustalX" /* version number between X and W are same for now */ #define CLUSTALX_VERSION CLUSTALW_VERSION #endif //VERSION_H clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/qconfig.pri0000644000175000017500000000076511470725216023525 0ustar ddineenddineen#configuration CONFIG += no_mocdepend release stl qt_no_framework QT_ARCH = i386 QT_EDITION = OpenSource QT_CONFIG += qt3support accessibility minimal-config small-config medium-config large-config full-config reduce_exports ipv6 clock-monotonic mremap getaddrinfo ipv6ifname getifaddrs inotify jpeg mng png gif tiff freetype zlib nis iconv release #versioning QT_VERSION = 4.3.1 QT_MAJOR_VERSION = 4 QT_MINOR_VERSION = 3 QT_PATCH_VERSION = 1 QMAKE_RPATHDIR += "/usr/local/Trolltech/Qt-4.3.1/lib" clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/yacc.prf0000644000175000017500000000406711470725216024630 0ustar ddineenddineen# # Yacc extra-compiler for handling files specified in the YACCSOURCES variable # { yacc_decl.name = Yacc header yacc_decl.input = YACCSOURCES yacc_decl.variable_out = GENERATED_FILES isEmpty(QMAKE_YACCFLAGS_MANGLE) { QMAKE_YACCFLAGS_MANGLE = -p ${QMAKE_FILE_BASE} QMAKE_YACC_HEADER = y.tab.h QMAKE_YACC_SOURCE = y.tab.c } else { QMAKE_YACCFLAGS_MANGLE ~= s/\\$base/${QMAKE_FILE_BASE}/g #backwards compat QMAKE_YACC_HEADER ~= s/\\$base/${QMAKE_FILE_BASE}/g QMAKE_YACC_SOURCE ~= s/\\$base/${QMAKE_FILE_BASE}/g } QMAKE_YACCDECLFLAGS = $$QMAKE_YACCFLAGS !yacc_no_name_mangle:QMAKE_YACCDECLFLAGS += 
$$QMAKE_YACCFLAGS_MANGLE yacc_decl.commands = \ $$QMAKE_YACC $$QMAKE_YACCDECLFLAGS ${QMAKE_FILE_IN}$$escape_expand(\n\t) \ $$QMAKE_DEL_FILE $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_H)} $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_CPP)}$$escape_expand(\n\t) \ $$QMAKE_MOVE $${QMAKE_YACC_HEADER} $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_H)}$$escape_expand(\n\t) \ $$QMAKE_MOVE $${QMAKE_YACC_SOURCE} $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_CPP)}$$escape_expand(\n\t) yacc_decl.output = $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_H)} silent:yacc_decl.commands = @echo Yacc ${QMAKE_FILE_IN} && $$yacc_decl.commands QMAKE_EXTRA_COMPILERS += yacc_decl } { yacc_impl.name = source for ${QMAKE_FILE_IN} yacc_impl.input = YACCSOURCES yacc_impl.variable_out = GENERATED_SOURCES yacc_impl.commands = $$escape_expand(\n) # We don't want any commands where, but if command is empty no rules are created yacc_impl.depends = $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_H)} # Make sure we depend on the step above yacc_impl.output = $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_yacc$${first(QMAKE_EXT_CPP)} # Faked output from this step, output really created in step above QMAKE_EXTRA_COMPILERS += yacc_impl } clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/warn_on.prf0000644000175000017500000000024111470725216025342 0ustar ddineenddineenCONFIG -= warn_off QMAKE_CFLAGS += $$QMAKE_CFLAGS_WARN_ON QMAKE_CXXFLAGS += $$QMAKE_CXXFLAGS_WARN_ON QMAKE_OBJECTIVE_CFLAGS += $$QMAKE_OBJECTIVE_CFLAGS_WARN_ON clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/unix/thread.prf0000644000175000017500000000106311470725216026134 0ustar ddineenddineen!isEmpty(QMAKE_CFLAGS_THREAD) { QMAKE_CFLAGS += $$QMAKE_CFLAGS_THREAD QMAKE_EXPORT_CFLAGS += $$QMAKE_CFLAGS_THREAD } !isEmpty(QMAKE_CXXFLAGS_THREAD) { QMAKE_CXXFLAGS += $$QMAKE_CXXFLAGS_THREAD QMAKE_EXPORT_CXXFLAGS += 
$$QMAKE_CXXFLAGS_THREAD } INCLUDEPATH += $$QMAKE_INCDIR_THREAD LIBS += $$QMAKE_LIBS_THREAD !isEmpty(QMAKE_LFLAGS_THREAD):QMAKE_LFLAGS += $$QMAKE_LFLAGS_THREAD !isEmpty(QMAKE_CC_THREAD):QMAKE_CC = $$QMAKE_CC_THREAD !isEmpty(QMAKE_CXX_THREAD):QMAKE_CXX = $$QMAKE_CXX_THREAD !isEmpty(QMAKE_LINK_THREAD):QMAKE_LINK = $$QMAKE_LINK_THREAD clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/unix/0000755000175000017500000000000011470725216024154 5ustar ddineenddineenclustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/uic.prf0000644000175000017500000001043411470725216024464 0ustar ddineenddineen isEmpty(QMAKE_UIC3) { win32:QMAKE_UIC3 = $$[QT_INSTALL_BINS]\uic3.exe else:QMAKE_UIC3 = $$[QT_INSTALL_BINS]/uic3 } isEmpty(QMAKE_UIC) { win32:QMAKE_UIC = $$[QT_INSTALL_BINS]\uic.exe else:QMAKE_UIC = $$[QT_INSTALL_BINS]/uic } isEmpty(UI_DIR):UI_DIR = . isEmpty(UI_SOURCES_DIR):UI_SOURCES_DIR = $$UI_DIR isEmpty(UI_HEADERS_DIR):UI_HEADERS_DIR = $$UI_DIR isEmpty(QMAKE_MOD_UIC):QMAKE_MOD_UIC = ui_ # Allow FORMS3 to contain old UIC3 forms, while FORMS contains new # UIC files in the same project. However, if CONFIG+=uic3 and no # FORMS3 is defined, FORMS may only contain old UIC3 files. 
!uic3|!isEmpty(FORMS3) { # If we have CONFIG+=uic3 and no FORMS3, then don't do this step, # as UIC3 files don't need a ui_.h file uic.commands = $$QMAKE_UIC ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} uic.depend_command = "$$QMAKE_UIC" -d "${QMAKE_FILE_IN}" uic.output = $$UI_HEADERS_DIR/$${QMAKE_MOD_UIC}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_H)} uic.input = FORMS uic.variable_out = GENERATED_FILES uic.CONFIG += no_link target_predeps uic.name = UIC ${QMAKE_FILE_IN} silent:uic.commands = @echo uic ${QMAKE_FILE_IN} && $$uic.commands QMAKE_EXTRA_COMPILERS += uic INCREDIBUILD_XGE += uic } INCLUDEPATH += $$UI_HEADERS_DIR uic3 { isEmpty(FORMS3) { UIC3_FORMS = FORMS !build_pass:message("Project contains CONFIG+=uic3, but no files in FORMS3; .ui files in FORMS treated as UIC3 form files.") } else { UIC3_FORMS = FORMS3 } uic3_decl.commands = $$QMAKE_UIC3 ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} uic3_decl.dependency_type = TYPE_UI uic3_decl.output = $$UI_HEADERS_DIR/${QMAKE_FILE_BASE}$${first(QMAKE_EXT_H)} uic3_decl.input = $$UIC3_FORMS uic3_decl.CONFIG += no_link uic3_decl.variable_out = GENERATED_FILES UIC3_HEADERS uic3_decl.name = UIC3 Decl ${QMAKE_FILE_IN} silent:uic3_decl.commands = @echo uic3 ${QMAKE_FILE_IN} && $$uic3_decl.commands QMAKE_EXTRA_COMPILERS += uic3_decl uic3_impl.commands = $$QMAKE_UIC3 -impl ${QMAKE_FILE_BASE}$${first(QMAKE_EXT_H)} ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} uic3_impl.dependency_type = TYPE_UI uic3_impl.output = $$UI_SOURCES_DIR/${QMAKE_FILE_BASE}$${first(QMAKE_EXT_CPP)} uic3_impl.depends = $$UI_HEADERS_DIR/${QMAKE_FILE_BASE}$${first(QMAKE_EXT_H)} uic3_impl.input = $$UIC3_FORMS uic3_impl.variable_out = GENERATED_SOURCES uic3_impl.name = UIC3 Impl ${QMAKE_FILE_IN} silent:uic3_impl.commands = @echo uic3 -impl ${QMAKE_FILE_IN} && $$uic3_impl.commands QMAKE_EXTRA_COMPILERS += uic3_impl ### add a moc step load(moc) uic3_moc.commands = $$moc_header.commands uic3_moc.output = $$moc_header.output uic3_moc.depends = 
$$UI_HEADERS_DIR/${QMAKE_FILE_BASE}$${first(QMAKE_EXT_H)} uic3_moc.input = UIC3_HEADERS uic3_moc.variable_out = GENERATED_SOURCES uic3_moc.name = $$moc_header.name QMAKE_EXTRA_COMPILERS += uic3_moc } defineReplace(imageCollectionCmd) { unset(EMBEDDED_IMAGES) RET = for(image, $$list($$split(1))) { EMBEDDED_IMAGES += $$image count(EMBEDDED_IMAGES, 5) { isEmpty(RET): RET += echo $$EMBEDDED_IMAGES > images.tmp $$escape_expand(\n\t) else: RET += echo $$EMBEDDED_IMAGES >> images.tmp $$escape_expand(\n\t) unset(EMBEDDED_IMAGES) } } !isEmpty(EMBEDDED_IMAGES):RET += echo $$EMBEDDED_IMAGES >> images.tmp $$escape_expand(\n\t) !isEmpty(RET) { RET += $$QMAKE_UIC3 -embed $$TARGET -f images.tmp -o $$2 $$escape_expand(\n\t) return($$RET) } return($$QMAKE_UIC3 -embed $$TARGET $$1 -o $$2) } image_collection.output = qmake_image_collection$${first(QMAKE_EXT_CPP)} image_collection.variable_out = SOURCES image_collection.input = IMAGES image_collection.CONFIG += combine image_collection.name = UIC3 Image collection in ${QMAKE_FILE_OUT} !win32 { image_collection.commands = $$QMAKE_UIC3 -embed $$TARGET ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} silent:image_collection.commands = @echo uic3 -embed ${QMAKE_FILE_IN} && $$image_collection.commands } else { image_collection.commands = ${QMAKE_FUNC_imageCollectionCmd} silent:image_collection.commands = @echo uic3 -embed $$TARGET -f images.tmp && $image_collection.commands } QMAKE_EXTRA_COMPILERS += image_collection clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/static.prf0000644000175000017500000000043211470725216025170 0ustar ddineenddineenCONFIG -= shared dll contains(TEMPLATE, ".*lib"):{ CONFIG += staticlib unix:!embedded { QMAKE_CFLAGS += $$QMAKE_CFLAGS_STATIC_LIB QMAKE_CXXFLAGS += $$QMAKE_CXXFLAGS_STATIC_LIB } } !static_and_shared:fix_output_dirs:fixExclusiveOutputDirs(static, shared) clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/resources.prf0000644000175000017500000000237611470725216025724 0ustar 
ddineenddineenisEmpty(QMAKE_RCC) { win32:QMAKE_RCC = $$[QT_INSTALL_BINS]\rcc.exe else:QMAKE_RCC = $$[QT_INSTALL_BINS]/rcc } isEmpty(RCC_DIR):RCC_DIR = . isEmpty(QMAKE_RESOURCE_PREFIX):QMAKE_RESOURCE_PREFIX = /tmp/ isEmpty(QMAKE_MOD_RCC):QMAKE_MOD_RCC = qrc resource_combine { rcc.CONFIG += combine rcc.output = $$RCC_DIR/$${first(QMAKE_MOD_RCC)}_combined$${first(QMAKE_EXT_CPP)} } else { rcc.output = $$RCC_DIR/$${first(QMAKE_MOD_RCC)}_${QMAKE_FILE_BASE}$${first(QMAKE_EXT_CPP)} } !contains(QMAKE_RESOURCE_FLAGS, -root):!isEmpty(QMAKE_RESOURCE_ROOT):QMAKE_RESOURCE_FLAGS += -root $$QMAKE_RESOURCE_ROOT !contains(QMAKE_RESOURCE_FLAGS, -name) { resource_combine { isEmpty(QMAKE_RESOURCE_NAME):!isEmpty(TARGET):QMAKE_RESOURCE_NAME = $$TARGET !isEmpty(QMAKE_RESOURCE_NAME):QMAKE_RESOURCE_FLAGS += -name $$QMAKE_RESOURCE_NAME } else { QMAKE_RESOURCE_FLAGS += -name ${QMAKE_FILE_BASE} } } rcc.commands = "$$QMAKE_RCC" $$QMAKE_RESOURCE_FLAGS "${QMAKE_FILE_IN}" -o "${QMAKE_FILE_OUT}" rcc.depend_command = "$$QMAKE_RCC" -list $$QMAKE_RESOURCE_FLAGS "${QMAKE_FILE_IN}" rcc.input = RESOURCES rcc.variable_out = SOURCES rcc.name = RCC ${QMAKE_FILE_IN} silent:rcc.commands = @echo rcc "${QMAKE_FILE_IN}" && $$rcc.commands QMAKE_EXTRA_COMPILERS += rcc clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/release.prf0000644000175000017500000000041511470725216025322 0ustar ddineenddineenCONFIG -= debug QMAKE_CFLAGS += $$QMAKE_CFLAGS_RELEASE QMAKE_CXXFLAGS += $$QMAKE_CXXFLAGS_RELEASE QMAKE_OBJECTIVE_CFLAGS += $$QMAKE_OBJECTIVE_CFLAGS_RELEASE QMAKE_LFLAGS += $$QMAKE_LFLAGS_RELEASE !debug_and_release:fix_output_dirs:fixExclusiveOutputDirs(release, debug)clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/qt_functions.prf0000644000175000017500000000435711470725216026427 0ustar ddineenddineendefineReplace(qtLibraryTarget) { unset(LIBRARY_NAME) LIBRARY_NAME = $$1 mac:!static:contains(QT_CONFIG, qt_framework) { QMAKE_FRAMEWORK_BUNDLE_NAME = $$LIBRARY_NAME export(QMAKE_FRAMEWORK_BUNDLE_NAME) } 
contains(TEMPLATE, .*lib):CONFIG(debug, debug|release) { !debug_and_release|build_pass { mac:RET = $$member(LIBRARY_NAME, 0)_debug else:win32:RET = $$member(LIBRARY_NAME, 0)d } } isEmpty(RET):RET = $$LIBRARY_NAME return($$RET) } defineTest(qtAddLibrary) { INCLUDEPATH = $$QMAKE_INCDIR_QT/$$1 $$INCLUDEPATH LIB_NAME = $$1 unset(LINKAGE) mac { CONFIG(qt_framework, qt_framework|qt_no_framework) { #forced QMAKE_FRAMEWORKPATH *= $${QMAKE_LIBDIR_QT} FRAMEWORK_INCLUDE = $$QMAKE_LIBDIR_QT/$${LIB_NAME}.framework/Headers !qt_no_framework_direct_includes:exists($$FRAMEWORK_INCLUDE) { INCLUDEPATH -= $$FRAMEWORK_INCLUDE INCLUDEPATH = $$FRAMEWORK_INCLUDE $$INCLUDEPATH } LINKAGE = -framework $${LIB_NAME} } else:!qt_no_framework { #detection for(frmwrk_dir, $$list($$QMAKE_LIBDIR_QT $$QMAKE_LIBDIR $$(DYLD_FRAMEWORK_PATH) /Library/Frameworks)) { exists($${frmwrk_dir}/$${LIB_NAME}.framework) { QMAKE_FRAMEWORKPATH *= $${frmwrk_dir} FRAMEWORK_INCLUDE = $$frmwrk_dir/$${LIB_NAME}.framework/Headers !qt_no_framework_direct_includes:exists($$FRAMEWORK_INCLUDE) { INCLUDEPATH -= $$FRAMEWORK_INCLUDE INCLUDEPATH = $$FRAMEWORK_INCLUDE $$INCLUDEPATH } LINKAGE = -framework $${LIB_NAME} break() } } } } isEmpty(LINKAGE) { CONFIG(debug, debug|release) { win32:LINKAGE = -l$${LIB_NAME}d mac:LINKAGE = -l$${LIB_NAME}_debug } isEmpty(LINKAGE):LINKAGE = -l$${LIB_NAME} } !isEmpty(QMAKE_LSB) { QMAKE_LFLAGS *= --lsb-libpath=$$$$QMAKE_LIBDIR_QT QMAKE_LFLAGS *= -L/opt/lsb/lib QMAKE_LFLAGS *= --lsb-shared-libs=$${LIB_NAME} } LIBS += $$LINKAGE export(LIBS) export(INCLUDEPATH) export(QMAKE_FRAMEWORKPATH) export(QMAKE_LFLAGS) return(true) } clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/qt_config.prf0000644000175000017500000000124711470725216025657 0ustar ddineenddineenexists($$_QMAKE_CACHE_):QMAKE_QT_CONFIG = $$fromfile($$_QMAKE_CACHE_, QMAKE_QT_CONFIG) isEmpty(QMAKE_QT_CONFIG)|!exists($$QMAKE_QT_CONFIG) { !isEmpty(QT_BUILD_TREE):QMAKE_QT_CONFIG = $$QT_BUILD_TREE/mkspecs/qconfig.pri 
else:exists($$_QMAKE_CACHE_):infile($$_QMAKE_CACHE_, QT_BUILD_TREE):QMAKE_QT_CONFIG = $$fromfile($$_QMAKE_CACHE_, QT_BUILD_TREE)/mkspecs/qconfig.pri else:exists($$[QT_INSTALL_DATA]/mkspecs/qconfig.pri):QMAKE_QT_CONFIG = $$[QT_INSTALL_DATA]/mkspecs/qconfig.pri } !exists($$QMAKE_QT_CONFIG)|!include($$QMAKE_QT_CONFIG) { debug(1, "Cannot load qconfig.pri!") } else { debug(1, "Loaded .qconfig.pri from ($$QMAKE_QT_CONFIG)") } load(qt_functions) clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/qt.prf0000644000175000017500000001162711470725216024335 0ustar ddineenddineenCONFIG *= moc thread #handle defines win32 { shared:DEFINES += QT_DLL !contains(DEFINES, QT_NODLL) { QT_ENV_DLL = $$(QT_DLL) QT_ENV_NO_DLL = $$(QT_NODLL) !isEmpty(QT_ENV_DLL):isEmpty(QT_ENV_NO_DLL):DEFINES += QT_DLL contains(DEFINES, QT_MAKEDLL)|contains(DEFINES, QT_DLL):QMAKE_QT_DLL = 1 } } CONFIG(release, debug|release):DEFINES += QT_NO_DEBUG no_keywords:DEFINES += QT_NO_KEYWORDS plugin { #Qt plugins static:DEFINES += QT_STATICPLUGIN DEFINES += QT_PLUGIN } #handle includes INCLUDEPATH = $$QMAKE_INCDIR_QT $$INCLUDEPATH #prepending prevents us from picking up "stale" includes win32:INCLUDEPATH += $$QMAKE_INCDIR_QT/ActiveQt # As order does matter for static libs, we reorder the QT variable here TMPLIBS = script svg qt3support sql xml opengl gui network core for(QTLIB, $$list($$TMPLIBS)) { contains(QT, $$QTLIB): QT_ORDERED += $$QTLIB } QT = $$QT_ORDERED for(QTPLUG, $$list($$lower($$unique(QTPLUGIN)))) { qplugin_style = !qt_debug:!qt_release { CONFIG(debug, debug|release):qplugin_style = debug else:qplugin_style = release } else:CONFIG(qt_debug, qt_debug|qt_release) { qplugin_style = debug } else { qplugin_style = release } # Check if the plugin is known to Qt. We can use this to determine # the plugin path. Unknown plugins must rely on the default link path. 
ACCESSIBLEPLUGINS = qtaccessiblewidgets qtaccessiblecompatwidgets CODECPLUGINS = qcncodecs qjpcodecs qkrcodecs qtwcodecs DECORATIONPLUGINS = qdecorationdefault qdecorationwindows GFXDRIVERPLUGINS = qgfxvnc qscreenvfb qgfxsnap qgfxvga16 qgfxmatrox qgfxvoodoo qgfxtransformed qgfxshadowfb IMAGEPLUGINS = qgif qmng qjpeg INPUTPLUGINS = qimsw-multi MOUSEDRIVERPLUGINS = qtslibmousehandler SQLPLUGINS = qsqldb2 qsqloci qsqltds qsqlodbc qsqlpsql qsqlibase qsqlmysql qsqlite2 qsqlite ALLQTPLUGINS = $$ACCESSIBLEPLUGINS $$CODECPLUGINS $$DECORATIONPLUGINS $$GFXDRIVERPLUGINS $$IMAGEPLUGINS $$INPUTPLUGINS $$MOUSEDRIVERPLUGINS $$SQLPLUGINS QT_PLUGINPATH = contains(ALLQTPLUGINS, $$QTPLUG) { # Determine the plugin path contains(ACCESSIBLEPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/accessible/ contains(CODECPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/codecs/ contains(DECORATIONPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/decorations/ contains(GFXDRIVERPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/gfxdrivers/ contains(IMAGEPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/imageformats/ contains(INPUTPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/inputmethods/ contains(MOUSEDRIVERPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/mousedrivers/ contains(SQLPLUGINS, $$QTPLUG): QT_PLUGINPATH = $$[QT_INSTALL_PLUGINS]/sqldrivers/ } # Generate the plugin linker line target_qt:isEqual(TARGET, QTPLUG) { warning($$TARGET cannot have a QTPLUGIN of $$QTPLUG) } else { QT_LINKAGE = -l$${QTPLUG} win32 { CONFIG(debug, debug|release):QT_LINKAGE = -l$${QTPLUG}d } else:mac { isEqual(qplugin_style, debug):QT_LINKAGE = -l$${QTPLUG}_debug } } !isEmpty(QT_PLUGINPATH): LIBS += -L$$QT_PLUGINPATH LIBS += $$QT_LINKAGE } #specific module settings !isEmpty(QT_BUILD_TREE):QMAKE_LIBDIR = $$QT_BUILD_TREE/lib $$QMAKE_LIBDIR #as above, prepending prevents us from picking up "stale" libs QMAKE_LIBDIR += $$QMAKE_LIBDIR_QT for(QTLIB, 
$$list($$lower($$unique(QT)))) { unset(qlib_style) !qt_debug:!qt_release { CONFIG(debug, debug|release):qlib_style = debug else:qlib_style = release } else:CONFIG(qt_debug, qt_debug|qt_release) { qlib_style = debug } else { qlib_style = release } unset(qlib) isEqual(QTLIB, gui):qlib = QtGui else:isEqual(QTLIB, network):qlib = QtNetwork else:isEqual(QTLIB, xml):qlib = QtXml else:isEqual(QTLIB, opengl):qlib = QtOpenGL else:isEqual(QTLIB, sql):qlib = QtSql else:isEqual(QTLIB, core):qlib = QtCore else:isEqual(QTLIB, canvas):qlib = QtCanvas else:isEqual(QTLIB, qt3support):qlib = Qt3Support else:isEqual(QTLIB, svg):qlib = QtSvg else:isEqual(QTLIB, script):qlib = QtScript else:message("Unknown QT: $$QTLIB"):qlib = !isEmpty(qlib) { target_qt:isEqual(TARGET, qlib) { warning($$TARGET cannot have a QT of $$QTLIB) } else { DEFINES *= $$upper(QT_$${QTLIB}_LIB) exists($$QMAKE_INCDIR_QT/$$qlib) { INCLUDEPATH -= $$QMAKE_INCDIR_QT/$$qlib INCLUDEPATH = $$QMAKE_INCDIR_QT/$$qlib $$INCLUDEPATH } isEqual(QTLIB, opengl):CONFIG += opengl isEqual(QTLIB, qt3support):DEFINES *= QT3_SUPPORT qtAddLibrary($$qlib) } } } qt_compat { !qt_compat_no_warning:QTDIR_build:warning(***USE of COMPAT inside of QTDIR!**) #just for us INCLUDEPATH *= $$QMAKE_INCDIR_QT/Qt DEFINES *= QT_COMPAT } clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/moc.prf0000644000175000017500000000670211470725216024465 0ustar ddineenddineen #global defaults isEmpty(QMAKE_MOC) { win32:QMAKE_MOC = $$[QT_INSTALL_BINS]\moc.exe else:QMAKE_MOC = $$[QT_INSTALL_BINS]/moc } isEmpty(MOC_DIR):MOC_DIR = . isEmpty(QMAKE_H_MOD_MOC):QMAKE_H_MOD_MOC = moc_ isEmpty(QMAKE_EXT_CPP_MOC):QMAKE_EXT_CPP_MOC = .moc # This function on Windows puts the includes into a .inc file which moc will read, if the project # has more than 30 includes. 
We do this to overcome a command-line limit on Win < XP # Otherwise the function simply returns the normal command-line for moc defineReplace(mocCmd) { win32:count($$list($$INCPATH), 40, >) { EOC = $$escape_expand(\n\t) if(contains(TEMPLATE, "vc.*")|contains(TEMPLATE_PREFIX, "vc")) { # the VCPROJ generator will replace the \r\h with the coded \r\n: # No other generator understands the \h if(win32-msvc.net|win32-msvc2005): EOC = $$escape_expand(\r\h) else: EOC = $$escape_expand(\\)$$escape_expand(\n\t) } if(win32-msvc.net|win32-msvc2005): if(contains(TEMPLATE, "vc.*")|contains(TEMPLATE_PREFIX, "vc")): EOC = $$escape_expand(\r\h) INCLUDETEMP = mocinclude.tmp unset(INCFILELIST) RET = for(incfile, $$list($$INCPATH)) { INCFILELIST = -I$$incfile isEmpty(RET): RET += @echo $$INCFILELIST> $$INCLUDETEMP $$EOC else: RET += @echo $$INCFILELIST>> $$INCLUDETEMP $$EOC } !isEmpty(INCFILELIST):RET += @echo $$INCFILELIST>> $$INCLUDETEMP $$EOC RET += $$QMAKE_MOC $(DEFINES) @$$INCLUDETEMP $$join(QMAKE_COMPILER_DEFINES, " -D", -D) $$1 -o $$2 return($$RET) } return($$QMAKE_MOC $(DEFINES) $(INCPATH) $$join(QMAKE_COMPILER_DEFINES, " -D", -D) $$1 -o $$2) } #moc headers moc_header.CONFIG = moc_verify moc_header.dependency_type = TYPE_C moc_header.commands = ${QMAKE_FUNC_mocCmd} moc_header.output = $$MOC_DIR/$${QMAKE_H_MOD_MOC}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_CPP)} moc_header.input = HEADERS moc_header.variable_out = SOURCES moc_header.name = MOC ${QMAKE_FILE_IN} silent:moc_header.commands = @echo moc ${QMAKE_FILE_IN} && $$moc_header.commands QMAKE_EXTRA_COMPILERS += moc_header INCREDIBUILD_XGE += moc_header #moc sources moc_source.CONFIG = no_link moc_verify moc_source.dependency_type = TYPE_C moc_source.commands = ${QMAKE_FUNC_mocCmd} moc_source.output = $$MOC_DIR/$${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}$${QMAKE_EXT_CPP_MOC} moc_source.input = SOURCES OBJECTIVE_SOURCES moc_source.name = MOC ${QMAKE_FILE_IN} silent:moc_source.commands = @echo moc ${QMAKE_FILE_IN} && 
$$moc_source.commands QMAKE_EXTRA_COMPILERS += moc_source INCREDIBUILD_XGE += moc_source #make sure we can include these files INCLUDEPATH += $$MOC_DIR #auto depend on moc unix:!no_mocdepend { moc_source.depends += $$first(QMAKE_MOC) moc_header.depends += $$first(QMAKE_MOC) !contains(TARGET, moc) { #auto build moc isEmpty(QMAKE_MOC_SRC):QMAKE_MOC_SRC = "$(QTDIR)/src/tools/moc" make_moc.target = $$first(QMAKE_MOC) make_moc.commands = (cd $$QMAKE_MOC_SRC && $(MAKE)) QMAKE_EXTRA_TARGETS += make_moc } } #generate a mocclean build_pass|isEmpty(BUILDS):mocclean.depends = compiler_moc_header_clean compiler_moc_source_clean else:mocclean.CONFIG += recursive QMAKE_EXTRA_TARGETS += mocclean #generate a mocables build_pass|isEmpty(BUILDS):mocables.depends = compiler_moc_header_make_all compiler_moc_source_make_all else:mocables.CONFIG += recursive QMAKE_EXTRA_TARGETS += mocables clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/lex.prf0000644000175000017500000000176611470725216024504 0ustar ddineenddineen# # Lex extra-compiler for handling files specified in the LEXSOURCES variable # { lex.name = Lex ${QMAKE_FILE_IN} lex.input = LEXSOURCES lex_included { lex.CONFIG += no_link } else { lex.variable_out = GENERATED_SOURCES } isEmpty(QMAKE_LEXFLAGS_MANGLE):QMAKE_LEXFLAGS_MANGLE = -P${QMAKE_FILE_BASE} QMAKE_LEXEXTRAFLAGS = $$QMAKE_LEXFLAGS !yacc_no_name_mangle:QMAKE_LEXEXTRAFLAGS += $$QMAKE_LEXFLAGS_MANGLE lex.commands = $$QMAKE_LEX $$QMAKE_LEXEXTRAFLAGS ${QMAKE_FILE_IN}$$escape_expand(\n\t) \ $$QMAKE_DEL_FILE $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_lex$${first(QMAKE_EXT_CPP)}$$escape_expand(\n\t) \ $$QMAKE_MOVE lex.${QMAKE_FILE_BASE}.c $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_lex$${first(QMAKE_EXT_CPP)}$$escape_expand(\n\t) lex.output = $${QMAKE_CPP_MOD_MOC}${QMAKE_FILE_BASE}_lex$${first(QMAKE_EXT_CPP)} silent:lex.commands = @echo Lex ${QMAKE_FILE_IN} && $$lex.commands QMAKE_EXTRA_COMPILERS += lex } 
clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/exclusive_builds.prf0000644000175000017500000001071711470725216027261 0ustar ddineenddineen# fixExclusiveOutputDirs(1config, 2config) # Change all output paths that references 2config to have the string 1config in them defineTest(fixExclusiveOutputDirs) { unset(firstBuild) unset(secondBuild) unset(appendFirstBuild) firstBuild = $$1 secondBuild = $$2 count(ARGS, 2, greaterThan):isEqual($$list($$lower($$3)), false):appendFirstBuild = false else:appendFirstBuild = true isEmpty(QMAKE_DIR_REPLACE):QMAKE_DIR_REPLACE += OBJECTS_DIR MOC_DIR RCC_DIR lessThan(firstBuild, $$secondBuild):eval($${firstBuild}_and_$${secondBuild}_target:QMAKE_DIR_REPLACE += DESTDIR) else:eval($${secondBuild}_and_$${firstBuild}_target:QMAKE_DIR_REPLACE += DESTDIR) for(fix, QMAKE_DIR_REPLACE) { isEmpty($$fix)|isEqual($$fix, .) { eval($$fix = $${firstBuild}) } else:contains($$list($$first($$fix)), .*$${secondBuild}.*) { eval($$fix ~= s/$${secondBuild}/$${firstBuild}/gi) } else:isEqual(appendFirstBuild, true):!contains($$list($$first($$fix)), .*$${firstBuild}.*) { contains($$list($${first($$fix)}), .*/$):eval($$fix = $${first($$fix)}$${firstBuild}) else:eval($$fix = $${first($$fix)}-$${firstBuild}) } export($$fix) } return(true) } # addExclusiveBuilds(1config, 1name, 2config, 2name) # Adds two BUILDS which are exclusive to each other. 
defineTest(addExclusiveBuilds) { unset(firstBuild) unset(firstBuildName) unset(secondBuild) unset(secondBuildName) firstBuild = $$1 firstBuildName = $$2 secondBuild = $$3 secondBuildName = $$4 contains(TEMPLATE, subdirs) { eval(sub_$${firstBuildName}.target = $$firstBuild) export(sub_$${firstBuildName}.target) eval(sub_$${firstBuildName}.CONFIG = recursive) export(sub_$${firstBuildName}.CONFIG) eval(sub_$${secondBuildName}.target = $$secondBuild) export(sub_$${secondBuildName}.target) eval(sub_$${secondBuildName}.CONFIG = recursive) export(sub_$${secondBuildName}.CONFIG) QMAKE_EXTRA_TARGETS += sub_$${firstBuildName} sub_$${secondBuildName} export(QMAKE_EXTRA_TARGETS) } else:!build_pass { first_BUILDS = second_BUILDS = suffix_BUILDS = Build isEmpty(BUILDS): BUILDPERMUTATIONS = $$suffix_BUILDS else: BUILDPERMUTATIONS = $$BUILDS for(permutation, BUILDPERMUTATIONS) { permutation ~= s/$${suffix_BUILDS}$// isEmpty(permutation): permutationName = else: permutationName = -$$permutation # Makefile target rule eval($${firstBuildName}$${permutation}.target = $${firstBuild}$$lower($${permutationName})) export($${firstBuildName}$${permutation}.target) # IDE name eval($${firstBuildName}$${permutation}.name = $${firstBuildName}$${permutationName}) export($${firstBuildName}$${permutation}.name) # prl import CONFIG option eval($${firstBuildName}$${permutation}.PRL_CONFIG = $${firstBuild}$${permutation}) export($${firstBuildName}$${permutation}.PRL_CONFIG) # Individual CONFIG option eval($${firstBuildName}$${permutation}.CONFIG = $${firstBuild} $${firstBuildName}Build $$eval($${permutation}.CONFIG)) export($${firstBuildName}$${permutation}.CONFIG) eval($${secondBuildName}$${permutation}.target = $${secondBuild}$$lower($${permutationName})) export($${secondBuildName}$${permutation}.target) eval($${secondBuildName}$${permutation}.name = $${secondBuildName}$${permutationName}) export($${secondBuildName}$${permutation}.name) eval($${secondBuildName}$${permutation}.PRL_CONFIG = 
$${secondBuild}$${permutation}) export($${secondBuildName}$${permutation}.PRL_CONFIG) eval($${secondBuildName}$${permutation}.CONFIG = $${secondBuild} $${secondBuildName}Build $$eval($${permutation}.CONFIG)) export($${secondBuildName}$${permutation}.CONFIG) first_BUILDS += $${firstBuildName}$${permutation} second_BUILDS += $${secondBuildName}$${permutation} } # A mutual exclusive block. CONFIG($${firstBuild}, $${firstBuild}|$${secondBuild}): BUILDS = $$first_BUILDS $$second_BUILDS else: BUILDS = $$second_BUILDS $$first_BUILDS export(BUILDS) } else { eval($${firstBuildName}Build:fixExclusiveOutputDirs($$firstBuild, $$secondBuild, false)) eval($${secondBuildName}Build:fixExclusiveOutputDirs($$secondBuild, $$firstBuild, false)) } return(true) } clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/default_pre.prf0000644000175000017500000000011611470725216026172 0ustar ddineenddineenload(exclusive_builds) CONFIG = lex yacc warn_on debug uic resources $$CONFIG clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/default_post.prf0000644000175000017500000000041211470725216026370 0ustar ddineenddineenCONFIG(debug, debug|release):load(debug) else:load(release) debug_and_release:load(debug_and_release) incredibuild_xge { CONFIG -= incredibuild_xge CONFIG = incredibuild_xge $$CONFIG } QMAKE_INCDIR += $$QMAKE_INCDIR_POST QMAKE_LIBDIR += $$QMAKE_LIBDIR_POST clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/features/0000755000175000017500000000000011470725216023171 5ustar ddineenddineenclustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/common/unix.conf0000644000175000017500000000127211470725216024477 0ustar ddineenddineen# # qmake configuration for common unix # QMAKE_LEX = flex QMAKE_LEXFLAGS += QMAKE_YACC = yacc QMAKE_YACCFLAGS += -d QMAKE_YACCFLAGS_MANGLE += -p $base -b $base QMAKE_YACC_HEADER = $base.tab.h QMAKE_YACC_SOURCE = $base.tab.c QMAKE_SEPARATE_DEBUG_INFO = (test -z \"$(DESTDIR)\" || cd \"$(DESTDIR)\" ; targ=`basename $(TARGET)`; objcopy --only-keep-debug 
\"\$\$targ\" \"\$\$targ.debug\" && objcopy --strip-debug \"\$\$targ\" && objcopy --add-gnu-debuglink=\"\$\$targ.debug\" \"\$\$targ\" && chmod -x \"\$\$targ.debug\" ) ; QMAKE_INSTALL_SEPARATE_DEBUG_INFO = test -z "$(DESTDIR)" || cd \"$(DESTDIR)\" ; $(INSTALL_FILE) `basename $(TARGET)`.debug $(INSTALL_ROOT)/\$\$target_path/ clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/common/linux.conf0000644000175000017500000000246711470725216024662 0ustar ddineenddineen# # qmake configuration for common linux # QMAKE_CFLAGS_THREAD += -D_REENTRANT QMAKE_CXXFLAGS_THREAD += $$QMAKE_CFLAGS_THREAD QMAKE_INCDIR = QMAKE_LIBDIR = QMAKE_INCDIR_X11 = /usr/X11R6/include QMAKE_LIBDIR_X11 = /usr/X11R6/lib QMAKE_INCDIR_QT = $$[QT_INSTALL_HEADERS] QMAKE_LIBDIR_QT = $$[QT_INSTALL_LIBS] QMAKE_INCDIR_OPENGL = /usr/X11R6/include QMAKE_LIBDIR_OPENGL = /usr/X11R6/lib QMAKE_LIBS = QMAKE_LIBS_DYNLOAD = -ldl QMAKE_LIBS_X11 = -lXext -lX11 -lm QMAKE_LIBS_X11SM = -lSM -lICE QMAKE_LIBS_NIS = -lnsl QMAKE_LIBS_OPENGL = -lGLU -lGL QMAKE_LIBS_OPENGL_QT = -lGL QMAKE_LIBS_THREAD = -lpthread QMAKE_MOC = $$[QT_INSTALL_BINS]/moc QMAKE_UIC = $$[QT_INSTALL_BINS]/uic QMAKE_AR = ar cqs QMAKE_RANLIB = QMAKE_TAR = tar -cf QMAKE_GZIP = gzip -9f QMAKE_COPY = cp -f QMAKE_COPY_FILE = $(COPY) QMAKE_COPY_DIR = $(COPY) -r QMAKE_MOVE = mv -f QMAKE_DEL_FILE = rm -f QMAKE_DEL_DIR = rmdir QMAKE_STRIP = strip QMAKE_STRIPFLAGS_LIB += --strip-unneeded QMAKE_CHK_DIR_EXISTS = test -d QMAKE_MKDIR = mkdir -p QMAKE_INSTALL_FILE = install -m 644 -p QMAKE_INSTALL_PROGRAM = install -m 755 -p include(unix.conf)clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/common/g++.conf0000644000175000017500000000315411470725216024071 0ustar ddineenddineen# # qmake configuration for common gcc # QMAKE_CC = gcc QMAKE_CFLAGS += -pipe QMAKE_CFLAGS_DEPS += -M QMAKE_CFLAGS_WARN_ON += -Wall -W QMAKE_CFLAGS_WARN_OFF += -w QMAKE_CFLAGS_RELEASE += -O2 QMAKE_CFLAGS_DEBUG += -g QMAKE_CFLAGS_SHLIB += -fPIC QMAKE_CFLAGS_STATIC_LIB += -fPIC QMAKE_CFLAGS_YACC += 
-Wno-unused -Wno-parentheses QMAKE_CFLAGS_HIDESYMS += -fvisibility=hidden QMAKE_CFLAGS_PRECOMPILE += -x c-header -c ${QMAKE_PCH_INPUT} -o ${QMAKE_PCH_OUTPUT} QMAKE_CFLAGS_USE_PRECOMPILE += -include ${QMAKE_PCH_OUTPUT_BASE} QMAKE_CXX = g++ QMAKE_CXXFLAGS += $$QMAKE_CFLAGS QMAKE_CXXFLAGS_DEPS += $$QMAKE_CFLAGS_DEPS QMAKE_CXXFLAGS_WARN_ON += $$QMAKE_CFLAGS_WARN_ON QMAKE_CXXFLAGS_WARN_OFF += $$QMAKE_CFLAGS_WARN_OFF QMAKE_CXXFLAGS_RELEASE += $$QMAKE_CFLAGS_RELEASE QMAKE_CXXFLAGS_DEBUG += $$QMAKE_CFLAGS_DEBUG QMAKE_CXXFLAGS_SHLIB += $$QMAKE_CFLAGS_SHLIB QMAKE_CXXFLAGS_STATIC_LIB += $$QMAKE_CFLAGS_STATIC_LIB QMAKE_CXXFLAGS_YACC += $$QMAKE_CFLAGS_YACC QMAKE_CXXFLAGS_HIDESYMS += $$QMAKE_CFLAGS_HIDESYMS -fvisibility-inlines-hidden QMAKE_CXXFLAGS_PRECOMPILE += -x c++-header -c ${QMAKE_PCH_INPUT} -o ${QMAKE_PCH_OUTPUT} QMAKE_CXXFLAGS_USE_PRECOMPILE = $$QMAKE_CFLAGS_USE_PRECOMPILE QMAKE_LINK = g++ QMAKE_LINK_SHLIB = g++ QMAKE_LFLAGS += QMAKE_LFLAGS_RELEASE += QMAKE_LFLAGS_DEBUG += QMAKE_LFLAGS_APP += QMAKE_LFLAGS_SHLIB += -shared QMAKE_LFLAGS_PLUGIN += $$QMAKE_LFLAGS_SHLIB QMAKE_LFLAGS_SONAME += -Wl,-soname, QMAKE_LFLAGS_THREAD += QMAKE_RPATH = -Wl,-rpath, QMAKE_PCH_OUTPUT_EXT = .gch # -Bsymbolic-functions (ld) support QMAKE_LFLAGS_BSYMBOLIC_FUNC = -Wl,-Bsymbolic-functions QMAKE_LFLAGS_DYNAMIC_LIST = -Wl,--dynamic-list, clustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/common/0000755000175000017500000000000011470725216022643 5ustar ddineenddineenclustalx-2.1/usr/local/Trolltech/Qt-4.3.1/mkspecs/0000755000175000017500000000000011470725216021353 5ustar ddineenddineenclustalx-2.1/usr/local/Trolltech/Qt-4.3.1/0000755000175000017500000000000011470725216017706 5ustar ddineenddineenclustalx-2.1/usr/local/Trolltech/0000755000175000017500000000000011470725216016661 5ustar ddineenddineenclustalx-2.1/usr/local/0000755000175000017500000000000011470725216014721 5ustar ddineenddineenclustalx-2.1/usr/0000755000175000017500000000000011470725216013627 5ustar 
ddineenddineenclustalx-2.1/mainwindow.h0000644000175000017500000002773711470725216015363 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * CHANGES: * Mark: Jan 16th 2007. I am working on Bugzilla bug 11. Allowing the user to * paste sequences into the lower profile even though there has only been one * profile added. I have changed the pasteSequences() function. * Mark: Jan 18th 2007. Bugzilla bug 3. Changed the way Files are opened * * 12-4-07, Mark Larkin : Changed destructor. We dont need to delete QObjects. * * 25-04-07,Nigel Brown(EMBL): Added keyPressEvent() and keyReleaseEvent(). * * 16-5-07, Mark Larkin: Added a submenu to the tree menu to allow the selection of * NJ or UPGMA for tree construction. Also added support functions. * Also added a sub menu for iteration selection. * * 22-5-2007, Mark Larkin: added function hideLowScoreSegs() * * 23-5-2007, Mark Larkin: added resetAndHideLowScoreSeqs() * * 24-05-07,Nigel Brown(EMBL): simplified default parameter loading mechanism; * removed setPathToExecutable(), removeExecutableNameFromPath(), * executablePath members. Added loadDefaultParamFile(). 
* * 2007-12-03, Andreas Wilm (UCD): moved loadSequences() with optional * file argument to public slot * ****************************************************************************/ #ifndef MAINWINDOW_H #define MAINWINDOW_H #include #include #include "AlignmentViewerWidget.h" #include "AlignmentFormatOptions.h" #include "TreeFormatOptions.h" #include "ProteinGapParameters.h" #include "AlignmentParameters.h" #include "ColumnScoreParams.h" #include "LowScoringSegParams.h" #include "PairwiseParams.h" #include "clustalW/Clustal.h" #include "clustalW/general/userparams.h" #include "clustalW/alignment/Sequence.h" #include "ColorFileXmlParser.h" #include "ColorParameters.h" #include "SearchForString.h" #include "version.h" class QAction; class QActionGroup; class QLabel; class QMenu; class QComboBox; class QStringList; class QGroupBox; class QTextEdit; class QListView; class QListWidget; class QListWidgetItem; class QTextCursor; class QFont; class QTableView; class QScrollArea; class QScrollBar; class QCheckBox; class QVBoxLayout; class QHBoxLayout; class QKeyEvent; /** * This class is an extension of the widget QMainWindow. It contains an * alignment widget, a histogram widget and a sequence name widget. * It is currently used as an alignment viewer, but it will be possible to extend its * functionality to an alignment editor. 
*/ class MainWindow : public QMainWindow { Q_OBJECT public: MainWindow(); ~MainWindow(); public slots: void loadSequences(QString myFile = ""); void commandLineHelp(); // the following are public so they can be called from main.cpp at startup void setProfileModeCmd(); void loadProfile1(string myFile); void loadProfile2(string myFile); protected: /* * slots are functions, they can be linked to signals */ void showEvent(QShowEvent* event); private slots: void changeMode(int mode); void enableProfileActions(); void enableSequenceActions(); // Functions for menu actions // File menu void appendSequences(); void saveSequencesAs(); void loadProfile1(); void loadProfile2(); void saveProfile1As(); void saveProfile2As(); void writeAlignmentAsPostscript(); void writeProfile1AsPostscript(); void writeViewer1AsPostscript(int type); void writeProfile2AsPostscript(); void quitProgram(); // Edit menu void cutSequences(); void pasteSequences(); void selectAllSequences(); void selectProfile1(); void selectProfile2(); void addProfile2ToProfile1(); void clearSequenceSelection(); void clearRangeSelection(); void searchForString(); void removeAllGaps(); void removeGapOnlyColumns(); //Alignment menu void doCompleteAlignment(); void produceGuideTreeOnly(); void doAlignmentFromGuideTree(); void realignSelectedSequences(); void realignSelectedResidueRange(); void alignProfile2ToProfile1(); void alignProfilesFromGuideTrees(); void alignSequencesToProfile1(); void alignSequencesToProfile1FromTree(); void outputFormatOptions(); void setDefaultParameters(); // Submenu void resetNewGapsBeforeAlignment(); void resetAllGapsBeforeAlignment(); void pairwiseAlignmentParameters(); void multipleAlignmentParameters(); void proteinGapParameters(); void secondaryStructureParameters(); // Tree menu void drawNJTree(); void bootstrapNJTree(); void excludePositionsWithGaps(); void correctForMultipleSubstitutions(); void treeOutputFormatOptions(); // Colors menu void backgroundColoring(); void 
blackAndWhite(); void defaultColors(); void loadColorParameterFile(); // Quality menu void showLowScoreSeg(); void showExceptionalRes(); void lowScoreSegParams(); void columnScoreParams(); void saveColumnScoresToFile(); // Help menu void generalHelp(); void inputOutputHelp(); void editAlignHelp(); void multiAlignHelp(); void profileAlignHelp(); void secondaryStructureHelp(); void treesHelp(); void colorsHelp(); void alignQualityHelp(); void referencesHelp(); // Clustering algorithm submenu void setUPGMA(); // Mark 16-5-07 void setNJ(); // Mark 16-5-07 // Iteration submenu void setNoIteration(); // Mark 16-5-07 void setTreeIteration(); // Mark 16-5-07 void setAlignIteration(); // Mark 16-5-07 void scrollLockClicked(); void searchFromStart(); void searchAgain(); private: void createActions(); void createMenus(); void createAlignModeMenu(); void createFontMenu(); void createHorizontalBox(); void errorHandler(int errorCode, string offendingSeq=NULL); void calculateLowScoreAlign(); void calculateLowScoreProfile(); void recalculateLowScoreSegments(); void loadDefaultParamFile(const QString &fileName, ColorParameters* colorParam, bool showMessage = true); //- nige bool readInParamFile(QString file, ColorParameters* colorParam, bool showMessage = true); void setUpSearchForStringDialog(); bool findNextInstanceOfString(const std::vector* seqsSelected, int firstSeq, Viewer viewerSearched); vector getAllSelectedSeqsVector(); bool allSeqsInAlignmentOrProfileSelected(std::vector* seqsSelected); bool anySeqsSelected(); void setAllPtrsToZero(); void hideLowScoreSegs(); void resetAndHideLowScoreSeqs(); QVBoxLayout *mainlayout; QWidget *sideFiller; QHBoxLayout *layoutHbox; AlignmentViewerWidget* alignmentViewer; // Displayes alignment AlignmentViewerWidget* profile2Viewer; // displays profile 2 clustalw::Clustal* clustalObj; clustalw::Alignment* alignPtr; int profileMode; int multiMode; int currentMode; QWidget *w; QMenu *fileMenu; QMenu *editMenu; QMenu *alignmentMenu; QMenu 
*treeMenu; QMenu *colorMenu; QMenu *qualityMenu; QMenu *helpMenu; list toplevelMenuList;// AW used for enabling/disabling menus QMenu *treeSubMenu; // Mark 16-5-07 QMenu *iterationSubMenu; // Mark 16-5-07 QComboBox *alignmentModeMenu; QComboBox *fontMenu; /* * Each command in Qt is usually implemented as an action. * These are the commands for the file menu */ QActionGroup *alignmentGroup; QAction *actionLoadSequences; QAction *actionAppendSequences; QAction *actionSaveSeqAs; QAction *actionLoadProfile1; QAction *actionLoadProfile2; QAction *actionSaveProfile1As; QAction *actionSaveProfile2As; QAction *actionWriteAlignmentAsPost; QAction *actionWriteProfile1AsPost; QAction *actionWriteProfile2AsPost; QAction *actionQuit; /* * Actions for the clustal edit menu */ QAction *actionCutSequences; QAction *actionPasteSequences; QAction *actionSelectAllSequences; QAction *actionSelectProfile1; QAction *actionSelectProfile2; QAction *actionAddProfile2ToProfile1; QAction *actionClearSequenceSelection; QAction *actionClearRangeSelection; QAction *actionSearchForString; QAction *actionRemoveAllGaps; QAction *actionRemoveGapOnlyColumns; /* * Actions for the Alignment menu */ QAction *actionDoCompleteAlign; QAction *actionDoGuideTreeOnly; QAction *actionDoAlignmentFromGuideTree; QAction *actionRealignSelectSeq; QAction *actionRealignSelectRange; QAction *actionAlignProfile1ToProfile2; QAction *actionAlignProfileFromTree; QAction *actionAlignSequencesToProfile1; QAction *actionAlignSequencesToProfile1FromTree; QMenu *menuAlignParameters; QAction *actionOutputFormatOptions; QAction *actionResetNewGapsBeforeAlignment; QAction *actionResetAllGapsBeforeAlignment; QAction *actionPairwiseAlignmentParameters; QAction *actionMultipleAlignmentParameters; QAction *actionProteinGapParameters; QAction *actionSecondaryStructureParameters; QAction *actionSetDefaultParameters; /* * Actions for the Tree menu */ QAction *actionDrawNJTree; QAction *actionBootstrapNJTree; QAction 
*actionExcludePosWithGap; QAction *actionCorrectForMultiSub; QAction *actionTreeOutputFormatOptions; /* * Actions for the color Menu */ QAction *actionBackColoring; QAction *actionBlackAndWhite; QAction *actionDefaultColors; QAction *actionLoadColorParameterFile; /* * Actions for the quality Menu */ QAction *actionShowLowScoreSeg; QAction *actionShowExceptionalRes; QAction *actionLowScoreSegParams; QAction *actionColumnScoreParams; QAction *actionSaveColumnScoresToFile; /* * Actions for the help menu */ QAction *actionGeneralHelp; QAction *actionInputOutputHelp; QAction *actionEditAlignHelp; QAction *actionMultiAlignHelp; QAction *actionProfileAlignHelp; QAction *actionSecondaryStructureHelp; QAction *actionTreesHelp; QAction *actionColorsHelp; QAction *actionAlignQualityHelp; QAction *actionCommandLineHelp; QAction *actionReferencesHelp; /** Mark 16-5-07 * Actions for the Clustering algorithm sub menu */ QAction *actionUseUPGMA; // Mark 16-5-07 QAction *actionUseNJ; // Mark 16-5-07 /** Mark 16-5-07 * Actions for the iteration sub menu */ QAction *actionNoIteration; // Mark 16-5-07 QAction *actionTreeIteration; // Mark 16-5-07 QAction *actionAlignIteration; // Mark 16-5-07 QGroupBox *horizontalGroupBox; QCheckBox* profileScrollLock; /* * The main font for the application */ QFont displayFont; AlignmentParameters* alignmentParams; PairwiseParams* pairwiseParams; ColumnScoreParams* colScoreParams; LowScoringSegParams* lowScoreParams; SearchForString* searchSeqs; bool showExceptionalResidues; bool showLowScoreSegments; bool seqWeightCalculatedAlignmentViewer; vector* lowScoreSeqWeightAlignmentViewer; // Should be nSeqs long + 1 clustalw::Array2D* lowScoreResAlignmentViewer; bool seqWeightCalculatedProfile2Viewer; vector* lowScoreSeqWeightProfile2Viewer; // Should be nSeqs long + 1 clustalw::Array2D* lowScoreResProfile2Viewer; QLabel* fontLabel; QLabel* alignModeLabel; QLabel* bottomInfoLabel; ColorParameters proteinColors; ColorParameters dnaColors; ColorParameters 
userColors; ColorParameters printerColors; QString protColorFile; QString dnaColorFile; QString printerColorFile; QString sequenceFileName; QString profile2FileName; bool backGroundColors; int searchBeginSeq; int searchBeginRes; std::string searchString; vector cutSeqs; //- nige void keyPressEvent(QKeyEvent *); void keyReleaseEvent(QKeyEvent *); void toplevelMenusEnabled(bool b); }; #endif clustalx-2.1/mainwindow.cpp0000644000175000017500000035056211470725216015711 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * Changes: * * Nigel Brown,EMBL,18-12-06: changed version string. * * Mark: 18-01-2007: Made changes for bug 3, to remember the last directory. Using * FileDialog instead of QFileDialog. * * Mark: 19-1-2007 added more items to the font combobox. * * 31-01-07,Nigel Brown(EBL): changed font sizes available in createFontMenu() * controlled by OS_MAC from "compile.h". * * 03-02-07,Nigel Brown(EMBL): added 'All Files (*)' selection in * loadColorParameterFile(). All references to getOpenFileName() changed to * getOpenDataFileName(). * * 07-02-07,Nigel Brown(EMBL): fixed typos in main menu. * * 16-02-07,Nigel Brown(EMBL): added more font sizes to createFontMenu(). * * 05-03-07,Nigel Brown(EMBL): upcased search string in searchAgain(). * * 12-03-07,Nigel Brown(EMBL): fixed Alignment menu typo. * * 15-03-07,Nigel Brown(EMBL): changed font sizes to mainly classical * typographic scalings. * * 26-03-07,Nigel Brown(EMBL): loadProfile2() now redraws original Profile1 * after error loading Profile2. * * 30-03-07,Nigel Brown(EMBL): loadProfile2() now redraws original Profile1 * after error loading Profile2; alignProfile2ToProfile1(), * alignProfilesFromGuideTrees(), alignSequencesToProfile1() and * alignSequencesToProfile1FromTree() now draw Profile1 and Profile2 * separately after alignment. * * 02-04-07,Nigel Brown(EMBL): uncommented the "No sequences in file..." 
* warning in errorHandler() and disabled its counterpart in * FileReader::seqInput() which was causing a crash during the repaint after * the user accepts the message, because some state (what exactly?) is * unstable at that depth after a sequence load error, specifically when * loading an empty file after already loading some sequences. * * 12-4-07, Mark Larkin : Changed destructor. We dont need to delete QObjects. * * 25-04-07,Nigel Brown(EMBL): Added a singleton KeyController class and * keyPressEvent() and keyReleaseEvent(), which call KeyController to set the * keyboard state for the whole application. All handlers of [Ctrl]+* keyboard * shortcuts MUST call KeyController::clearCtrl() to simulate a corresponding * key release event. * * 04-05-07,Nigel Brown(EMBL): changed pasteSequences() to insert at end of a * selected block, rather than inside it. pasteSequences() now uses * AlignmentViewerWidget::hasFocus() (instead of AVW::hasSeqNameWidgetFocus()). * 'no seqs loaded' messages turned into English. * * 16-5-07, Mark Larkin: Added a submenu to the tree menu to allow the selection of * NJ or UPGMA for tree construction. Also added support functions. * Also added a sub menu for iteration selection. * * 22-5-2007, Mark Larkin: added function hideLowScoreSegs(), this is called * after each change of mode, cut and paste. * * 23-5-2007, Mark Larkin: added resetAndHideLowScoreSeqs() * * 24-05-07,Nigel Brown(EMBL): simplified default parameter loading * mechanism in readInParamFile(); removed setPathToExecutable(), * removeExecutableNameFromPath(), executablePath members. Added * Resources class instead; added loadDefaultParamFile(). * * 27-06-07,Nigel Brown(EMBL): For screen layout balancing: added two * connections to exchange AlignmentViewerWidget number-of-sequence changes. * * 1-9-2007, Mark Larkin: Setting UPGMA now makes "Bootstrap NJ tree" * inactive. 
* * 2007-12-03, Andreas Wilm (UCD): added optional file argument to * loadSequences * * 26-11-07,Nigel Brown(EMBL): Fixed spacing typo in message in * readInParamFile(); changed WriteViewer1AsPostscript() and * WriteViewer2AsPostscript() to trigger error message on missing * colour parameter file via readInParamFile(). * ****************************************************************************/ #include #include #include #include #include #include #include #include "SecStructOptions.h" #include "mainwindow.h" #include "WritePostscriptFile.h" #include "AlignOutputFileNames.h" #include "TreeOutputFileNames.h" #include "BootstrapTreeDialog.h" #include "HelpDisplayWidget.h" #include "FileDialog.h" #include "KeyController.h" //AW not needed anymore: #include "compile.h" #include "Resources.h" /** * This is the constructor for a MainWindow widget. It sets up all the initial * values for a MainWindow. * It is not currently possible to change the default values. */ MainWindow::MainWindow() : showExceptionalResidues(false), showLowScoreSegments(false), seqWeightCalculatedAlignmentViewer(false), seqWeightCalculatedProfile2Viewer(false), backGroundColors(true), searchBeginSeq(0), searchBeginRes(0) { setAllPtrsToZero(); QFont infoFont("Helvetica", 10, QFont::Bold); protColorFile = psColorFile; dnaColorFile = psDNAFile; printerColorFile = psPrintFile; bottomInfoLabel = new QLabel(""); bottomInfoLabel->setFont(infoFont); lowScoreSeqWeightAlignmentViewer = new vector; lowScoreResAlignmentViewer = new clustalw::Array2D(); lowScoreSeqWeightProfile2Viewer = new vector; lowScoreResProfile2Viewer = new clustalw::Array2D(); alignmentParams = new AlignmentParameters; pairwiseParams = new PairwiseParams; colScoreParams = new ColumnScoreParams; lowScoreParams = new LowScoringSegParams; displayFont.setPixelSize(12); w = new QWidget; w->setFont(displayFont); setCentralWidget(w); /* * Call the functions to setup the horizontal and grid layout boxes in the main window */ 
createHorizontalBox(); alignmentViewer = new AlignmentViewerWidget(); profile2Viewer = new AlignmentViewerWidget(); profile2Viewer->setVisible(false); clustalObj = new clustalw::Clustal; alignmentViewer->setAlignmentView(clustalObj->getAlignmentPtr(), 0, 0); profile2Viewer->setAlignmentView(clustalObj->getAlignmentPtr(), 0, 0); alignPtr = clustalObj->getAlignmentPtr(); /* * The QVBoxLayout class lines up widgets vertically */ mainlayout = new QVBoxLayout; mainlayout->setMargin(5); mainlayout->addWidget(horizontalGroupBox); mainlayout->addWidget(alignmentViewer); mainlayout->addWidget(profile2Viewer); mainlayout->addWidget(bottomInfoLabel); w->setLayout(mainlayout); /* * Sets up the menus in the menubar. Also sets up the actions associated * with the menu options. */ createActions(); createMenus(); enableSequenceActions(); /* * Sets initial size for the mainwindow */ setWindowTitle(QString(CLUSTALX_NAME)+ " " + QString(CLUSTALX_VERSION)); setMinimumSize(160, 160); repaint(); resize(1000, 600); connect(fontMenu, SIGNAL(activated(const QString &)), alignmentViewer, SLOT(updateFontAndBoxes(const QString &))); connect(fontMenu, SIGNAL(activated(const QString &)), profile2Viewer, SLOT(updateFontAndBoxes(const QString &))); currentMode = multiMode; setUpSearchForStringDialog(); } void MainWindow::showEvent(QShowEvent* event) { clustalw::utilityObject->setInfoLabelPtr(bottomInfoLabel); loadDefaultParamFile(protColorFile, &proteinColors); loadDefaultParamFile(dnaColorFile, &dnaColors); loadDefaultParamFile(printerColorFile, &printerColors, false); bool dnaFlag = clustalw::userParameters->getDNAFlag(); if(dnaFlag) { alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } alignmentViewer->setPrintoutColorScheme(printerColors); profile2Viewer->setPrintoutColorScheme(printerColors); } void MainWindow::loadDefaultParamFile(const QString 
&fileName, ColorParameters* colorParam, bool showMessage) { Resources *res = Resources::Instance(); QString file = res->searchPathsForFile(fileName); //cout << "paramFile: " << file.toStdString().c_str() << endl; readInParamFile(file, colorParam, showMessage); } /** * The readInParamFile tries to read in the file "fileName" and store the info in the * colorParams object. * @param fileName the name of the color parameter file to be read. * @param colorParam the ColorParameters object to store the info from the file. * @param showMessage show messages if there is something wrong?? * @return true if successful, false if not. */ bool MainWindow::readInParamFile(QString fileName, ColorParameters* colorParam, bool showMessage) { // if we cant read in the file, xml error etc, clear the object // and set it to use the defaults. if(colorParam != 0) { QFile file(fileName); file.open(QFile::ReadOnly | QFile::Text); if (!file.isOpen()) { if(showMessage) { QMessageBox::warning(this, tr("Color Parameter file"), tr("Cannot read color file %1:\n Using default colors.") .arg(fileName)); } // Use hardcoded colors!!! colorParam->clear(); colorParam->useHardCodedColors(); colorParam->useDefaultColorRules(); return false; } bool success; ColorFileXmlParser colorFileParser(showMessage); success = colorFileParser.read(&file); if(success) { *colorParam = *colorFileParser.getColorParametersObj(); //ColorParameters(*colorFileParser.getColorParametersObj()); return true; } else { colorParam->clear(); colorParam->useHardCodedColors(); colorParam->useDefaultColorRules(); return false; } } return true; } /** * The function createActions is used to set up the actions to be used in the menus. 
*/ void MainWindow::createActions() { /* * Clustal File options */ actionLoadSequences = new QAction(tr("Load Sequences"), this); actionLoadSequences->setShortcut(tr("Ctrl+O")); actionLoadSequences->setStatusTip(tr("Load Sequences from File")); connect(actionLoadSequences, SIGNAL(triggered()), this, SLOT(loadSequences())); actionAppendSequences = new QAction(tr("Append Sequences"), this); actionAppendSequences->setStatusTip(tr("Append Sequences from File")); connect(actionAppendSequences, SIGNAL(triggered()), this, SLOT(appendSequences())); actionSaveSeqAs = new QAction(tr("Save Sequences as..."), this); actionSaveSeqAs->setShortcut(tr("Ctrl+S")); actionSaveSeqAs->setStatusTip(tr("Save Sequences as...")); connect(actionSaveSeqAs, SIGNAL(triggered()), this, SLOT(saveSequencesAs())); actionLoadProfile1 = new QAction(tr("Load Profile 1"), this); actionLoadProfile1->setStatusTip(tr("Load Profile 1")); connect(actionLoadProfile1, SIGNAL(triggered()), this, SLOT(loadProfile1())); actionLoadProfile2 = new QAction(tr("Load Profile 2"), this); actionLoadProfile2->setStatusTip(tr("Load Profile 2")); connect(actionLoadProfile2, SIGNAL(triggered()), this, SLOT(loadProfile2())); actionSaveProfile1As = new QAction(tr("Save Profile 1 as..."), this); actionSaveProfile1As->setStatusTip(tr("Save Profile 1")); connect(actionSaveProfile1As, SIGNAL(triggered()), this, SLOT(saveProfile1As())); actionSaveProfile2As = new QAction(tr("Save Profile 2 as..."), this); actionSaveProfile2As->setStatusTip(tr("Save Profile 2")); connect(actionSaveProfile2As, SIGNAL(triggered()), this, SLOT(saveProfile2As())); actionWriteAlignmentAsPost = new QAction(tr("Write Alignment as Postscript"), this); actionWriteAlignmentAsPost->setStatusTip(tr("Write Alignment as Postscript")); actionWriteAlignmentAsPost->setShortcut(tr("Ctrl+P")); connect(actionWriteAlignmentAsPost, SIGNAL(triggered()), this, SLOT(writeAlignmentAsPostscript())); actionWriteProfile1AsPost = new QAction(tr("Write Profile 1 as 
Postscript"), this); actionWriteProfile1AsPost->setStatusTip(tr("Write Profile 1 as Postscript")); connect(actionWriteProfile1AsPost, SIGNAL(triggered()), this, SLOT(writeProfile1AsPostscript())); actionWriteProfile2AsPost = new QAction(tr("Write Profile 2 as Postscript"), this); actionWriteProfile2AsPost->setStatusTip(tr("Write Profile 2 as Postscript")); connect(actionWriteProfile2AsPost, SIGNAL(triggered()), this, SLOT(writeProfile2AsPostscript())); actionQuit = new QAction(tr("Quit"), this); actionQuit->setStatusTip(tr("Quit")); actionQuit->setShortcut(tr("Ctrl+Q")); connect(actionQuit, SIGNAL(triggered()), this, SLOT(quitProgram())); /* * Clustal Edit actions */ actionCutSequences = new QAction(tr("Cut Sequences"), this); actionCutSequences->setShortcut(tr("Ctrl+X")); actionCutSequences->setStatusTip(tr("Cut Sequences from Alignment")); connect(actionCutSequences, SIGNAL(triggered()), this, SLOT(cutSequences())); actionPasteSequences = new QAction(tr("Paste Sequences"), this); actionPasteSequences->setStatusTip(tr("Paste Sequences")); actionPasteSequences->setShortcut(tr("Ctrl+V")); connect(actionPasteSequences, SIGNAL(triggered()), this, SLOT(pasteSequences())); actionSelectAllSequences = new QAction(tr("Select All Sequences"), this); actionSelectAllSequences->setShortcut(tr("Ctrl+A")); connect(actionSelectAllSequences, SIGNAL(triggered()), this, SLOT(selectAllSequences())); actionSelectProfile1 = new QAction(tr("Select Profile 1"), this); connect(actionSelectProfile1, SIGNAL(triggered()), this, SLOT(selectProfile1())); actionSelectProfile2 = new QAction(tr("Select Profile 2"), this); connect(actionSelectProfile2, SIGNAL(triggered()), this, SLOT(selectProfile2())); actionAddProfile2ToProfile1 = new QAction(tr("Add Profile 2 to Profile 1"), this); connect(actionAddProfile2ToProfile1, SIGNAL(triggered()), this, SLOT(addProfile2ToProfile1())); actionClearSequenceSelection = new QAction(tr("Clear Sequence Selection"), this); connect(actionClearSequenceSelection, 
SIGNAL(triggered()), this, SLOT(clearSequenceSelection())); actionClearRangeSelection = new QAction(tr("Clear Range Selection"), this); connect(actionClearRangeSelection, SIGNAL(triggered()), this, SLOT(clearRangeSelection())); actionSearchForString = new QAction(tr("Search for String"), this); actionSearchForString->setShortcut(tr("Ctrl+F")); connect(actionSearchForString, SIGNAL(triggered()), this, SLOT(searchForString())); actionRemoveAllGaps = new QAction(tr("Remove All Gaps"), this); connect(actionRemoveAllGaps, SIGNAL(triggered()), this, SLOT(removeAllGaps())); actionRemoveGapOnlyColumns = new QAction(tr("Remove Gap-Only Columns"), this); connect(actionRemoveGapOnlyColumns, SIGNAL(triggered()), this, SLOT(removeGapOnlyColumns())); /* * Alignment Menu */ actionDoCompleteAlign = new QAction(tr("Do Complete Alignment"), this); actionDoCompleteAlign->setShortcut(tr("Ctrl+L")); connect(actionDoCompleteAlign, SIGNAL(triggered()), this, SLOT(doCompleteAlignment())); actionDoGuideTreeOnly = new QAction(tr("Do Guide Tree Only"), this); actionDoGuideTreeOnly->setShortcut(tr("Ctrl+G")); connect(actionDoGuideTreeOnly, SIGNAL(triggered()), this, SLOT(produceGuideTreeOnly())); actionDoAlignmentFromGuideTree = new QAction(tr("Do Alignment from Guide Tree"), this); connect(actionDoAlignmentFromGuideTree, SIGNAL(triggered()), this, SLOT(doAlignmentFromGuideTree())); actionRealignSelectSeq = new QAction(tr("Realign Selected Sequences"), this); connect(actionRealignSelectSeq, SIGNAL(triggered()), this, SLOT(realignSelectedSequences())); actionRealignSelectRange = new QAction(tr("Realign Selected Residue Range"), this); connect(actionRealignSelectRange, SIGNAL(triggered()), this, SLOT(realignSelectedResidueRange())); actionAlignProfile1ToProfile2 = new QAction(tr("Align Profile 2 to Profile 1"), this); connect(actionAlignProfile1ToProfile2, SIGNAL(triggered()), this, SLOT(alignProfile2ToProfile1())); actionAlignProfileFromTree = new QAction(tr("Align Profiles from Guide Trees"), 
this); connect(actionAlignProfileFromTree, SIGNAL(triggered()), this, SLOT(alignProfilesFromGuideTrees())); actionAlignSequencesToProfile1 = new QAction(tr("Align Sequences to Profile 1"), this); connect(actionAlignSequencesToProfile1, SIGNAL(triggered()), this, SLOT(alignSequencesToProfile1())); actionAlignSequencesToProfile1FromTree = new QAction(tr("Align Sequences to Profile 1 from Tree"), this); connect(actionAlignSequencesToProfile1FromTree, SIGNAL(triggered()), this, SLOT(alignSequencesToProfile1FromTree())); actionSetDefaultParameters = new QAction(tr("Set All Parameters to default"), this); connect(actionSetDefaultParameters, SIGNAL(triggered()), this, SLOT(setDefaultParameters())); menuAlignParameters = new QMenu(tr("Alignment Parameters")); actionOutputFormatOptions = new QAction(tr("Output Format Options"), this); connect(actionOutputFormatOptions, SIGNAL(triggered()), this, SLOT(outputFormatOptions())); actionResetNewGapsBeforeAlignment = new QAction(tr("Reset New Gaps before Alignment"), this); actionResetNewGapsBeforeAlignment->setCheckable(true); connect(actionResetNewGapsBeforeAlignment, SIGNAL(triggered()), this, SLOT(resetNewGapsBeforeAlignment())); actionResetAllGapsBeforeAlignment = new QAction(tr("Reset All Gaps before Alignment"), this); actionResetAllGapsBeforeAlignment->setCheckable(true); connect(actionResetAllGapsBeforeAlignment, SIGNAL(triggered()), this, SLOT(resetAllGapsBeforeAlignment())); actionPairwiseAlignmentParameters = new QAction(tr("Pairwise Alignment Parameters"), this); connect(actionPairwiseAlignmentParameters, SIGNAL(triggered()), this, SLOT(pairwiseAlignmentParameters())); actionMultipleAlignmentParameters = new QAction(tr("Multiple Alignment Parameters"), this); connect(actionMultipleAlignmentParameters, SIGNAL(triggered()), this, SLOT(multipleAlignmentParameters())); actionProteinGapParameters = new QAction(tr("Protein Gap Parameters"), this); connect(actionProteinGapParameters, SIGNAL(triggered()), this, 
SLOT(proteinGapParameters())); actionSecondaryStructureParameters = new QAction(tr("Secondary Structure Parameters"), this); connect(actionSecondaryStructureParameters, SIGNAL(triggered()), this, SLOT(secondaryStructureParameters())); menuAlignParameters->addAction(actionResetNewGapsBeforeAlignment); menuAlignParameters->addAction(actionResetAllGapsBeforeAlignment); menuAlignParameters->addAction(actionPairwiseAlignmentParameters); menuAlignParameters->addAction(actionMultipleAlignmentParameters); menuAlignParameters->addAction(actionProteinGapParameters); menuAlignParameters->addAction(actionSecondaryStructureParameters); /** * Mark: 16-5-07 Iteration sub menu */ iterationSubMenu = new QMenu(tr("Iteration")); actionNoIteration = new QAction(tr("None"), this); actionNoIteration->setCheckable(true); connect(actionNoIteration, SIGNAL(triggered()), this, SLOT(setNoIteration())); actionTreeIteration = new QAction(tr("Iterate each alignment step"), this); actionTreeIteration->setCheckable(true); connect(actionTreeIteration, SIGNAL(triggered()), this, SLOT(setTreeIteration())); actionAlignIteration = new QAction(tr("Iterate final alignment"), this); actionAlignIteration->setCheckable(true); connect(actionAlignIteration, SIGNAL(triggered()), this, SLOT(setAlignIteration())); iterationSubMenu->addAction(actionNoIteration); iterationSubMenu->addAction(actionTreeIteration); iterationSubMenu->addAction(actionAlignIteration); if(clustalw::userParameters->getDoRemoveFirstIteration() == clustalw::TREE) { actionNoIteration->setChecked(false); actionTreeIteration->setChecked(true); actionAlignIteration->setChecked(false); } else if(clustalw::userParameters->getDoRemoveFirstIteration() == clustalw::ALIGNMENT) { actionNoIteration->setChecked(false); actionTreeIteration->setChecked(false); actionAlignIteration->setChecked(true); } else { actionNoIteration->setChecked(true); actionTreeIteration->setChecked(false); actionAlignIteration->setChecked(false); } /* * Tree Menu */ 
actionDrawNJTree = new QAction(tr("Draw Tree"), this); // Mark change 21-5-07 actionDrawNJTree->setShortcut(tr("Ctrl+R")); connect(actionDrawNJTree, SIGNAL(triggered()), this, SLOT(drawNJTree())); actionBootstrapNJTree = new QAction(tr("Bootstrap N-J Tree"), this); actionBootstrapNJTree->setShortcut(tr("Ctrl+B")); connect(actionBootstrapNJTree, SIGNAL(triggered()), this, SLOT(bootstrapNJTree())); actionExcludePosWithGap = new QAction(tr("Exclude Positions with Gaps"), this); actionExcludePosWithGap->setCheckable(true); connect(actionExcludePosWithGap, SIGNAL(triggered()), this, SLOT(excludePositionsWithGaps())); actionCorrectForMultiSub = new QAction(tr("Correct for Multiple Substitutions"), this); actionCorrectForMultiSub->setCheckable(true); connect(actionCorrectForMultiSub, SIGNAL(triggered()), this, SLOT(correctForMultipleSubstitutions())); actionTreeOutputFormatOptions = new QAction(tr("Output Format Options"), this); connect(actionTreeOutputFormatOptions, SIGNAL(triggered()), this, SLOT(treeOutputFormatOptions())); /** * Mark 16-5-07 * Tree sub menu */ treeSubMenu = new QMenu(tr("Clustering Algorithm")); actionUseUPGMA = new QAction(tr("UPGMA"), this); actionUseUPGMA->setCheckable(true); connect(actionUseUPGMA, SIGNAL(triggered()), this, SLOT(setUPGMA())); actionUseNJ = new QAction(tr("NJ"), this); actionUseNJ->setCheckable(true); connect(actionUseNJ, SIGNAL(triggered()), this, SLOT(setNJ())); treeSubMenu->addAction(actionUseUPGMA); treeSubMenu->addAction(actionUseNJ); if(clustalw::userParameters->getClusterAlgorithm() == clustalw::UPGMA) { actionUseUPGMA->setChecked(true); actionUseNJ->setChecked(false); } else { actionUseUPGMA->setChecked(false); actionUseNJ->setChecked(true); } /* * Color Menu */ actionBackColoring = new QAction(tr("Background Coloring"), this); actionBackColoring->setCheckable(true); actionBackColoring->setChecked(true); connect(actionBackColoring, SIGNAL(triggered()), this, SLOT(backgroundColoring())); actionBlackAndWhite = new 
QAction(tr("Black and White"), this); actionBlackAndWhite->setCheckable(true); connect(actionBlackAndWhite, SIGNAL(triggered()), this, SLOT(blackAndWhite())); actionDefaultColors = new QAction(tr("Default Colors"), this); actionDefaultColors->setCheckable(true); actionDefaultColors->setChecked(true); connect(actionDefaultColors, SIGNAL(triggered()), this, SLOT(defaultColors())); actionLoadColorParameterFile = new QAction(tr("Load Color Parameter File"), this); //actionLoadColorParameterFile->setCheckable(true); connect(actionLoadColorParameterFile, SIGNAL(triggered()), this, SLOT(loadColorParameterFile())); /* * Quality Menu */ actionShowLowScoreSeg = new QAction(tr("Show Low-Scoring Segments"), this); actionShowLowScoreSeg->setCheckable(true); connect(actionShowLowScoreSeg, SIGNAL(triggered()), this, SLOT(showLowScoreSeg())); actionShowExceptionalRes = new QAction(tr("Show Exceptional Residues"), this); actionShowExceptionalRes->setCheckable(true); connect(actionShowExceptionalRes, SIGNAL(triggered()), this, SLOT(showExceptionalRes())); actionLowScoreSegParams = new QAction(tr("Low-Scoring Segment Parameters"), this); connect(actionLowScoreSegParams, SIGNAL(triggered()), this, SLOT(lowScoreSegParams())); actionColumnScoreParams = new QAction(tr("Column Score Parameters"), this); connect(actionColumnScoreParams, SIGNAL(triggered()), this, SLOT(columnScoreParams())); actionSaveColumnScoresToFile = new QAction(tr("Save Column Scores to File"), this); connect(actionSaveColumnScoresToFile, SIGNAL(triggered()), this, SLOT(saveColumnScoresToFile())); /* * Help Menu */ actionGeneralHelp = new QAction(tr("General"), this); connect(actionGeneralHelp, SIGNAL(triggered()), this, SLOT(generalHelp())); actionInputOutputHelp = new QAction(tr("Input && Output Files"), this);//&& connect(actionInputOutputHelp, SIGNAL(triggered()), this, SLOT(inputOutputHelp())); actionEditAlignHelp = new QAction(tr("Editing Alignments"), this); connect(actionEditAlignHelp, SIGNAL(triggered()), 
this, SLOT(editAlignHelp())); actionMultiAlignHelp = new QAction(tr("Multiple Alignments"), this); connect(actionMultiAlignHelp, SIGNAL(triggered()), this, SLOT(multiAlignHelp())); actionProfileAlignHelp = new QAction(tr("Profile Alignments"), this); connect(actionProfileAlignHelp, SIGNAL(triggered()), this, SLOT(profileAlignHelp())); actionSecondaryStructureHelp = new QAction(tr("Secondary Structures"), this); connect(actionSecondaryStructureHelp, SIGNAL(triggered()), this, SLOT(secondaryStructureHelp())); actionTreesHelp = new QAction(tr("Trees"), this); connect(actionTreesHelp, SIGNAL(triggered()), this, SLOT(treesHelp())); actionColorsHelp = new QAction(tr("Colors"), this); connect(actionColorsHelp, SIGNAL(triggered()), this, SLOT(colorsHelp())); actionAlignQualityHelp = new QAction(tr("Alignment Quality"), this); connect(actionAlignQualityHelp, SIGNAL(triggered()), this, SLOT(alignQualityHelp())); actionCommandLineHelp = new QAction(tr("Command Line Parameters"), this); connect(actionCommandLineHelp, SIGNAL(triggered()), this, SLOT(commandLineHelp())); actionReferencesHelp = new QAction(tr("References"), this); connect(actionReferencesHelp, SIGNAL(triggered()), this, SLOT(referencesHelp())); } /** * This function creates the menus and adds the actions to the appropriate menu. 
*/ void MainWindow::createMenus() { /* * Set up the file menu */ fileMenu = menuBar()->addMenu(tr("&File")); fileMenu->addAction(actionLoadSequences); fileMenu->addAction(actionAppendSequences); fileMenu->addAction(actionSaveSeqAs); fileMenu->addAction(actionLoadProfile1); fileMenu->addAction(actionLoadProfile2); fileMenu->addAction(actionSaveProfile1As); fileMenu->addAction(actionSaveProfile2As); fileMenu->addAction(actionWriteAlignmentAsPost); fileMenu->addAction(actionWriteProfile1AsPost); fileMenu->addAction(actionWriteProfile2AsPost); fileMenu->addAction(actionQuit); toplevelMenuList.push_back(fileMenu); /* * Set up the Edit menu */ editMenu = menuBar()->addMenu(tr("&Edit")); editMenu->addAction(actionCutSequences); editMenu->addAction(actionPasteSequences); editMenu->addAction(actionSelectAllSequences); editMenu->addAction(actionSelectProfile1); editMenu->addAction(actionSelectProfile2); editMenu->addAction(actionAddProfile2ToProfile1); editMenu->addAction(actionClearSequenceSelection); editMenu->addAction(actionClearRangeSelection); editMenu->addAction(actionSearchForString); editMenu->addAction(actionRemoveAllGaps); editMenu->addAction(actionRemoveGapOnlyColumns); toplevelMenuList.push_back(editMenu); /* * Set up the Alignment Menu */ alignmentMenu = menuBar()->addMenu(tr("&Alignment")); alignmentMenu->addAction(actionDoCompleteAlign); alignmentMenu->addAction(actionDoGuideTreeOnly); alignmentMenu->addAction(actionDoAlignmentFromGuideTree); alignmentMenu->addAction(actionRealignSelectSeq); alignmentMenu->addAction(actionRealignSelectRange); alignmentMenu->addAction(actionAlignProfile1ToProfile2); alignmentMenu->addAction(actionAlignProfileFromTree); alignmentMenu->addAction(actionAlignSequencesToProfile1); alignmentMenu->addAction(actionAlignSequencesToProfile1FromTree); alignmentMenu->addMenu(menuAlignParameters); alignmentMenu->addMenu(iterationSubMenu); // Mark 16-5-07 alignmentMenu->addAction(actionOutputFormatOptions); 
alignmentMenu->addAction(actionSetDefaultParameters); toplevelMenuList.push_back(alignmentMenu); /* * Set up the Tree menu */ treeMenu = menuBar()->addMenu(tr("&Trees")); treeMenu->addAction(actionDrawNJTree); treeMenu->addAction(actionBootstrapNJTree); treeMenu->addAction(actionExcludePosWithGap); treeMenu->addAction(actionCorrectForMultiSub); treeMenu->addAction(actionTreeOutputFormatOptions); treeMenu->addMenu(treeSubMenu); // Mark 16-5-07 toplevelMenuList.push_back(treeMenu); /* * Set up the color menu */ colorMenu = menuBar()->addMenu(tr("&Colors")); colorMenu->addAction(actionBackColoring); colorMenu->addAction(actionBlackAndWhite); colorMenu->addAction(actionDefaultColors); colorMenu->addAction(actionLoadColorParameterFile); toplevelMenuList.push_back(colorMenu); qualityMenu = menuBar()->addMenu(tr("&Quality")); qualityMenu->addAction(actionShowLowScoreSeg); qualityMenu->addAction(actionShowExceptionalRes); qualityMenu->addAction(actionLowScoreSegParams); qualityMenu->addAction(actionColumnScoreParams); qualityMenu->addAction(actionSaveColumnScoresToFile); toplevelMenuList.push_back(qualityMenu); helpMenu = menuBar()->addMenu(tr("&Help")); helpMenu->addAction(actionGeneralHelp); helpMenu->addAction(actionInputOutputHelp); helpMenu->addAction(actionEditAlignHelp); helpMenu->addAction(actionMultiAlignHelp); helpMenu->addAction(actionProfileAlignHelp); helpMenu->addAction(actionSecondaryStructureHelp); helpMenu->addAction(actionTreesHelp); helpMenu->addAction(actionColorsHelp); helpMenu->addAction(actionAlignQualityHelp); helpMenu->addAction(actionCommandLineHelp); helpMenu->addAction(actionReferencesHelp); toplevelMenuList.push_back(helpMenu); } /** * This function sets up the Alignment mode drop down menu. 
*/ void MainWindow::createAlignModeMenu() { alignmentModeMenu->addItem("Multiple Alignment Mode"); alignmentModeMenu->addItem("Profile Alignment Mode"); alignmentModeMenu->setSizeAdjustPolicy(QComboBox::AdjustToContents); profileMode = 1; multiMode = 0; connect(alignmentModeMenu, SIGNAL(activated(int)), this, SLOT(changeMode(int))); } void MainWindow::changeMode(int mode) { if(mode != currentMode) { if(mode == profileMode) { profile2Viewer->setVisible(true); profile2Viewer->setProfileNum(2); alignmentViewer->setProfileNum(1); profileScrollLock->setVisible(true); enableProfileActions(); if(alignPtr->getNumSeqs() > 0) { clustalw::userParameters->setProfileNum(1); clustalw::userParameters->setProfile1Empty(false); alignPtr->setProfile1NumSeqs(alignPtr->getNumSeqs()); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); // View all seqs profile2Viewer->updateView(0, 0); } } else // sequence align mode!! { clustalw::userParameters->setProfileNum(0); alignPtr->setProfile1NumSeqs(0); clustalw::userParameters->setProfile1Empty(true); clustalw::userParameters->setProfile2Empty(true); alignmentViewer->setProfileNum(0); profile2Viewer->setProfileNum(0); if(alignPtr->getNumSeqs() > 0) { alignmentViewer->updateView(1, alignPtr->getNumSeqs()); } else { alignmentViewer->updateView(0, 0); } profile2Viewer->updateView(0, 0); profile2Viewer->setVisible(false); profileScrollLock->setVisible(false); enableSequenceActions(); // If it is showing the low scoring segments!!! if(showLowScoreSegments) { // Stop the profile2Viewer from showing it. seqWeightCalculatedProfile2Viewer = false; lowScoreSeqWeightProfile2Viewer->clear(); lowScoreResProfile2Viewer->clearArray(); profile2Viewer->showLowScoringSegments(false, lowScoreResProfile2Viewer); // Recalculculate the alignment one. 
calculateLowScoreAlign(); } } currentMode = mode; //hideLowScoreSegs(); // Mark 22-5-2007 resetAndHideLowScoreSeqs(); // Mark 23-5-2007 update(); } } void MainWindow::setProfileModeCmd() { alignmentViewer->setProfileNum(1); profile2Viewer->setProfileNum(2); profile2Viewer->setVisible(true); profileScrollLock->setVisible(true); enableProfileActions(); /* if(alignPtr->getNumSeqs() > 0) { clustalw::userParameters->setProfile1Empty(false); clustalw::userParameters->setProfile2Empty(false); alignPtr->setProfile1NumSeqs(alignPtr->getNumSeqs()); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); // View all seqs profile2Viewer->updateView(alignPtr->getProfile1NumSeqs() + 1, alignPtr->getNumSeqs()); } //hideLowScoreSegs(); // Mark 22-5-2007 resetAndHideLowScoreSeqs(); // Mark 23-5-2007 */ currentMode = profileMode; update(); } /** * This function enables the profile actions and disables the sequence ones. */ void MainWindow::enableProfileActions() { // disabled actionLoadSequences->setEnabled(false); actionAppendSequences->setEnabled(false); actionSaveSeqAs->setEnabled(false); actionWriteAlignmentAsPost->setEnabled(false); actionSelectAllSequences->setEnabled(false); actionDoCompleteAlign->setEnabled(false); actionDoGuideTreeOnly->setEnabled(false); actionDoAlignmentFromGuideTree->setEnabled(false); actionRealignSelectSeq->setEnabled(false); actionRealignSelectRange->setEnabled(false); actionDrawNJTree->setEnabled(false); actionBootstrapNJTree->setEnabled(false); actionExcludePosWithGap->setEnabled(false); actionCorrectForMultiSub->setEnabled(false); actionTreeOutputFormatOptions->setEnabled(false); // Enabled actionSecondaryStructureParameters->setEnabled(true); actionLoadProfile1->setEnabled(true); actionLoadProfile2->setEnabled(true); actionSaveProfile1As->setEnabled(true); actionSaveProfile2As->setEnabled(true); actionWriteProfile1AsPost->setEnabled(true); actionWriteProfile2AsPost->setEnabled(true); actionSelectProfile1->setEnabled(true); 
actionSelectProfile2->setEnabled(true); actionAddProfile2ToProfile1->setEnabled(true); actionAlignProfile1ToProfile2->setEnabled(true); actionAlignProfileFromTree->setEnabled(true); actionAlignSequencesToProfile1->setEnabled(true); actionAlignSequencesToProfile1FromTree->setEnabled(true); } /** * This function enables the sequence actions and disables the profile ones. */ void MainWindow::enableSequenceActions() { // enabled actionLoadSequences->setEnabled(true); actionAppendSequences->setEnabled(true); actionSaveSeqAs->setEnabled(true); actionWriteAlignmentAsPost->setEnabled(true); actionSelectAllSequences->setEnabled(true); actionDoCompleteAlign->setEnabled(true); actionDoGuideTreeOnly->setEnabled(true); actionDoAlignmentFromGuideTree->setEnabled(true); actionRealignSelectSeq->setEnabled(true); actionRealignSelectRange->setEnabled(true); actionDrawNJTree->setEnabled(true); actionBootstrapNJTree->setEnabled(true); actionExcludePosWithGap->setEnabled(true); actionCorrectForMultiSub->setEnabled(true); actionTreeOutputFormatOptions->setEnabled(true); // disabled actionLoadProfile1->setEnabled(false); actionLoadProfile2->setEnabled(false); actionSaveProfile1As->setEnabled(false); actionSaveProfile2As->setEnabled(false); actionWriteProfile1AsPost->setEnabled(false); actionWriteProfile2AsPost->setEnabled(false); actionSelectProfile1->setEnabled(false); actionSelectProfile2->setEnabled(false); actionAddProfile2ToProfile1->setEnabled(false); actionAlignProfile1ToProfile2->setEnabled(false); actionAlignProfileFromTree->setEnabled(false); actionAlignSequencesToProfile1->setEnabled(false); actionAlignSequencesToProfile1FromTree->setEnabled(false); actionSecondaryStructureParameters->setEnabled(false); } /** * This function sets up the font size drop down menu. */ void MainWindow::createFontMenu() { // Mark: 19-1-2007 added more items to the font combobox. // Nige: 13-2-2007 added more items to the font combobox. 
// Nige: 15-3-2007 changed to classical typographic scalings fontMenu->addItem("72"); fontMenu->addItem("60"); fontMenu->addItem("48"); fontMenu->addItem("36"); //fontMenu->addItem("30"); //- fontMenu->addItem("28"); //fontMenu->addItem("26"); //- fontMenu->addItem("24"); //fontMenu->addItem("22"); //- fontMenu->addItem("21"); //-20 fontMenu->addItem("18"); fontMenu->addItem("16"); fontMenu->addItem("14"); fontMenu->addItem("12"); fontMenu->addItem("11"); fontMenu->addItem("10"); fontMenu->addItem("9"); #if !OS_MAC fontMenu->addItem("8"); #endif fontMenu->setCurrentIndex(12); fontMenu->setSizeAdjustPolicy(QComboBox::AdjustToContents); } /** * This function creates the layout for the horizontal area containing the alignment mode * and font size drop down menu. */ void MainWindow::createHorizontalBox() { /* * Drop down Menus */ alignmentModeMenu = new QComboBox(); createAlignModeMenu(); fontMenu = new QComboBox(); createFontMenu(); profileScrollLock = new QCheckBox("Lock Scroll"); profileScrollLock->setVisible(false); profileScrollLock->setChecked(false); connect(profileScrollLock, SIGNAL(clicked()), this, SLOT(scrollLockClicked())); /* * Labels for the menus */ fontLabel = new QLabel("&Font:", this); alignModeLabel = new QLabel("&Mode:", this); fontLabel->setMaximumWidth ( 40 ); alignModeLabel->setMaximumWidth ( 40 ); fontLabel->setSizePolicy( QSizePolicy::Minimum, QSizePolicy::Minimum); alignModeLabel->setSizePolicy( QSizePolicy::Minimum, QSizePolicy::Minimum); fontLabel->setBuddy(fontMenu); alignModeLabel->setBuddy(alignmentModeMenu); /* * Fill the space not taken up by the other widgets. */ sideFiller = new QWidget; sideFiller->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Minimum); /* * Create a horizontal group box and add the widgets to it. 
*/ horizontalGroupBox = new QGroupBox(); layoutHbox = new QHBoxLayout; layoutHbox->addWidget(alignModeLabel); layoutHbox->addWidget(alignmentModeMenu); layoutHbox->addWidget(fontLabel); layoutHbox->addWidget(fontMenu); layoutHbox->addWidget(profileScrollLock); layoutHbox->addWidget(sideFiller); layoutHbox->setDirection (QBoxLayout::LeftToRight); horizontalGroupBox->setLayout(layoutHbox); } void MainWindow::scrollLockClicked() { QScrollBar* alignmentBar = alignmentViewer->getAlignmentScrollBar(); QScrollBar* profileBar = profile2Viewer->getAlignmentScrollBar(); if(profileScrollLock->isChecked()) { // Connect the two scrollbars together, make the one with the shortest seqs invisible int alignmentLongest = alignmentViewer->getLengthLongestSeqInRange(); int profileLongest = profile2Viewer->getLengthLongestSeqInRange(); connect(alignmentBar, SIGNAL(valueChanged(int)), profileBar, SLOT(setValue(int))); connect(profileBar, SIGNAL(valueChanged(int)), alignmentBar,SLOT(setValue(int))); if(alignmentLongest > profileLongest) { // Make the profile one invisible profileBar->setVisible(false); alignmentBar->setVisible(true); } else { // Make the alignment one invisble alignmentBar->setVisible(false); profileBar->setVisible(true); } } else { profileBar->setVisible(true); alignmentBar->setVisible(true); disconnect(alignmentBar, SIGNAL(valueChanged(int)), profileBar, SLOT(setValue(int))); disconnect(profileBar, SIGNAL(valueChanged(int)), alignmentBar,SLOT(setValue(int))); } } void MainWindow::loadSequences(QString myFile) { KeyController::Instance()->clearCtrl(); //- nige string offendingSeq; // needed in case one sequence is reported as troublemaker bool dnaFlagBefore = clustalw::userParameters->getDNAFlag(); int errorCode; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; /* Andreas Wilm (UCD): * if file provided on commandline didn't exist, clustalx 1.83 * reported this via gui and exited after that. 
v2 reports * missing file, but doesn't exit */ if(myFile == "") { myFile = fd.getOpenDataFileName(this, tr("Load Sequence File")); if(myFile == "") { return; } } clustalw::userParameters->setSeqName(myFile.toStdString()); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->sequenceInput(false, &offendingSeq); // The view is updated anyway, even if there has been an error. seqWeightCalculatedAlignmentViewer = false; bool dnaFlagAfter = clustalw::userParameters->getDNAFlag(); if(dnaFlagBefore != dnaFlagAfter) { if(dnaFlagAfter == true) { // Moved from protein to dna, use dna file alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } } if(errorCode != clustalw::OK) { sequenceFileName = ""; alignmentViewer->updateView(0, 0); seqWeightCalculatedAlignmentViewer = false; lowScoreSeqWeightAlignmentViewer->clear(); lowScoreResAlignmentViewer->clearArray(); errorHandler(errorCode, offendingSeq); } else { sequenceFileName = myFile; alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } void MainWindow::appendSequences() { int errorCode; QString myFile; string offendingSeq; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getOpenDataFileName(this, tr("Append Sequence File")); if(myFile == "") { return; } clustalw::userParameters->setSeqName(myFile.toStdString()); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->sequenceInput(true, &offendingSeq); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); seqWeightCalculatedAlignmentViewer = false; if(errorCode != clustalw::OK) { errorHandler(errorCode, offendingSeq); } else { if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } 
searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } void MainWindow::saveSequencesAs() { KeyController::Instance()->clearCtrl(); //- nige alignmentViewer->saveSequences(sequenceFileName, Sequences); } void MainWindow::loadProfile1() { bool dnaFlagBefore = clustalw::userParameters->getDNAFlag(); int errorCode = 0; QString myFile = ""; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getOpenDataFileName(this, tr("Load Profile 1")); if(myFile == "") { return; } alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->profile1Input(myFile.toStdString()); bool dnaFlagAfter = clustalw::userParameters->getDNAFlag(); seqWeightCalculatedAlignmentViewer = false; // Check if we need to reread in if(dnaFlagBefore != dnaFlagAfter) { // We have changed from dna to protein or vice versa // Need to reread the parameter file; if(dnaFlagAfter == true) { // Moved from protein to dna, use dna file alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } } if(errorCode != clustalw::OK) { sequenceFileName = ""; alignmentViewer->updateView(0, 0); seqWeightCalculatedAlignmentViewer = false; lowScoreSeqWeightAlignmentViewer->clear(); lowScoreResAlignmentViewer->clearArray(); errorHandler(errorCode); } else { sequenceFileName = myFile; alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } void MainWindow::loadProfile2() { int errorCode; if(!clustalw::userParameters->getProfile1Empty()) { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getOpenDataFileName(this, tr("Load Profile 2")); if(myFile == "") { 
return; } profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->profile2Input(myFile.toStdString()); seqWeightCalculatedProfile2Viewer = false; if(errorCode != clustalw::OK) { profile2FileName = ""; profile2Viewer->updateView(0, 0); if(errorCode == clustalw::ALLNAMESNOTDIFFERENT) { /* 26-03-07,nige: display Profile1 intact; reset Profile2 */ alignmentViewer->updateView(); //- nige profile2Viewer->updateView(0, 0); //- nige seqWeightCalculatedAlignmentViewer = false; lowScoreSeqWeightAlignmentViewer->clear(); lowScoreResAlignmentViewer->clearArray(); } errorHandler(errorCode); seqWeightCalculatedProfile2Viewer = false; lowScoreSeqWeightProfile2Viewer->clear(); lowScoreResProfile2Viewer->clearArray(); } else { profile2FileName = myFile; profile2Viewer->updateView(alignPtr->getProfile1NumSeqs() + 1, alignPtr->getNumSeqs()); // If we are showing the low scoring segments then calculate for the 2nd profile if(showLowScoreSegments == true) { calculateLowScoreProfile(); } } QApplication::restoreOverrideCursor(); } else { QMessageBox::information(this, "", "You must load Profile 1 first", QMessageBox::Ok); } searchBeginSeq = 0; searchBeginRes = 1; } void MainWindow::loadProfile1(string myFile) { bool dnaFlagBefore = clustalw::userParameters->getDNAFlag(); int errorCode = 0; alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->profile1Input(myFile); bool dnaFlagAfter = clustalw::userParameters->getDNAFlag(); seqWeightCalculatedAlignmentViewer = false; // Check if we need to reread in if(dnaFlagBefore != dnaFlagAfter) { // We have changed from dna to protein or vice versa // Need to reread the parameter file; if(dnaFlagAfter == true) { // Moved from protein to dna, use dna file alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { 
alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } } if(errorCode != clustalw::OK) { sequenceFileName = ""; alignmentViewer->updateView(0, 0); seqWeightCalculatedAlignmentViewer = false; lowScoreSeqWeightAlignmentViewer->clear(); lowScoreResAlignmentViewer->clearArray(); errorHandler(errorCode); } else { alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } void MainWindow::loadProfile2(string myFile) { int errorCode; if(!clustalw::userParameters->getProfile1Empty()) { profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); errorCode = clustalObj->profile2Input(myFile); seqWeightCalculatedProfile2Viewer = false; if(errorCode != clustalw::OK) { profile2FileName = ""; profile2Viewer->updateView(0, 0); if(errorCode == clustalw::ALLNAMESNOTDIFFERENT) { /* 26-03-07,nige: display Profile1 intact; reset Profile2 */ alignmentViewer->updateView(); //- nige profile2Viewer->updateView(0, 0); //- nige seqWeightCalculatedAlignmentViewer = false; lowScoreSeqWeightAlignmentViewer->clear(); lowScoreResAlignmentViewer->clearArray(); } errorHandler(errorCode); seqWeightCalculatedProfile2Viewer = false; lowScoreSeqWeightProfile2Viewer->clear(); lowScoreResProfile2Viewer->clearArray(); } else { profile2Viewer->updateView(alignPtr->getProfile1NumSeqs() + 1, alignPtr->getNumSeqs()); // If we are showing the low scoring segments then calculate for the 2nd profile if(showLowScoreSegments == true) { calculateLowScoreProfile(); } } QApplication::restoreOverrideCursor(); } else { QMessageBox::information(this, "", "You must load Profile 1 first", QMessageBox::Ok); } searchBeginSeq = 0; searchBeginRes = 1; } void MainWindow::saveProfile1As() { if(!clustalw::userParameters->getProfile1Empty()) { alignmentViewer->saveSequences(sequenceFileName, Profile1); } else { 
QMessageBox::information(this, "", "Load Profile 1 first.", QMessageBox::Ok); } } void MainWindow::saveProfile2As() { if(!clustalw::userParameters->getProfile2Empty()) { profile2Viewer->saveSequences(profile2FileName, Profile2); } else { QMessageBox::information(this, "", "Load Profile 2 first.", QMessageBox::Ok); } } void MainWindow::writeAlignmentAsPostscript() { KeyController::Instance()->clearCtrl(); //- nige writeViewer1AsPostscript(Sequences); } void MainWindow::writeProfile1AsPostscript() { writeViewer1AsPostscript(Profile1); } void MainWindow::writeViewer1AsPostscript(int type) { if(alignmentViewer->seqsLoaded()) { int lengthLongestSeq = alignmentViewer->getLengthLongestSeqInRange(); WritePostscriptFile writePs(sequenceFileName, Sequences, lengthLongestSeq); writePs.exec(); // Need to parse the color parameters file. if(writePs.result() == QDialog::Accepted) { PostscriptFileParams psParams = writePs.getParams(); ColorParameters* ptrToCur; ptrToCur = alignmentViewer->getPtrToCurrentColorScheme(); // We want to have the same rules as the current selection, but different // colors, as the colors may display differently when printed. 
//26-11-07,nige //readInParamFile(psParams.getPSColorsFile(), &printerColors, false); readInParamFile(psParams.getPSColorsFile(), &printerColors); //26-11-07,nige: end printerColors.addAllRulesTogether(ptrToCur->getColorRules()); printerColors.addConsensusParams(*(ptrToCur->getConPar())); alignmentViewer->setPrintoutColorScheme(printerColors); profile2Viewer->setPrintoutColorScheme(printerColors); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); alignmentViewer->writeToPSFile(psParams); QApplication::restoreOverrideCursor(); } } else { QMessageBox::warning(this, tr("Error"), tr("Error: No file loaded.")); } } void MainWindow::writeProfile2AsPostscript() { if(profile2Viewer->seqsLoaded()) { int lengthLongestSeq = profile2Viewer->getLengthLongestSeqInRange(); WritePostscriptFile writePs(profile2FileName, Profile2, lengthLongestSeq); writePs.exec(); // Need to parse the color parameters file. if(writePs.result() == QDialog::Accepted) { PostscriptFileParams psParams = writePs.getParams(); ColorParameters* ptrToCur; ptrToCur = profile2Viewer->getPtrToCurrentColorScheme(); // We want to have the same rules as the current selection, but different // colors, as the colors may display differently when // printed. 
//26-11-07,nige //readInParamFile(psParams.getPSColorsFile(), &printerColors, false); readInParamFile(psParams.getPSColorsFile(), &printerColors); //26-11-07,nige: end printerColors.addAllRulesTogether(ptrToCur->getColorRules()); printerColors.addConsensusParams(*(ptrToCur->getConPar())); alignmentViewer->setPrintoutColorScheme(printerColors); profile2Viewer->setPrintoutColorScheme(printerColors); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); profile2Viewer->writeToPSFile(psParams); QApplication::restoreOverrideCursor(); } } else { QMessageBox::warning(this, tr("Error"), tr("Error: No file loaded.")); } } void MainWindow::quitProgram() { exit(0); } bool MainWindow::anySeqsSelected() { if(alignmentViewer->getNumberOfSeqsSelected() != 0) { return true; } if((currentMode == profileMode) && profile2Viewer->getNumberOfSeqsSelected() != 0) { return true; } return false; } void MainWindow::cutSequences() { KeyController::Instance()->clearCtrl(); //- nige // Check if a file is loaded int numSeqs = alignPtr->getNumSeqs(); if(numSeqs == 0) { QMessageBox::information(this, "", "No file loaded.", QMessageBox::Ok); return; } // check if some seqs are selected bool seqsSelected = anySeqsSelected(); if(!seqsSelected) { QString message = "Select sequences to be cut by clicking on the names."; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } // check if we have multi or profile mode std::vector selected; selected = getAllSelectedSeqsVector(); bool allSelected = allSeqsInAlignmentOrProfileSelected(&selected); if(allSelected) { QString message = "Cannot cut all sequences from an alignment or profile."; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } cutSeqs.clear(); cutSeqs = alignPtr->cutSelectedSequencesFromAlignment(&selected); numSeqs = alignPtr->getNumSeqs(); int profileNum = clustalw::userParameters->getProfileNum(); if(currentMode == profileMode) { int numSeqsProfile1 = alignPtr->getProfile1NumSeqs(); int prof1First = 
0, prof1Last = 0, prof2First = 0, prof2Last = 0; if(numSeqs <= 0) { clustalw::userParameters->setEmpty(true); prof1First = 0; prof1Last = 0; prof2First = 0; prof2Last = 0; } else if(numSeqsProfile1 == 0) { clustalw::userParameters->setProfile1Empty(true); prof1First = 0; prof1Last = 0; if(profileNum == 2) { prof2First = 1; prof2Last = numSeqs; } } else { prof1First = 1; prof1Last = numSeqsProfile1; if(profileNum == 2) { if(numSeqsProfile1 < numSeqs) { prof2First = numSeqsProfile1 + 1; prof2Last = numSeqs; } else // None left in profile 2 { prof2First = 0; prof2Last = 0; } } } alignmentViewer->updateView(prof1First, prof1Last); if(profileNum == 2) { profile2Viewer->updateView(prof2First, prof2Last); } } else { //int alignmentFirstSeq, alignmentLastSeq; if(numSeqs <= 0) { clustalw::userParameters->setEmpty(true); alignmentViewer->updateView(0, 0); } else { alignmentViewer->updateView(1, numSeqs); } } //hideLowScoreSegs(); // Mark 22-5-2007 resetAndHideLowScoreSeqs(); // Mark 23-5-2007 } bool MainWindow::allSeqsInAlignmentOrProfileSelected(std::vector* seqsSelected) { if(currentMode == profileMode) { int numProfile1 = alignPtr->getProfile1NumSeqs(); int sizeSelected = seqsSelected->size(); bool allSeqsInProf1Selected = false, allSeqsInProf2Selected = false; int numSelectedInProf1 = 0, numSelectedInProf2 = 0; // Mark Jan 17th 2007. This section of the code has changed to fix the problem in // bug 2. 
for(int i = 1; i <= numProfile1; i++) { if((*seqsSelected)[i] == 1) { numSelectedInProf1++; } } if(numSelectedInProf1 == numProfile1) { allSeqsInProf1Selected = true; } int numSeqsInProf2 = (sizeSelected - 1) - numProfile1; if(numSeqsInProf2 != 0) { for(int i = numProfile1 + 1; i < sizeSelected; i++) { if((*seqsSelected)[i] == 1) { numSelectedInProf2++; } } if(numSelectedInProf2 == numSeqsInProf2) { allSeqsInProf2Selected = true; } } return (allSeqsInProf1Selected || allSeqsInProf2Selected); } else { int sizeSelected = seqsSelected->size(); bool allSeqsInAlignSelected = true; for(int i = 1; i < sizeSelected; i++) { if((*seqsSelected)[i] != 1) { allSeqsInAlignSelected = false; } } return allSeqsInAlignSelected; } } void MainWindow::pasteSequences() { KeyController::Instance()->clearCtrl(); //- nige if(cutSeqs.size() == 0) { QString message = "No sequences available for pasting.\n"; message += "Cut sequences first."; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } int insert=0; bool insertIntoProfile2 = false; // Check if a seq has been selected! if(alignPtr->getNumSeqs() == 0) { insert = 0; } else { // Mark Jan 16th 2007. Bug 11 // I have made some changes here to allow pasting into the empty profile 2. bool seqsSelected = anySeqsSelected(); bool hasProfile2NamesFocus = profile2Viewer->hasFocus(); // Find the point to paste the sequences vector selected; selected = getAllSelectedSeqsVector(); if(seqsSelected) { //paste after *first sequence in selected block* //for(int i = 1; i < selected.size(); i++) { //- nige: always paste *after selected block* regardless of //- length; this gives consistent behaviour for block of length 1 //- or more. 
for (int i = selected.size()-1; i > 0; i--) { if (selected[i] == 1) { //cout << "ins: " << selected.size() << "," << i << endl; insert = i; break; } } } else if(!seqsSelected && currentMode == profileMode && hasProfile2NamesFocus) { // Mark Jan 16th 2007 // This is to allow pasting into the empty profile2 insert = selected.size() - 1; // Insert at the end profile2FileName = "nofilepastedin"; clustalw::userParameters->setProfileNum(2); clustalw::userParameters->setProfile2Name("nofilepastedin"); clustalw::userParameters->setProfile2Empty(false); insertIntoProfile2 = true; } else { QString message = "Select a sequence by clicking on the name.\n"; message += "Cut sequences will be pasted after this one."; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } } alignPtr->pasteSequencesIntoPosition(&cutSeqs, insert, insertIntoProfile2); cutSeqs.clear(); int numSeqs = alignPtr->getNumSeqs(); int profileNum = clustalw::userParameters->getProfileNum(); if(currentMode == profileMode) { int numSeqsProfile1 = alignPtr->getProfile1NumSeqs(); alignmentViewer->updateView(1, numSeqsProfile1); if(profileNum == 2 && numSeqsProfile1 < numSeqs) { profile2Viewer->updateView(numSeqsProfile1 + 1, numSeqs); } else { profile2Viewer->updateView(0, 0); } } else { alignmentViewer->updateView(1, numSeqs); } //hideLowScoreSegs(); // Mark 22-5-2007 resetAndHideLowScoreSeqs(); // Mark 23-5-2007 } void MainWindow::selectAllSequences() { KeyController::Instance()->clearCtrl(); //- nige alignmentViewer->selectAllSequences(); } void MainWindow::selectProfile1() { alignmentViewer->selectAllSequences(); } void MainWindow::selectProfile2() { profile2Viewer->selectAllSequences(); } void MainWindow::addProfile2ToProfile1() { /* AW corresponding 1,83 function is xmenu.c:MergeProfiles */ if (clustalw::userParameters->getProfile2Empty()) { QMessageBox::information(this, "", "Profile 2 not loaded", QMessageBox::Ok); return; } int numSeqs = alignPtr->getNumSeqs(); 
clustalw::userParameters->setProfileNum(1); alignPtr->setProfile1NumSeqs(numSeqs); clustalw::userParameters->setProfile2Empty(true); /* AW: (related to bug 141) * shouldn't userParameters->getUseSS2() and * userParameters->getStructPenalties2() etc * be reset as well here? */ profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); alignmentViewer->updateView(1, numSeqs); QApplication::restoreOverrideCursor(); /* bug 141: seqs are displayed correctly, but saving failes: seqs in profile 1 are used repeatedly because outputindex is broken e.g. for two profile of four sequences each: 1:1 2:2 3:3 4:4 5:1 6:2 7:3 8:4 In 1.83 outputindex is okay at this stage. so maybe the error occurs earlier on, but the safest bet here to fix the bug was simply to reset the outputindex */ alignPtr->setDefaultOutputIndex(); } void MainWindow::clearSequenceSelection() { alignmentViewer->clearSequenceSelection(); profile2Viewer->clearSequenceSelection(); } void MainWindow::clearRangeSelection() { alignmentViewer->clearRangeSelection(); profile2Viewer->clearRangeSelection(); } void MainWindow::searchForString() { KeyController::Instance()->clearCtrl(); //- nige bool seqsSelectedInAlignment = false; bool seqsSelectedInProfile2 = false; searchBeginSeq = 0; searchBeginRes = 0; searchString = ""; if(alignmentViewer->getNumberOfSeqsSelected() != 0) { seqsSelectedInAlignment = true; } if((currentMode == profileMode) && profile2Viewer->getNumberOfSeqsSelected() != 0) { seqsSelectedInProfile2 = true; } if(seqsSelectedInAlignment || seqsSelectedInProfile2) { searchSeqs->exec(); } else { QMessageBox::information(this, "", "You must select sequences first", QMessageBox::Ok); } } void MainWindow::removeAllGaps() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } bool seqsSelectedInAlignment = false, seqsSelectedInProfile2 = false; if(alignmentViewer->getNumberOfSeqsSelected() != 
0) { seqsSelectedInAlignment = true; } if((currentMode == profileMode) && profile2Viewer->getNumberOfSeqsSelected() != 0) { seqsSelectedInProfile2 = true; } if(!seqsSelectedInAlignment && !seqsSelectedInProfile2) { QMessageBox::information(this, "", "Select sequences by clicking on names.", QMessageBox::Ok); return; } int answer; answer = QMessageBox::information(this, "", "Remove gaps from selected sequences?", QMessageBox::Yes, QMessageBox::No); if(answer == QMessageBox::No) { return; } std::vector selected; selected = getAllSelectedSeqsVector(); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); alignPtr->removeGapsFromSelectedSeqs(&selected); if(currentMode == profileMode) { alignmentViewer->updateView(); profile2Viewer->updateView(); } else { alignmentViewer->updateView(); } QApplication::restoreOverrideCursor(); } vector MainWindow::getAllSelectedSeqsVector() { int nSeqs = alignPtr->getNumSeqs(); std::vector selected; selected.resize(nSeqs + 1, 0); // Get a vector which contains a 1 for each sequences that is selected. // Not it must start at index 1 as this is the was the clustal code works. 
if(currentMode == profileMode) { int size1, size2; const std::vector* alignmentSelected = alignmentViewer->getSelectedRows(); size1 = alignmentSelected->size(); const std::vector* profileSelected = profile2Viewer->getSelectedRows(); size2 = profileSelected->size(); //cout << "gas: " << size1 << "+" << size2 << "=" << size1+size2 << endl; for(int i = 0; i < size1 + size2; i++) { if(i < size1) { if((*alignmentSelected)[i] == 1) { selected[i + 1] = 1; } else { selected[i + 1] = 0; } } else { if((*profileSelected)[i - size1] == 1) { selected[i + 1] = 1; } else { selected[i + 1] = 0; } } } } else { const std::vector* alignmentSelected = alignmentViewer->getSelectedRows(); int size; size = alignmentSelected->size(); for(int i = 0; i < size; i++) { if((*alignmentSelected)[i] == 1) { selected[i + 1] = 1; } else { selected[i + 1] = 0; } } } return selected; } void MainWindow::removeGapOnlyColumns() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); if(currentMode == profileMode) { alignPtr->removeAllGapOnlyColumns(alignmentViewer->getFirstSeq(), alignmentViewer->getLastSeq(), 1); alignPtr->removeAllGapOnlyColumns(profile2Viewer->getFirstSeq(), profile2Viewer->getLastSeq(), 2); alignmentViewer->updateView(); profile2Viewer->updateView(); } else { alignPtr->removeAllGapOnlyColumns(1, nSeqs, 0); alignmentViewer->updateView(); } QApplication::restoreOverrideCursor(); clustalw::utilityObject->info("All the columns that contains only the gaps, are removed!"); } void MainWindow::doCompleteAlignment() { KeyController::Instance()->clearCtrl(); //- nige // Need to open all the files that are needed first! 
int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } FileNameDialogType type = CompleteAlign; AlignType alignType = Sequence; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName = names.treeFile; clustalObj->QTSetFileNamesForOutput(names); alignmentViewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); toplevelMenusEnabled(false);// Andreas Wilm (UCD): needed because we keep the window // responsive (window movement etc) during alignment process clustalObj->align(&phylipName); alignPtr->reloadAlignment(); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } searchBeginSeq = 0; searchBeginRes = 1; toplevelMenusEnabled(true); QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::produceGuideTreeOnly() { KeyController::Instance()->clearCtrl(); //- nige int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(nSeqs < 2) { QMessageBox::information(this, "", "Cannot produce tree. 
Not enough sequences", QMessageBox::Ok); return; } FileNameDialogType type = DoGuideTreeOnly; AlignType alignType = Sequence; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName = names.treeFile; QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->doGuideTreeOnly(&phylipName); QApplication::restoreOverrideCursor(); } } void MainWindow::doAlignmentFromGuideTree() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } FileNameDialogType type = AlignFromTree; AlignType alignType = Sequence; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName = names.treeFile; clustalObj->QTSetFileNamesForOutput(names); alignmentViewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->doAlignUseOldTree(&phylipName); alignPtr->reloadAlignment(); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::realignSelectedSequences() { // Check if a file is loaded int numSeqs = alignPtr->getNumSeqs(); if(numSeqs == 0) { QMessageBox::information(this, "", "No file loaded.", QMessageBox::Ok); return; } // check if some seqs are selected bool seqsSelected = anySeqsSelected(); if(!seqsSelected) { QString message = "Select sequences to be realigned by clicking on the names."; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } // check if we have multi or profile mode std::vector selected; selected = getAllSelectedSeqsVector(); bool 
allSelected = allSeqsInAlignmentOrProfileSelected(&selected); if(allSelected) { doCompleteAlignment(); // Dont cut anything, just do a complete realign. return; } FileNameDialogType type = RealignSelectedSeqs; AlignType alignType = Sequence; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType); int code = namesDialog.exec(); if(code == QDialog::Accepted) { alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); // Cut these sequences from the vector of sequences. vector seqsToRealign; seqsToRealign = alignPtr->cutSelectedSequencesFromAlignment(&selected); // Now paste them in at the end. int nSeqs = alignPtr->getNumSeqs(); alignPtr->setProfile1NumSeqs(nSeqs); // Now paste the other sequences in! // Mark change jan 16 2007. added false into function call. alignPtr->pasteSequencesIntoPosition(&seqsToRealign, nSeqs, false); clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName1 = names.treeFile; clustalObj->QTSetFileNamesForOutput(names); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->sequencesAlignToProfile(&phylipName1); if(alignPtr->getNumSeqs() > 0) { alignPtr->reloadAlignment(); alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::realignSelectedResidueRange() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } int beginCol = alignmentViewer->getFirstSelectedCol(); int endCol = alignmentViewer->getSecondSelectedCol(); if(beginCol < 0 || endCol < 1 || beginCol > endCol) { QMessageBox::information(this, "", "No range selected", QMessageBox::Ok); return; } FileNameDialogType type = RealignSeqRange; AlignType alignType = Sequence; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType); int code = 
namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName = names.treeFile; clustalObj->QTSetFileNamesForOutput(names); alignmentViewer->updateView(0, 0); bool realignEndGapPen = namesDialog.getRealignEndGapPen(); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->QTRealignSelectedRange(names, beginCol, endCol, realignEndGapPen); if(alignPtr->getNumSeqs() > 0) { alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::alignProfile2ToProfile1() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile1Empty()) { QMessageBox::information(this, "", "Profile 1 empty", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile2Empty()) { QMessageBox::information(this, "", "Profile 2 empty", QMessageBox::Ok); return; } FileNameDialogType type = AlignProf2ToProf1; AlignType alignType = Profile; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType, profile2FileName); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName1 = names.treeFile; string phylipName2 = names.profile2TreeFile; clustalObj->QTSetFileNamesForOutput(names); /* 26-03-07,nige: save the profile boundaries */ int prof1First = alignmentViewer->getFirstSeq(); int prof1Last = alignmentViewer->getLastSeq(); int prof2First = profile2Viewer->getFirstSeq(); int prof2Last = profile2Viewer->getLastSeq(); alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->profileAlign(&phylipName1, &phylipName2); /* 26-03-07,nige: redisplay Profile1 and Profile2 */ 
//alignPtr->reloadAlignment(); //- nige: not called in clustalx //alignmentViewer->updateView(1, alignPtr->getNumSeqs()); //- nige alignmentViewer->updateView(prof1First, prof1Last); //- nige profile2Viewer->updateView(prof2First, prof2Last); //- nige if(showLowScoreSegments == true) { calculateLowScoreAlign(); } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::alignProfilesFromGuideTrees() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile1Empty()) { QMessageBox::information(this, "", "Profile 1 empty", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile2Empty()) { QMessageBox::information(this, "", "Profile 2 empty", QMessageBox::Ok); return; } FileNameDialogType type = AlignProf2ToProf1WithTree; AlignType alignType = Profile; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType, profile2FileName); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName1 = names.treeFile; string phylipName2 = names.profile2TreeFile; clustalObj->QTSetFileNamesForOutput(names); /* 26-03-07,nige: save the profile boundaries */ int prof1First = alignmentViewer->getFirstSeq(); int prof1Last = alignmentViewer->getLastSeq(); int prof2First = profile2Viewer->getFirstSeq(); int prof2Last = profile2Viewer->getLastSeq(); alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); // Need to use the old guide trees! 
if(phylipName1 != "") { clustalw::userParameters->setUseTree1File(true); } else { clustalw::userParameters->setUseTree1File(false); } if(phylipName2 != "") { clustalw::userParameters->setUseTree2File(true); } else { clustalw::userParameters->setUseTree2File(false); } QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->profileAlign(&phylipName1, &phylipName2); if(alignPtr->getNumSeqs() > 0) { /* 26-03-07,nige: redisplay Profile1 and Profile2 */ //alignPtr->reloadAlignment(); //- nige: not called in clustalx //alignmentViewer->updateView(1, alignPtr->getNumSeqs()); //- nige alignmentViewer->updateView(prof1First, prof1Last); //- nige profile2Viewer->updateView(prof2First, prof2Last); //- nige if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::alignSequencesToProfile1() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile1Empty()) { QMessageBox::information(this, "", "Profile 1 empty", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile2Empty()) { QMessageBox::information(this, "", "Profile 2 empty", QMessageBox::Ok); return; } FileNameDialogType type = AlignSeqsToProf1; AlignType alignType = Profile; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType, profile2FileName); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName1 = names.treeFile; clustalObj->QTSetFileNamesForOutput(names); /* 26-03-07,nige: save the profile boundaries */ int prof1First = alignmentViewer->getFirstSeq(); int prof1Last = alignmentViewer->getLastSeq(); int prof2First = profile2Viewer->getFirstSeq(); int prof2Last = profile2Viewer->getLastSeq(); alignmentViewer->updateView(0, 0); 
profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->sequencesAlignToProfile(&phylipName1); /* 26-03-07,nige: redisplay Profile1 and Profile2 */ alignPtr->reloadAlignment(); //alignmentViewer->updateView(1, alignPtr->getNumSeqs()); //- nige alignmentViewer->updateView(prof1First, prof1Last); //- nige profile2Viewer->updateView(prof2First, prof2Last); //- nige if(showLowScoreSegments == true) { calculateLowScoreAlign(); } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::alignSequencesToProfile1FromTree() { int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile1Empty()) { QMessageBox::information(this, "", "Profile 1 empty", QMessageBox::Ok); return; } if(clustalw::userParameters->getProfile2Empty()) { QMessageBox::information(this, "", "Profile 2 empty", QMessageBox::Ok); return; } FileNameDialogType type = AlignSeqsToProf1WithTree; AlignType alignType = Profile; AlignOutputFileNames namesDialog(sequenceFileName, type, alignType, profile2FileName); int code = namesDialog.exec(); if(code == QDialog::Accepted) { clustalw::AlignmentFileNames names = namesDialog.getNames(); string phylipName1 = names.treeFile; // Need to use the old guide trees! 
if(phylipName1 != "") { clustalw::userParameters->setUseTree1File(true); } else { clustalw::userParameters->setUseTree1File(false); } clustalObj->QTSetFileNamesForOutput(names); /* 26-03-07,nige: save the profile boundaries */ int prof1First = alignmentViewer->getFirstSeq(); int prof1Last = alignmentViewer->getLastSeq(); int prof2First = profile2Viewer->getFirstSeq(); int prof2Last = profile2Viewer->getLastSeq(); alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->sequencesAlignToProfile(&phylipName1); if(alignPtr->getNumSeqs() > 0) { /* 26-03-07,nige: redisplay Profile1 and Profile2 */ alignPtr->reloadAlignment(); //alignmentViewer->updateView(1, alignPtr->getNumSeqs()); //- nige alignmentViewer->updateView(prof1First, prof1Last); //- nige profile2Viewer->updateView(prof2First, prof2Last); //- nige if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::outputFormatOptions() { AlignmentFormatOptions alignFormatOptions; alignFormatOptions.exec(); } // Submenu void MainWindow::resetNewGapsBeforeAlignment() { // Andreas Wilm (UCD) 2008-03-10: // resetting gaps doesn't make sense if iteration is enabled if (clustalw::userParameters->IterationIsEnabled()) { QMessageBox::warning(this, tr("Error"), tr("Error: \"Reset gaps\" in combination with \"Iteration\" is not possible.")); actionResetNewGapsBeforeAlignment->setChecked(false); return; } if(actionResetNewGapsBeforeAlignment->isChecked()) { clustalw::userParameters->setResetAlignmentsNew(true); } else { clustalw::userParameters->setResetAlignmentsNew(false); } if(clustalw::userParameters->getResetAlignmentsNew() == true) { clustalw::userParameters->setResetAlignmentsAll(false); actionResetAllGapsBeforeAlignment->setChecked(false); } } void MainWindow::resetAllGapsBeforeAlignment() { // Andreas 
Wilm (UCD) 2008-03-10: // resetting gaps doesn't make sense if iteration is enabled if (clustalw::userParameters->IterationIsEnabled()) { QMessageBox::warning(this, tr("Error"), tr("Error: \"Reset gaps\" in combination with \"Iteration\" is not possible.")); actionResetAllGapsBeforeAlignment->setChecked(false); return; } if(actionResetAllGapsBeforeAlignment->isChecked()) { clustalw::userParameters->setResetAlignmentsAll(true); } else { clustalw::userParameters->setResetAlignmentsAll(false); } if(clustalw::userParameters->getResetAlignmentsAll() == true) { clustalw::userParameters->setResetAlignmentsNew(false); actionResetNewGapsBeforeAlignment->setChecked(false); } } void MainWindow::pairwiseAlignmentParameters() { pairwiseParams->exec(); } void MainWindow::multipleAlignmentParameters() { alignmentParams->exec(); } void MainWindow::proteinGapParameters() { ProteinGapParameters dialogProteinGapParameters; dialogProteinGapParameters.exec(); } void MainWindow::secondaryStructureParameters() { SecStructOptions dialogSecStructOptions; dialogSecStructOptions.exec(); } void MainWindow::setDefaultParameters() { clustalw::userParameters->setParamsToDefault(); clustalw::subMatrix->setValuesToDefault(); // Also need to reset the reset Gaps before Alignment !!!! 
actionResetAllGapsBeforeAlignment->setChecked(false); actionResetNewGapsBeforeAlignment->setChecked(false); } void MainWindow::drawNJTree() { KeyController::Instance()->clearCtrl(); //- nige int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(nSeqs < 2) { QString message, num; num.setNum(nSeqs); message = "Alignment has only " + num + " sequences"; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } TreeOutputFileNames treeNameDialog(sequenceFileName); int code = treeNameDialog.exec(); if(code == QDialog::Accepted) { clustalw::TreeNames treeNames = treeNameDialog.getNames(); alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->phylogeneticTree(&treeNames.phylipName, &treeNames.clustalName, &treeNames.distName, &treeNames.nexusName, treeNames.pimName); if(alignPtr->getNumSeqs() > 0) { alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::bootstrapNJTree() { KeyController::Instance()->clearCtrl(); //- nige int nSeqs = alignPtr->getNumSeqs(); if(nSeqs == 0) { QMessageBox::information(this, "", "No sequences loaded", QMessageBox::Ok); return; } if(nSeqs < 2) { QString message, num; num.setNum(nSeqs); message = "Alignment has only " + num + " sequences"; QMessageBox::information(this, "", message, QMessageBox::Ok); return; } BootstrapTreeDialog bootNameDialog(sequenceFileName); int code = bootNameDialog.exec(); if(code == QDialog::Accepted) { clustalw::TreeNames treeNames = bootNameDialog.getNames(); alignmentViewer->updateView(0, 0); profile2Viewer->updateView(0, 0); QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); clustalObj->bootstrapTree(&treeNames.phylipName, 
&treeNames.clustalName, &treeNames.nexusName); if(alignPtr->getNumSeqs() > 0) { alignmentViewer->updateView(1, alignPtr->getNumSeqs()); if(showLowScoreSegments == true) { calculateLowScoreAlign(); } } searchBeginSeq = 0; searchBeginRes = 1; QApplication::restoreOverrideCursor(); } alignmentViewer->updateView(); } void MainWindow::excludePositionsWithGaps() { if(actionExcludePosWithGap->isChecked()) { clustalw::userParameters->setTossGaps(true); } else { clustalw::userParameters->setTossGaps(false); } } void MainWindow::correctForMultipleSubstitutions() { if(actionCorrectForMultiSub->isChecked()) { clustalw::userParameters->setKimura(true); } else { clustalw::userParameters->setKimura(false); } } void MainWindow::treeOutputFormatOptions() { TreeFormatOptions treeFormatOptions; treeFormatOptions.exec(); } void MainWindow::backgroundColoring() { if(backGroundColors) { backGroundColors = false; } else { backGroundColors = true; } alignmentViewer->setBackGroundColoring(backGroundColors); profile2Viewer->setBackGroundColoring(backGroundColors); } void MainWindow::blackAndWhite() { //int scheme = BlackAndWhite; actionDefaultColors->setChecked(false); //actionLoadColorParameterFile->setChecked(false); alignmentViewer->setBlackAndWhite(); profile2Viewer->setBlackAndWhite(); } void MainWindow::defaultColors() { bool dnaFlag = clustalw::userParameters->getDNAFlag(); actionBlackAndWhite->setChecked(false); actionDefaultColors->setChecked(true); //actionLoadColorParameterFile->setChecked(false); if(dnaFlag) { alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } } void MainWindow::loadColorParameterFile() { bool success = false; QString userFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; userFile = fd.getOpenParamFileName(this, tr("Load Color Parameter File"), tr("XBEL Files (*.xbel *.xml);;All Files (*)")); 
if(userFile == "") { return; } success = readInParamFile(userFile, &userColors); // If successful, load it! if(success) { // Use the file in the viewers actionBlackAndWhite->setChecked(false); actionDefaultColors->setChecked(false); //actionLoadColorParameterFile->setChecked(true); alignmentViewer->setColorScheme(&userColors); profile2Viewer->setColorScheme(&userColors); } else { // use default colors actionBlackAndWhite->setChecked(false); actionDefaultColors->setChecked(true); //actionLoadColorParameterFile->setChecked(false); if(clustalw::userParameters->getDNAFlag()) { alignmentViewer->setColorScheme(&dnaColors); profile2Viewer->setColorScheme(&dnaColors); } else { alignmentViewer->setColorScheme(&proteinColors); profile2Viewer->setColorScheme(&proteinColors); } } } void MainWindow::showLowScoreSeg() { if(showLowScoreSegments) { hideLowScoreSegs(); } else { actionShowLowScoreSeg->setChecked(true); showLowScoreSegments = true; } recalculateLowScoreSegments(); } void MainWindow::hideLowScoreSegs() // Mark 22-5-07 { actionShowLowScoreSeg->setChecked(false); showLowScoreSegments = false; } void MainWindow::resetAndHideLowScoreSeqs() // Mark 23-5-07 { hideLowScoreSegs(); seqWeightCalculatedAlignmentViewer = false; seqWeightCalculatedProfile2Viewer = false; alignmentViewer->showLowScoringSegments(false, lowScoreResAlignmentViewer); profile2Viewer->showLowScoringSegments(false, lowScoreResProfile2Viewer); } void MainWindow::recalculateLowScoreSegments() { if(!showLowScoreSegments) { if(alignmentModeMenu->currentIndex() == 1) // If it is a profile { alignmentViewer->showLowScoringSegments(showLowScoreSegments, lowScoreResAlignmentViewer); profile2Viewer->showLowScoringSegments(showLowScoreSegments, lowScoreResProfile2Viewer); } else { alignmentViewer->showLowScoringSegments(showLowScoreSegments, lowScoreResAlignmentViewer); } } else { if(alignmentModeMenu->currentIndex() == 1) // If it is a profile { calculateLowScoreAlign(); calculateLowScoreProfile(); } else { 
calculateLowScoreAlign(); } } } void MainWindow::calculateLowScoreAlign() { int firstSeqAlign, lastSeqAlign, numSeqsAlign, nColsAlign; firstSeqAlign = alignmentViewer->getFirstSeq(); lastSeqAlign = alignmentViewer->getLastSeq(); numSeqsAlign = lastSeqAlign - firstSeqAlign + 1; if(firstSeqAlign != 0 && lastSeqAlign != 0) { nColsAlign = alignPtr->getLengthLongestSequence(firstSeqAlign, lastSeqAlign); // Now I need to set up the vectors to hold the weights and the low scoring segs // Check if the seqWeights need to be resized if(!seqWeightCalculatedAlignmentViewer) { lowScoreSeqWeightAlignmentViewer->clear(); lowScoreSeqWeightAlignmentViewer->resize(numSeqsAlign + 1); } lowScoreResAlignmentViewer->clearArray(); lowScoreResAlignmentViewer->ResizeRect(numSeqsAlign + 1, nColsAlign + 1); clustalw::LowScoreSegParams alignParams; alignParams.firstSeq = firstSeqAlign - 1; alignParams.nSeqs = numSeqsAlign; alignParams.lastSeq = lastSeqAlign - 1; alignParams.nCols = nColsAlign; alignParams.seqWeight = lowScoreSeqWeightAlignmentViewer; alignParams.lowScoreRes = lowScoreResAlignmentViewer; alignParams.seqWeightCalculated = seqWeightCalculatedAlignmentViewer; clustalObj->QTcalcLowScoreSegments(&alignParams); alignmentViewer->showLowScoringSegments(showLowScoreSegments, lowScoreResAlignmentViewer); seqWeightCalculatedAlignmentViewer = true; } } void MainWindow::calculateLowScoreProfile() { int firstSeqProf, lastSeqProf, numSeqsProf, nColsProf; firstSeqProf = profile2Viewer->getFirstSeq(); lastSeqProf = profile2Viewer->getLastSeq(); numSeqsProf = lastSeqProf - firstSeqProf + 1; if(firstSeqProf != 0 && lastSeqProf != 0) { nColsProf = alignPtr->getLengthLongestSequence(firstSeqProf, lastSeqProf); if(!seqWeightCalculatedProfile2Viewer) { lowScoreSeqWeightProfile2Viewer->clear(); lowScoreSeqWeightProfile2Viewer->resize(numSeqsProf + 1); } lowScoreResProfile2Viewer->clearArray(); lowScoreResProfile2Viewer->ResizeRect(numSeqsProf + 1, nColsProf + 1); clustalw::LowScoreSegParams 
profileParams; profileParams.firstSeq = firstSeqProf - 1; profileParams.nSeqs = numSeqsProf; profileParams.lastSeq = lastSeqProf - 1; profileParams.nCols = nColsProf; profileParams.seqWeight = lowScoreSeqWeightProfile2Viewer; profileParams.lowScoreRes = lowScoreResProfile2Viewer; profileParams.seqWeightCalculated = seqWeightCalculatedProfile2Viewer; clustalObj->QTcalcLowScoreSegments(&profileParams); profile2Viewer->showLowScoringSegments(showLowScoreSegments, lowScoreResProfile2Viewer); seqWeightCalculatedProfile2Viewer = true; } } void MainWindow::showExceptionalRes() { if(showExceptionalResidues) { actionShowExceptionalRes->setChecked(false); showExceptionalResidues = false; } else { actionShowExceptionalRes->setChecked(true); showExceptionalResidues = true; } if(alignmentModeMenu->currentIndex() == 1) // If it is a profile { alignmentViewer->showExceptionalRes(showExceptionalResidues); profile2Viewer->showExceptionalRes(showExceptionalResidues); } else { alignmentViewer->showExceptionalRes(showExceptionalResidues); } } void MainWindow::lowScoreSegParams() { lowScoreParams->exec(); recalculateLowScoreSegments(); } void MainWindow::columnScoreParams() { colScoreParams->exec(); // Now we need to recalculate the histogram. //alignmentViewer->updateHistogram(1, alignPtr->getNumSeqs()); // Also need to recalculate the low scoring and exceptional residues. 
if(alignmentModeMenu->currentIndex() == 1) // If it is a profile { alignmentViewer->updateHistogram(alignmentViewer->getFirstSeq(), alignmentViewer->getLastSeq()); alignmentViewer->showExceptionalRes(showExceptionalResidues); profile2Viewer->updateHistogram(profile2Viewer->getFirstSeq(), profile2Viewer->getLastSeq()); profile2Viewer->showExceptionalRes(showExceptionalResidues); } else { alignmentViewer->updateHistogram(1, alignPtr->getNumSeqs()); alignmentViewer->showExceptionalRes(showExceptionalResidues); } } void MainWindow::saveColumnScoresToFile() { alignmentViewer->outputColumnScores(); } void MainWindow::errorHandler(int errorCode, string offendingSeq) { string info; if(errorCode == clustalw::CANNOTOPENFILE) { QMessageBox::information(this, "", "Cannot open file!", QMessageBox::Ok); } else if(errorCode == clustalw::BADFORMAT) { QMessageBox::information(this, "", "File is badly formatted", QMessageBox::Ok); } else if(errorCode == clustalw::SEQUENCETOOBIG) { info = "File contains sequences longer than allowed length: "; info += offendingSeq; info += ")"; QMessageBox::information(this, "", info.c_str(), QMessageBox::Ok); } else if(errorCode == clustalw::NOSEQUENCESINFILE) { QMessageBox::information(this, "", "No sequences in file - bad format?", QMessageBox::Ok); } else if(errorCode == clustalw::ALLNAMESNOTDIFFERENT) { info = "Conflicting sequence names: " ; info += offendingSeq; info += "; names must be unique!"; QMessageBox::information(this, "", info.c_str(), QMessageBox::Ok); } else if(errorCode == clustalw::EMPTYSEQUENCE) { info = "There were empty sequences in the input file: "; info += offendingSeq; QMessageBox::information(this, "", info.c_str(), QMessageBox::Ok); } else if(errorCode == clustalw::MUSTREADINPROFILE1FIRST) { QMessageBox::information(this, "", "Must read in Profile 1 first!", QMessageBox::Ok); } else { QMessageBox::information(this, "", "An unknown error occured reading in the file!", QMessageBox::Ok); } } void 
MainWindow::setUpSearchForStringDialog() { searchString = ""; searchSeqs = new SearchForString; connect(searchSeqs, SIGNAL(searchFromStartSignal()), this, SLOT(searchFromStart())); connect(searchSeqs, SIGNAL(searchAgainSignal()), this, SLOT(searchAgain())); } void MainWindow::searchFromStart() { bottomInfoLabel->setText(""); // set both to start positions searchBeginSeq = 0; searchBeginRes = 1; //and then search again!!!!! searchAgain(); } void MainWindow::searchAgain() { bottomInfoLabel->setText(""); bool seqsSelectedInAlignment = false; bool seqsSelectedInProfile2 = false; //searchString = searchSeqs->getSearchString().toStdString(); searchString = searchSeqs->getSearchString().toUpper().toStdString(); //- nige: case-insensitive // If string is empty we should return. Dont search if(searchString.size() == 0) { return; } int numSeqsTopViewer; if(currentMode == profileMode) { numSeqsTopViewer = alignPtr->getProfile1NumSeqs(); } else { numSeqsTopViewer = alignPtr->getNumSeqs(); } if(alignmentViewer->getNumberOfSeqsSelected() != 0) { seqsSelectedInAlignment = true; } if((currentMode == profileMode) && profile2Viewer->getNumberOfSeqsSelected() != 0) { seqsSelectedInProfile2 = true; } const std::vector* seqsSelected; int firstSeqInViewer; bool found=false; Viewer viewerToSearch; // Check seqs in top viewer if(seqsSelectedInAlignment && searchBeginSeq < numSeqsTopViewer) { seqsSelected = alignmentViewer->getSelectedRows(); firstSeqInViewer = alignmentViewer->getFirstSeq(); if(currentMode == profileMode) { viewerToSearch = Profile1Viewer; } else { viewerToSearch = SequenceViewer; } // Do the sequences in the alignment first found = findNextInstanceOfString(seqsSelected, firstSeqInViewer, viewerToSearch); } int numSeqs = alignPtr->getNumSeqs(); // Check seqs in bottom viewer if(seqsSelectedInProfile2 && searchBeginSeq >= numSeqsTopViewer && searchBeginSeq < numSeqs) { viewerToSearch = Profile2Viewer; seqsSelected = profile2Viewer->getSelectedRows(); firstSeqInViewer = 
profile2Viewer->getFirstSeq(); found = findNextInstanceOfString(seqsSelected, firstSeqInViewer, viewerToSearch); } if(found == false) { // print message to screen saying cannot find it! QString message = "Cannot find string " + QString(searchString.c_str()); bottomInfoLabel->setText(message); } } bool MainWindow::findNextInstanceOfString(const std::vector* seqsSelected, int firstSeq, Viewer viewerSearched) { int size = seqsSelected->size(); bool found = false; for(int i = searchBeginSeq; i < (size + firstSeq - 1); i++) { if((*seqsSelected)[i - firstSeq + 1] == 1) { // Then serch sequence i+1; int seq = i + 1; searchBeginRes = alignPtr->searchForString(&found, seq, searchBeginRes, searchString); if(found == false) { searchBeginSeq++; searchBeginRes = 1; } else { // write message to screen saying where it was found QString seqName = QString(alignPtr->getName(seq).c_str()); QString resFound = QString("%1").arg(searchBeginRes); QString message; if(viewerSearched == Profile1Viewer) { message = "String " + QString(searchString.c_str()) + " in profile1 sequence " + seqName; } else if(viewerSearched == Profile2Viewer) { message = "String " + QString(searchString.c_str()) + " in profile2 sequence " + seqName; } else { message = "String " + QString(searchString.c_str()) + " in sequence " + seqName; } message += ", column " + resFound; bottomInfoLabel->setText(message); searchBeginRes++; return true; // found it!!!!! 
} } else { searchBeginSeq++; searchBeginRes = 1; } } return false; }

// ---------------------------------------------------------------------------
// Help slots.
// Each slot constructs a HelpDisplayWidget with a one-character topic key and
// runs it with exec().
// NOTE(review): the keys appear to match the ">>HELP x <<" section markers of
// clustalx.hlp (e.g. G = general help, F = input/output files, E = editing
// alignments, M = multiple alignments) -- confirm against HelpDisplayWidget.
// ---------------------------------------------------------------------------

// Help topic 'G'.
void MainWindow::generalHelp()
{
    HelpDisplayWidget help('G');
    help.exec();
}

// Help topic 'F'.
void MainWindow::inputOutputHelp()
{
    HelpDisplayWidget help('F');
    help.exec();
}

// Help topic 'E'.
void MainWindow::editAlignHelp()
{
    HelpDisplayWidget help('E');
    help.exec();
}

// Help topic 'M'.
void MainWindow::multiAlignHelp()
{
    HelpDisplayWidget help('M');
    help.exec();
}

// Help topic 'P'.
void MainWindow::profileAlignHelp()
{
    HelpDisplayWidget help('P');
    help.exec();
}

// Help topic 'B'.
void MainWindow::secondaryStructureHelp()
{
    HelpDisplayWidget help('B');
    help.exec();
}

// Help topic 'T'.
void MainWindow::treesHelp()
{
    HelpDisplayWidget help('T');
    help.exec();
}

// Help topic 'C'.
void MainWindow::colorsHelp()
{
    HelpDisplayWidget help('C');
    help.exec();
}

// Help topic 'Q'.
void MainWindow::alignQualityHelp()
{
    HelpDisplayWidget help('Q');
    help.exec();
}

// Help topic '9'.
void MainWindow::commandLineHelp()
{
    HelpDisplayWidget help('9');
    help.exec();
}

// Help topic 'R'.
void MainWindow::referencesHelp()
{
    HelpDisplayWidget help('R');
    help.exec();
}

// Destructor: frees clustalObj, the only member deleted explicitly here.
// NOTE(review): the null check is redundant (deleting a null pointer is a
// no-op in C++); how the remaining members are released is not visible in
// this function -- confirm.
MainWindow::~MainWindow()
{
    if(clustalObj != 0)
    {
        delete clustalObj;
    }
}

// Assigns 0 to every member listed below. Nothing is freed here, so any object
// a member pointed to beforehand is simply forgotten.
// NOTE(review): presumably meant to be called before the members are first
// created (e.g. early in construction) -- confirm with the caller.
void MainWindow::setAllPtrsToZero()
{
    searchSeqs = 0;
    actionReferencesHelp = 0;
    actionCommandLineHelp = 0;
    actionAlignQualityHelp = 0;
    actionColorsHelp = 0;
    actionTreesHelp = 0;
    actionSecondaryStructureHelp = 0;
    actionProfileAlignHelp = 0;
    actionMultiAlignHelp = 0;
    actionEditAlignHelp = 0;
    actionInputOutputHelp = 0;
    actionGeneralHelp = 0;
    actionSaveColumnScoresToFile = 0;
    actionColumnScoreParams = 0;
    actionLowScoreSegParams = 0;
    actionShowExceptionalRes = 0;
    actionShowLowScoreSeg = 0;
    actionLoadColorParameterFile = 0;
    actionDefaultColors = 0;
    actionBlackAndWhite = 0;
    actionBackColoring = 0;
    actionTreeOutputFormatOptions = 0;
    actionCorrectForMultiSub = 0;
    actionExcludePosWithGap = 0;
    actionBootstrapNJTree = 0;
    actionDrawNJTree = 0;
    actionSecondaryStructureParameters = 0;
    actionProteinGapParameters = 0;
    actionMultipleAlignmentParameters = 0;
    actionPairwiseAlignmentParameters = 0;
    actionResetAllGapsBeforeAlignment = 0;
    actionResetNewGapsBeforeAlignment = 0;
    actionOutputFormatOptions = 0;
    menuAlignParameters = 0;
    actionSetDefaultParameters = 0;
    actionAlignSequencesToProfile1FromTree = 0;
    actionAlignSequencesToProfile1 = 0;
    actionAlignProfileFromTree = 0;
    actionAlignProfile1ToProfile2 = 0;
    actionRealignSelectRange = 0;
    actionRealignSelectSeq = 0;
    actionDoAlignmentFromGuideTree = 0;
    actionDoGuideTreeOnly = 0;
    actionDoCompleteAlign = 0;
    actionRemoveGapOnlyColumns = 0;
    actionRemoveAllGaps = 0;
    actionSearchForString = 0;
    actionClearRangeSelection = 0;
    actionClearSequenceSelection = 0;
    actionAddProfile2ToProfile1 = 0;
    actionSelectProfile2 = 0;
    actionSelectProfile1 = 0;
    actionSelectAllSequences = 0;
    actionPasteSequences = 0;
    actionCutSequences = 0;
    actionQuit = 0;
    actionWriteProfile2AsPost = 0;
    actionWriteProfile1AsPost = 0;
    actionWriteAlignmentAsPost = 0;
    actionSaveProfile2As = 0;
    actionSaveProfile1As = 0;
    actionLoadProfile2 = 0;
    actionLoadProfile1 = 0;
    actionSaveSeqAs = 0;
    actionAppendSequences = 0;
    actionLoadSequences = 0;
    profile2Viewer = 0;
    alignmentViewer = 0;
    horizontalGroupBox = 0;
    layoutHbox = 0;
    sideFiller = 0;
    alignModeLabel = 0;
    fontLabel = 0;
    profileScrollLock = 0;
    fontMenu = 0;
    alignmentModeMenu = 0;
    clustalObj = 0;
    lowScoreParams = 0;
    colScoreParams = 0;
    pairwiseParams = 0;
    alignmentParams = 0;
    lowScoreResProfile2Viewer = 0;
    lowScoreSeqWeightProfile2Viewer = 0;
    lowScoreResAlignmentViewer = 0;
    lowScoreSeqWeightAlignmentViewer = 0;
    bottomInfoLabel = 0;
    w = 0;
    mainlayout = 0;
}

/* nige 25-04-07: handler for the WHOLE application. */
// Offers the pressed key's code to the KeyController obtained through
// KeyController::Instance(). If consumeKeyPress() returns true the event stops
// here; otherwise it is forwarded to QWidget::keyPressEvent().
void MainWindow::keyPressEvent(QKeyEvent *event)
{
    KeyController *kbd = KeyController::Instance();
    if (kbd->consumeKeyPress(event->key()))
        return;
    QWidget::keyPressEvent(event);
}

/* nige 25-04-07: handler for the WHOLE application.
*/ void MainWindow::keyReleaseEvent(QKeyEvent *event) { KeyController *kbd = KeyController::Instance(); if (kbd->consumeKeyRelease(event->key())) return; QWidget::keyReleaseEvent(event); } // Mark 16-5-07 void MainWindow::setUPGMA() { actionUseUPGMA->setChecked(true); actionUseNJ->setChecked(false); clustalw::userParameters->setClusterAlgorithm(clustalw::UPGMA); actionBootstrapNJTree->setEnabled(false); // Mark 1-9-2007. } // Mark 16-5-07 void MainWindow::setNJ() { actionUseUPGMA->setChecked(false); actionUseNJ->setChecked(true); clustalw::userParameters->setClusterAlgorithm(clustalw::NJ); actionBootstrapNJTree->setEnabled(true); // Mark 1-9-2007. } // Mark 16-5-07 void MainWindow::setNoIteration() { clustalw::userParameters->setDoRemoveFirstIteration(clustalw::NONE); actionNoIteration->setChecked(true); actionTreeIteration->setChecked(false); actionAlignIteration->setChecked(false); } // Mark 16-5-07 void MainWindow::setTreeIteration() { // Andreas Wilm (UCD) 2008-03-10: // warn if "resetting gaps" is enabled if (clustalw::userParameters->ResetGapsIsEnabled()) { QMessageBox::warning(this, tr("Error"), tr("Error: \"Iteration\" in combination with \"Reset gaps\" is not possible.")); setNoIteration(); return; } clustalw::userParameters->setDoRemoveFirstIteration(clustalw::TREE); actionNoIteration->setChecked(false); actionTreeIteration->setChecked(true); actionAlignIteration->setChecked(false); } // Mark 16-5-07 void MainWindow::setAlignIteration() { // Andreas Wilm (UCD) 2008-03-10: // warn if "resetting gaps" is enabled if (clustalw::userParameters->ResetGapsIsEnabled()) { QMessageBox::warning(this, tr("Error"), tr("Error: \"Iteration\" in combination with \"Reset gaps\" is not possible.")); setNoIteration(); return; } clustalw::userParameters->setDoRemoveFirstIteration(clustalw::ALIGNMENT); actionNoIteration->setChecked(false); actionTreeIteration->setChecked(false); actionAlignIteration->setChecked(true); } void MainWindow::toplevelMenusEnabled(bool b) { 
list::iterator i; QMenu *menu; for (i=toplevelMenuList.begin(); i != toplevelMenuList.end(); ++i) { menu = *i; menu->setEnabled(b); } } clustalx-2.1/main.cpp0000644000175000017500000000653211470725216014454 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * Changes: * * 03-02-07,Nigel Brown(EMBL): removed 'globalLastDir.h' dependency * and 'lastDir' global initialization. * * 24-05-07,Nigel Brown(EMBL): changed saving of executablePath to use * Resources class instead of MainWindow. * * 2007-12-03, Andreas Wilm (UCD): added support for infile flag * (commandline) * * 2007-12-06, Andreas Wilm (UCD): added full clustalw command line support * ****************************************************************************/ #include //#include #include "clustalW/substitutionMatrix/SubMatrix.h" #include "QTUtility.h" #include "clustalW/general/UserParameters.h" #include "mainwindow.h" #include #include #include "version.h" // - nige: Resources.h #include "Resources.h" #include "clustalW/interface/CommandLineParser.h" namespace clustalw { UserParameters* userParameters; Utility* utilityObject; SubMatrix *subMatrix; Stats* statsObject; } /** * The main function. It creates a new MainWindow widget and displays it. */ using namespace clustalw; int main(int argc, char *argv[]) { //QTranslator translator; //translator.load("clustalx_la"); // Note the path to the executable is usually passed in as argv[0] // This will be used if we cannot find the help or the xml files in current dir. 
Resources *resources = Resources::Instance(); resources->setPathToExecutable(QString(argv[0])); //resources->dump(); //debug: nige userParameters = new UserParameters(false); subMatrix = new SubMatrix(); // Mark April 11th 2007 userParameters->setInteractive(true); userParameters->setGui(true); // gui is not the same as interactive - text menus are interactive but not gui statsObject = new Stats();// actually only needed in clustalw QApplication app(argc, argv); //app.installTranslator(&translator); psPrintParams params; MainWindow window; utilityObject = new QTUtility(&window); window.show(); // parse command line if we got some non-Qt args // must be done after we've setup utilityObject if (argc > 1) { vector args; for (int i = 1; i < argc; ++i) { args.push_back(argv[i]); } userParameters->setMenuFlag(false); CommandLineParser cmdLineParser(&args, true); } // if profiles were given on the command line, we need to switch to two window mode if(clustalw::userParameters->getProfileNum() > 0) // profile mode { window.setProfileModeCmd(); window.loadProfile1(clustalw::userParameters->getProfile1Name()); window.loadProfile2(clustalw::userParameters->getProfile2Name()); } else // multiple alignment mode { // infile parsed? then load string seqname = userParameters->getSeqName(); if (seqname.length()) window.loadSequences(QString(seqname.c_str())); } // Andreas Wilm (UCD): // if help flag is given, show graphical clustax help on cmd line // flags, instead of trying to load clustalw help (as 1.83 did!) // if (userParameters->getHelpFlag() ||userParameters->getFullHelpFlag() ) window.commandLineHelp(); return app.exec(); delete userParameters; delete utilityObject; delete subMatrix; } clustalx-2.1/installer0000755000175000017500000001276711470725217014757 0ustar ddineenddineen#!/bin/sh ########################################################################### # Installer for clustal. # # Changes: # # 25-05-07,Nigel Brown (EMBL): created. 
# 08-06-07,Nigel Brown (EMBL): added default link location. ########################################################################### SAVEPATH=`echo $PATH | tr ':' ' '| sort -u` PATH=/bin:/usr/bin; export PATH INSTALLDIR=`pwd` BINDIR= SCRIPT=clustalx2 BINARY=clustalx PROG=`basename $0` umask 022 cleanup () { rm -f "$SCRIPT" >/dev/null 2>&1 echo; echo Aborted. 1>&2 exit 1 } trap cleanup 1 2 3 15 msg () { echo $* 1>&2; } warn () { echo " + $*" 1>&2; } error () { echo $PROG: ERROR: $* 1>&2; } die () { echo $PROG: FATAL: $* 1>&2; exit 1; } makeLink () { src="$1"; dest="$2"; file="$3" [ "$dest" = . ] && return [ "$src" = "$dest" ] && return if [ -e "$dest/$file" ]; then warn "Replacing existing file '$dest/$file'" rmFile "$dest/$file" fi symLink "$src/$file" "$dest/$file" } rmFile () { #warn "Deleting '$1'" rm -f "$1" >/dev/null 2>&1 || die "Attempt to delete old '$1' failed" warn "Deleted '$1'" } symLink () { #warn "Symbolic linking '$1' to '$2'" ln -s "$1" "$2" >/dev/null 2>&1 || die "Attempt to symlink '$1' to '$2' failed" warn "Symlinked '$1' to '$2'" } mkDir () { mkdir -p "$1" >/dev/null 2>&1 if [ $? -ne 0 ]; then [ $# -lt 2 ] && die "Can't make directory '$1'" warn "Can't make directory '$1'" fi warn "Created directory '$1'" } guessBinDir () { [ -n "$BINDIR" ] && return #already set/guessed if [ -e /usr/bin/id ]; then user=`/usr/bin/id -u` elif [ -e /bin/id ]; then user=`/bin/id -u` elif [ "$USER" = "root" ]; then user=0 fi if [ "$user" -eq 0 ]; then BINDIR=/usr/local/bin msg "You have administrator/root permissions - good!" else BINDIR=$HOME/bin msg "Note: you must have write permission for the chosen directory." fi #echo; msg "The suggested location will be: [$BINDIR]" unset user } pauseAndContinue () { echo echo -n "Hit the 'return' or 'enter' key to continue. " read x unset x } readBinDir () { dir= while true; do echo echo -n "Choose a directory for the executable [$BINDIR]? 
" read dir echo if [ -z "$dir" -a -n "$BINDIR" ]; then dir=$BINDIR break fi if [ "$dir" = "$SCRIPT" ]; then warn "That name is reserved - please use something else!" continue fi if [ ! -e "$dir" ]; then mkDir "$dir" nofail fi if [ -e "$dir" ]; then [ -w "$dir" ] && break warn "Directory '$dir' exists, but is not writeable" fi done BINDIR="$dir" unset dir } checkBinDir () { if [ -z "$BINDIR" ]; then die "Directory '$BINDIR' was not specified - aborting" fi if [ "$BINDIR" = "$SCRIPT" ]; then die "That name is reserved - please use something else!" fi if [ ! -d "$BINDIR" -a ! -h "$BINDIR" ]; then mkDir "$BINDIR" nofail fi if [ -d "$BINDIR" -o -h "$BINDIR" ]; then if [ -w "$BINDIR" ]; then return fi die "Directory '$BINDIR' exists, but is not writeable" fi die "Directory '$BINDIR' does not exist and could not be created - aborting" } makeScript () { cat<&2 #echo BINARY=\$BINARY 1>&2 # Is there an executable binary file? binary=\$CLU_INSTALL_DIR/\$BINARY if [ ! -f \$binary ]; then echo "\$PROG: Can't find executable '\$binary'" exit 1 fi if [ ! -x \$binary ]; then echo "\$PROG: Binary '\$binary' is not executable" exit 1 fi # Exec the child over this one exec \$binary \$* ########################################################################### EOT } ########################################################################### if [ ! -f "$BINARY" ]; then die "You must change into the unpacked clustal directory first!" 
fi rm -f "$SCRIPT" >/dev/null 2>&1 if [ $# -gt 0 ]; then BINDIR="$1" else cat< "$SCRIPT" chmod 0755 "$SCRIPT" makeLink "$INSTALLDIR" "$BINDIR" "$SCRIPT" cat< clustalx-2.1/colprint.xml0000644000175000017500000000237411470725216015400 0ustar ddineenddineen clustalx-2.1/coldna.xml0000644000175000017500000000151711470725216015004 0ustar ddineenddineen clustalx-2.1/clustalx.pro0000644000175000017500000002115211470725216015400 0ustar ddineenddineen###################################################################### # Automatically generated by qmake (2.01a) Wed Nov 17 10:01:48 2010 ###################################################################### TEMPLATE = app TARGET = clustalx CONFIG += release static QT += xml VERSION = 2.1 win32 { DEFINES += OS_WINDOWS } unix { DEFINES += OS_UNIX } macx { DEFINES += OS_MAC } DEPENDPATH += . \ clustalW \ clustalW/alignment \ clustalW/fileInput \ clustalW/general \ clustalW/interface \ clustalW/multipleAlign \ clustalW/pairwise \ clustalW/substitutionMatrix \ clustalW/tree \ clustalW/tree/UPGMA INCLUDEPATH += . 
\ clustalW/general \ clustalW/substitutionMatrix \ clustalW/alignment \ clustalW \ clustalW/fileInput \ clustalW/interface \ clustalW/pairwise \ clustalW/multipleAlign \ clustalW/tree \ clustalW/tree/UPGMA # Input HEADERS += AlignmentFormatOptions.h \ AlignmentParameters.h \ AlignmentViewerWidget.h \ AlignmentWidget.h \ AlignOutputFileNames.h \ BootstrapTreeDialog.h \ ClustalQtParams.h \ ColorFileXmlParser.h \ ColorParameters.h \ ColumnScoreParams.h \ FileDialog.h \ HardCodedColorScheme.h \ HelpDisplayWidget.h \ HistogramWidget.h \ KeyController.h \ LowScoringSegParams.h \ mainwindow.h \ PairwiseParams.h \ PostscriptFileParams.h \ ProteinGapParameters.h \ PSPrinter.h \ QTUtility.h \ Resources.h \ SaveSeqFile.h \ SearchForString.h \ SecStructOptions.h \ SeqNameWidget.h \ TreeFormatOptions.h \ TreeOutputFileNames.h \ version.h \ WritePostscriptFile.h \ clustalW/Clustal.h \ clustalW/clustalw_version.h \ clustalW/Help.h \ clustalW/alignment/Alignment.h \ clustalW/alignment/AlignmentOutput.h \ clustalW/alignment/ObjectiveScore.h \ clustalW/alignment/Sequence.h \ clustalW/fileInput/ClustalFileParser.h \ clustalW/fileInput/EMBLFileParser.h \ clustalW/fileInput/FileParser.h \ clustalW/fileInput/FileReader.h \ clustalW/fileInput/GDEFileParser.h \ clustalW/fileInput/InFileStream.h \ clustalW/fileInput/MSFFileParser.h \ clustalW/fileInput/PearsonFileParser.h \ clustalW/fileInput/PIRFileParser.h \ clustalW/fileInput/RSFFileParser.h \ clustalW/general/Array2D.h \ clustalW/general/clustalw.h \ clustalW/general/ClustalWResources.h \ clustalW/general/DebugLog.h \ clustalW/general/debuglogObject.h \ clustalW/general/general.h \ clustalW/general/OutputFile.h \ clustalW/general/param.h \ clustalW/general/RandomAccessLList.h \ clustalW/general/SequenceNotFoundException.h \ clustalW/general/SquareMat.h \ clustalW/general/Stats.h \ clustalW/general/statsObject.h \ clustalW/general/SymMatrix.h \ clustalW/general/UserParameters.h \ clustalW/general/userparams.h \ 
clustalW/general/Utility.h \ clustalW/general/utils.h \ clustalW/general/VectorOutOfRange.h \ clustalW/general/VectorUtility.h \ clustalW/interface/CommandLineParser.h \ clustalW/interface/InteractiveMenu.h \ clustalW/multipleAlign/Iteration.h \ clustalW/multipleAlign/LowScoreSegProfile.h \ clustalW/multipleAlign/MSA.h \ clustalW/multipleAlign/MyersMillerProfileAlign.h \ clustalW/multipleAlign/ProfileAlignAlgorithm.h \ clustalW/multipleAlign/ProfileBase.h \ clustalW/multipleAlign/ProfileStandard.h \ clustalW/multipleAlign/ProfileWithSub.h \ clustalW/pairwise/FastPairwiseAlign.h \ clustalW/pairwise/FullPairwiseAlign.h \ clustalW/pairwise/PairwiseAlignBase.h \ clustalW/substitutionMatrix/globalmatrix.h \ clustalW/substitutionMatrix/matrices.h \ clustalW/substitutionMatrix/SubMatrix.h \ clustalW/tree/AlignmentSteps.h \ clustalW/tree/ClusterTree.h \ clustalW/tree/ClusterTreeAlgorithm.h \ clustalW/tree/ClusterTreeOutput.h \ clustalW/tree/dayhoff.h \ clustalW/tree/NJTree.h \ clustalW/tree/RandomGenerator.h \ clustalW/tree/Tree.h \ clustalW/tree/TreeInterface.h \ clustalW/tree/UnRootedClusterTree.h \ clustalW/tree/UPGMA/Node.h \ clustalW/tree/UPGMA/RootedClusterTree.h \ clustalW/tree/UPGMA/RootedGuideTree.h \ clustalW/tree/UPGMA/RootedTreeOutput.h \ clustalW/tree/UPGMA/UPGMAAlgorithm.h \ clustalW/tree/UPGMA/upgmadata.h \ clustalW/general/InvalidCombination.cpp SOURCES += AlignmentFormatOptions.cpp \ AlignmentParameters.cpp \ AlignmentViewerWidget.cpp \ AlignmentWidget.cpp \ AlignOutputFileNames.cpp \ BootstrapTreeDialog.cpp \ ColorFileXmlParser.cpp \ ColorParameters.cpp \ ColumnScoreParams.cpp \ FileDialog.cpp \ HardCodedColorScheme.cpp \ HelpDisplayWidget.cpp \ HistogramWidget.cpp \ KeyController.cpp \ LowScoringSegParams.cpp \ main.cpp \ mainwindow.cpp \ PairwiseParams.cpp \ PostscriptFileParams.cpp \ ProteinGapParameters.cpp \ PSPrinter.cpp \ QTUtility.cpp \ Resources.cpp \ SaveSeqFile.cpp \ SearchForString.cpp \ SecStructOptions.cpp \ SeqNameWidget.cpp \ 
TreeFormatOptions.cpp \ TreeOutputFileNames.cpp \ WritePostscriptFile.cpp \ clustalW/Clustal.cpp \ clustalW/Help.cpp \ clustalW/alignment/Alignment.cpp \ clustalW/alignment/AlignmentOutput.cpp \ clustalW/alignment/ObjectiveScore.cpp \ clustalW/alignment/Sequence.cpp \ clustalW/fileInput/ClustalFileParser.cpp \ clustalW/fileInput/EMBLFileParser.cpp \ clustalW/fileInput/FileParser.cpp \ clustalW/fileInput/FileReader.cpp \ clustalW/fileInput/GDEFileParser.cpp \ clustalW/fileInput/InFileStream.cpp \ clustalW/fileInput/MSFFileParser.cpp \ clustalW/fileInput/PearsonFileParser.cpp \ clustalW/fileInput/PIRFileParser.cpp \ clustalW/fileInput/RSFFileParser.cpp \ clustalW/general/ClustalWResources.cpp \ clustalW/general/DebugLog.cpp \ clustalW/general/InvalidCombination.cpp \ clustalW/general/OutputFile.cpp \ clustalW/general/Stats.cpp \ clustalW/general/SymMatrix.cpp \ clustalW/general/UserParameters.cpp \ clustalW/general/Utility.cpp \ clustalW/general/VectorOutOfRange.cpp \ clustalW/interface/CommandLineParser.cpp \ clustalW/interface/InteractiveMenu.cpp \ clustalW/multipleAlign/Iteration.cpp \ clustalW/multipleAlign/LowScoreSegProfile.cpp \ clustalW/multipleAlign/MSA.cpp \ clustalW/multipleAlign/MyersMillerProfileAlign.cpp \ clustalW/multipleAlign/ProfileBase.cpp \ clustalW/multipleAlign/ProfileStandard.cpp \ clustalW/multipleAlign/ProfileWithSub.cpp \ clustalW/pairwise/FastPairwiseAlign.cpp \ clustalW/pairwise/FullPairwiseAlign.cpp \ clustalW/substitutionMatrix/SubMatrix.cpp \ clustalW/tree/AlignmentSteps.cpp \ clustalW/tree/ClusterTree.cpp \ clustalW/tree/ClusterTreeOutput.cpp \ clustalW/tree/NJTree.cpp \ clustalW/tree/RandomGenerator.cpp \ clustalW/tree/Tree.cpp \ clustalW/tree/TreeInterface.cpp \ clustalW/tree/UnRootedClusterTree.cpp \ clustalW/tree/UPGMA/Node.cpp \ clustalW/tree/UPGMA/RootedClusterTree.cpp \ clustalW/tree/UPGMA/RootedGuideTree.cpp \ clustalW/tree/UPGMA/RootedTreeOutput.cpp \ clustalW/tree/UPGMA/UPGMAAlgorithm.cpp 
clustalx-2.1/clustalx.hlp0000644000175000017500000020746411470725216015377 0ustar ddineenddineen This is the on-line help file for Clustal X (version 2.0 or greater). It should be named or defined as: clustalx.hlp Toby Gibson EMBL, Heidelberg, Germany. Des Higgins Conway Institute, UCD, Dublin, Ireland. Julie Thompson/Francois Jeanmougin IGBMC, Strasbourg, France. >>HELP G <<

General help for CLUSTAL X (2.0)

Clustal X is a windows interface for the ClustalW multiple sequence alignment program. It provides an integrated environment for performing multiple sequence and profile alignments and analysing the results. The sequence alignment is displayed in a window on the screen. A versatile coloring scheme has been incorporated allowing you to highlight conserved features in the alignment. The pull-down menus at the top of the window allow you to select all the options required for traditional multiple sequence and profile alignment.

You can cut-and-paste sequences to change the order of the alignment; you can select a subset of sequences to be aligned; you can select a sub-range of the alignment to be realigned and inserted back into the original alignment.

Alignment quality analysis can be performed and low-scoring segments or exceptional residues can be highlighted.

ClustalX is available on Linux, Mac and Windows.

SEQUENCE INPUT

Sequences and profiles (a term for pre-existing alignments) are input using the FILE menu. Invalid options will be disabled. All sequences must be in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT, Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file. All non-alphabetic characters (spaces, digits, punctuation marks) are ignored except "-" which is used to indicate a GAP ("." in MSF/RSF).

SEQUENCE / PROFILE ALIGNMENTS

Clustal X has two modes which can be selected using the switch directly above the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.

To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT MODE is selected. A single sequence data area is then displayed. The ALIGNMENT menu then allows you to either produce a guide tree for the alignment, or to do a multiple alignment following the guide tree, or to do a full multiple alignment.

In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you to align 2 alignments (termed profiles). Profiles are also used to add a new sequence to an old alignment, or to use secondary structure to guide the alignment process. GAPS in the old alignments are indicated using the "-" character. PROFILES can be input in ANY of the allowed formats; just use "-" (or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button "Lock Scroll" is displayed which allows you to scroll the two profiles together using a single scroll bar. When the Lock Scroll is turned off, the two profiles can be scrolled independently.

PHYLOGENETIC TREES

Phylogenetic trees can be calculated from old alignments (read in with "-" characters to indicate gaps) OR after a multiple alignment while the alignment is still displayed.

ALIGNMENT DISPLAY

The alignment is displayed on the screen with the sequence names on the left hand side. The sequence alignment is for display only; it cannot be edited here (except for changing the sequence order by cutting-and-pasting on the sequence names).

A ruler is displayed below the sequences, starting at 1 for the first residue position (residue numbers in the sequence input file are ignored).

A line above the alignment is used to mark strongly conserved positions. Three characters ("*", ":" and ".") are used:

"*" indicates positions which have a single, fully conserved residue.

":" indicates that one of the following 'strong' groups is fully conserved:

    STA  
    NEQK  
    NHQK  
    NDEQ  
    QHRK  
    MILV  
    MILF  
    HY  
    FYW  

"." indicates that one of the following 'weaker' groups is fully conserved:

    CSA  
    ATV  
    SAG  
    STNK  
    STPA  
    SGND  
    SNDEQK  
    NDEQHK  
    NEQHRK  
    FVLIM  
    HFY

These are all the positively scoring groups that occur in the Gonnet Pam250 matrix. Strong groups are those with a score > 0.5, and weak groups are those with a score <= 0.5.

For profile alignments, secondary structure and gap penalty masks are displayed above the sequences, if any data is found in the profile input file.

>>HELP F <<

Input / Output Files

LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any sequences that are already loaded. All sequences must be in 1 file. The formats that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file. All non-alphabetic characters (spaces, digits, punctuation marks) are ignored except "-" which is used to indicate a GAP ("." in MSF/RSF).

The program tries to automatically recognise the different file formats used and to guess whether the sequences are amino acid or nucleotide. This is not always foolproof.

FASTA and NBRF/PIR formats are recognised by having a ">" as the first character in the file.

EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the file (the token for the entry name field).

CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.

GCG/MSF format is recognised by one of the following:

  • the word PileUp at the start of the file.
  • the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT at the start of the file.
  • the word MSF on the first line of the file, and the characters .. at the end of this line.

GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of the file.

If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the sequence will be assumed to be nucleotide. This works in 97.3% of cases but watch out!

APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences do not replace those already loaded, but are appended at the end of the alignment.

SAVE SEQUENCES AS... offers the user a choice of one of seven output formats: CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS, GDE or FASTA. All sequences are written to a single file. Options are available to save a range of the alignment, switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING for CLUSTAL files. Users can also choose to include the residue range numbers by appending them to the sequence names.

LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any sequences already loaded as Profile 1. This option will also remove any sequences which are loaded in Profile 2.

LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any sequences already loaded as Profile 2.

SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only those sequences in Profile 1 will be written to the output file.

SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only those sequences in Profile 2 will be written to the output file.

WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript format file. This will include any secondary structure / gap penalty mask information and the consensus and ruler lines which are displayed on the screen. The Alignment Quality curve can be optionally included in the output file.

WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT except that only the profile 1 display will be printed.

WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT except that only the profile 2 display will be printed.

POSTSCRIPT PARAMETERS

A number of options are available to allow you to configure your postscript output file.

PS COLORS FILE: The exact RGB values required to reproduce the colors used in the alignment window will vary from printer to printer. A PS colors file can be specified that contains the RGB values for all the colors required by each of your postscript printers.

By default, Clustal X looks for a file called "colprint.par" in the current directory (if you are running under UNIX, it then looks in your home directory, and finally in the directories in your PATH environment variable). If no PS colors file is found or a color used on the screen is not defined here, the screen RGB values (from the Color Parameter File) are used.

The PS colors file consists of one line for each color to be defined, with the color name followed by the RGB values (on a scale of 0 to 1). For example,

    RED          0.9 0.1 0.1

Blank lines and comments (lines beginning with a "#" character) are ignored.

PAGE SIZE: The alignment can be displayed on either A4, A3 or US Letter size pages.

ORIENTATION: The alignment can be displayed on either a landscape or portrait page.

PRINT HEADER: An optional header including the postscript filename, and creation date can be printed at the top of each page.

PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath the alignment on the screen can be included in the postscript output.

PRINT RULER: The ruler which is displayed underneath the alignment on the screen can be included in the postscript output.

PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right hand side of the alignment.

RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size selected. This option can be turned off, in which case a font size of 10 will be used for the sequences.

PRINT FROM POSITION/TO: A range of the alignment can be printed. The default is to print the full alignment. The first and last residues to be printed are specified here.

USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The number of residues in a block is specified here. More than one block may then be printed on a single page. This is useful for long alignments of a small number of sequences. If the block length is set to 0, the alignment will not be divided into blocks, but printed across a number of pages.

>>HELP E <<

Editing Alignments

Clustal X allows you to change the order of the sequences in the alignment, by cutting-and-pasting the sequence names.

To select a group of sequences to be moved, click on a sequence name and drag the cursor until all the required sequences are highlighted. Holding down the Shift key when clicking on the first name will add new sequences to those already selected.

(Options are provided to Select All Sequences, Select Profile 1 or Select Profile 2.)

The selected sequences can be removed from the alignment by using the EDIT menu, CUT option.

To add the cut sequences back into an alignment, select a sequence by clicking on the sequence name. The cut sequences will be added to the alignment, immediately following the selected sequence, by the EDIT menu, PASTE option.

To add the cut sequences to an empty alignment (eg. when cutting sequences from Profile 1 and pasting them to Profile 2), click on the empty sequence name display area, and select the EDIT menu, PASTE option as before.

The sequence selection and sequence range selection can be cleared using the EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options respectively.

To search for a string of residues in the sequences, select the sequences to be searched by clicking on the sequence names. You can then enter the string to search for by selecting the SEARCH FOR STRING option. If the string is found in any of the sequences selected, the sequence name and column number are printed below the sequence display.

In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently displayed as Profile 2 will be appended to Profile 1.

The REMOVE ALL GAPS option will remove all gaps from the sequences currently selected. WARNING: This option removes ALL gaps, not only those introduced by ClustalX, but also those that were read from the input alignment file. Any secondary structure information associated with the alignment will NOT be automatically realigned.

The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which contain gaps in all sequences. This can occur as a result of removing divergent sequences from an alignment, or if an alignment has been realigned.

>>HELP M <<

Multiple Alignments

Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above the sequence display area. Then, use the ALIGNMENT menu to do multiple alignments.

Multiple alignments are carried out in 3 stages:

  1. all sequences are compared to each other (pairwise alignments);
  2. a dendrogram (like a phylogenetic tree) is constructed, describing the approximate groupings of the sequences by similarity (stored in a file);
  3. the final multiple alignment is carried out, using the dendrogram as a guide.

The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option. You can skip the first stages (pairwise alignments; guide tree) by using an old guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).

REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the alignment. Sequences can be selected by clicking on the sequence names - see Editing Alignments for more details. The unselected sequences are then 'fixed' and a profile is made including only the unselected sequences. Each of the selected sequences in turn is then realigned to this profile. The realigned sequences will be displayed as a group at the end of the alignment.

REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the alignment. A residue range can be selected by clicking on the sequence display area. A multiple alignment is then performed, following the 3 stages described above, but only using the selected residue range. Finally the new alignment of the range is pasted back into the full sequence alignment.

By default, gap penalties are used at each end of the subrange in order to penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is switched off, gaps can be introduced at the ends of the residue range at no cost.

ALIGNMENT PARAMETERS displays a sub-menu with the following options:

RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the sequences during multiple alignment if you wish to change the parameters and try again. This only takes effect just before you do a second multiple alignment. You can make phylogenetic trees after alignment whether or not this is ON. If you turn this OFF, the new gaps are kept even if you do a second multiple alignment. This allows you to iterate the alignment gradually. Sometimes, the alignment is improved by a second or third pass.

RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including gaps which were read in from the sequence input file. This only takes effect just before you do a second multiple alignment. You can make phylogenetic trees after alignment whether or not this is ON. If you turn this OFF, all gaps are kept even if you do a second multiple alignment. This allows you to iterate the alignment gradually. Sometimes, the alignment is improved by a second or third pass.

PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial alignments.

MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple alignments.

PROTEIN GAP PARAMETERS displays a temporary window which allows you to set various parameters only used in the alignment of protein sequences.

(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only, allows you to set various parameters only used with gap penalty masks.)

SAVE LOG FILE will write the alignment calculation scores to a file. The log filename is the same as the input sequence filename, with an extension ".log" appended.

ITERATION: A remove first iteration scheme has been added. This can be used to improve the final alignment or improve the alignment at each stage of the progressive alignment. During the iteration step each sequence is removed in turn and realigned. If the resulting alignment is better than the previous alignment it is kept. This process is repeated until the score converges (the score is not improved) or until the maximum number of iterations is reached. The user can iterate at each step of the progressive alignment by setting the iteration parameter to 'Iterate each alignment step' or just on the final alignment by setting the iteration parameter to 'Iterate final alignment'. The default number of iterations is 3.

OUTPUT FORMAT OPTIONS

You can choose from 7 different alignment formats (CLUSTAL, GCG, NBRF/PIR, PHYLIP, GDE, NEXUS, FASTA). You can choose more than one (or all 7 if you wish).

CLUSTAL format output is a self explanatory alignment format. It shows the sequences aligned in blocks. It can be read in again at a later date to (for example) calculate a phylogenetic tree or add in new sequences by profile alignment.

GCG output can be used by any of the GCG programs that can work on multiple alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG .msf format files (multiple sequence file); new in version 7 of GCG.

NEXUS format is used by several phylogeny programs, including PAUP and MacClade.

PHYLIP format output can be used for input to the PHYLIP package of Joe Felsenstein. This is a very widely used package for doing every imaginable form of phylogenetic analysis (MUCH more than the modest introduction offered by this program).

NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap characters "-" are used to indicate the positions of gaps in the multiple alignment. These files can be re-used as input in any part of clustal that allows sequences (or alignments or profiles) to be read in.

FASTA: this is included for compatibility with numerous sequence analysis programs.

GDE: this format is used by the GDE package of Steven Smith and is understood by SEQLAB in GCG 9 or later.

GDE OUTPUT CASE: sequences in GDE format may be written in either upper or lower case.

CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the alignment lines in clustalw format.

OUTPUT ORDER is used to control the order of the sequences in the output alignments. By default, it uses the order in which the sequences were aligned (from the guide tree/dendrogram), thus automatically grouping closely related sequences. It can be switched to be the same as the original input order.

PARAMETER OUTPUT: This option will save all your parameter settings in a parameter file (suffix ".par") during alignment. The file can be subsequently used to rerun ClustalW using the same parameters.

ALIGNMENT PARAMETERS

PAIRWISE ALIGNMENT PARAMETERS

A distance is calculated between every pair of sequences and these are used to construct the phylogenetic tree which guides the final multiple alignment. The scores are calculated from separate pairwise alignments. These can be calculated using 2 methods: dynamic programming (slow but accurate) or by the method of Wilbur and Lipman (extremely fast but approximate).

You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS option. The slow/accurate method is fast enough for short sequences but will be VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.

SLOW-ACCURATE alignment parameters:

These parameters do not have any effect on the speed of the alignments. They are used to give initial alignments which are then rescored to give percent identity scores. These % scores are the ones which are displayed on the screen. The scores are converted to distances for the trees.

Gap Open Penalty: the penalty for opening a gap in the alignment.

Gap Extension Penalty: the penalty for extending a gap by 1 residue.

Protein Weight Matrix: the scoring table which describes the similarity of each amino acid to each other.

Load protein matrix: allows you to read in a comparison table from a file.

DNA weight matrix: the scores assigned to matches and mismatches (including IUB ambiguity codes).

Load DNA matrix: allows you to read in a comparison table from a file.

See the Multiple alignment parameters, MATRIX option below for details of the matrix input format.

FAST-APPROXIMATE alignment parameters:

These similarity scores are calculated from fast, approximate, global alignments, which are controlled by 4 parameters. 2 techniques are used to make these alignments very fast: 1) only exactly matching fragments (k-tuples) are considered; 2) only the 'best' diagonals (the ones with most k-tuple matches) are used.

GAP PENALTY: This is a penalty for each gap in the fast alignments. It has little effect on the speed or sensitivity except for extreme values.

K-TUPLE SIZE: This is the size of exactly matching fragment that is used. INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity. For longer sequences (e.g. >1000 residues) you may wish to increase the default.

TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary dot-matrix plot) is calculated. Only the best ones (with most matches) are used in the alignment. This parameter specifies how many. Decrease for speed; increase for sensitivity.

WINDOW SIZE: This is the number of diagonals around each of the 'best' diagonals that will be used. Decrease for speed; increase for sensitivity.

MULTIPLE ALIGNMENT PARAMETERS

These parameters control the final multiple alignment. This is the core of the program and the details are complicated. To fully understand the use of the parameters and the scoring system, you will have to refer to the documentation.

Each step in the final multiple alignment consists of aligning two alignments or sequences. This is done progressively, following the branching order in the GUIDE TREE. The basic parameters to control this are two gap penalties and the scores for various identical/non-identical residues.

The GAP OPENING and EXTENSION PENALTIES can be set here. These control the cost of opening up every new gap and the cost of every item in a gap. Increasing the gap opening penalty will make gaps less frequent. Increasing the gap extension penalty will make gaps shorter. Terminal gaps are not penalised.

The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly related sequences until after the most closely related sequences have been aligned. The setting shows the percent identity level required to delay the addition of a sequence; sequences that are less identical than this level to any other sequences will be aligned later.

The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero means that the transitions are scored as mismatches, while a weight of 1 gives the transitions the match score. For distantly related DNA sequences, the weight should be near to zero; for closely related sequences it can be useful to assign a higher score. The default is set to 0.5.

The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight matrices. For protein alignments, you use a weight matrix to determine the similarity of non-identical amino acids. For example, Tyr aligned with Phe is usually judged to be 'better' than Tyr aligned with Pro.

There are three 'in-built' series of weight matrices offered. Each consists of several matrices which work differently at different evolutionary distances. To see the exact details, read the documentation. Crudely, we store several matrices in memory, spanning the full range of amino acid distance (from almost identical sequences to highly divergent ones). For very similar sequences, it is best to use a strict weight matrix which only gives a high score to identities and the most favoured conservative substitutions. For more divergent sequences, it is appropriate to use 'softer' matrices which give a high score to many other frequent substitutions.

  1. BLOSUM (Henikoff). These matrices appear to be the best available for carrying out data base similarity (homology searches). The matrices currently used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X versions.
  2. PAM (Dayhoff). These have been extremely widely used since the late '70s. We currently use the PAM 20, 60, 120, 350 matrices.
  3. GONNET. These matrices were derived using almost the same procedure as the Dayhoff one (above) but are much more up to date and are based on a far larger data set. They appear to be more sensitive than the Dayhoff series. We currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the default for Clustal X version 1.8.

We also supply an identity matrix which gives a score of 10 to two identical amino acids and a score of zero otherwise. This matrix is not very useful.

Load protein matrix: allows you to read in a comparison matrix from a file. This can be either a single matrix or a series of matrices (see below for format).

DNA WEIGHT MATRIX option allows you to select a single matrix (not a series) used for aligning nucleic acid sequences. Two hard-coded matrices are available:

  1. IUB. This is the default scoring matrix used by BESTFIT for the comparison of nucleic acid sequences. X's and N's are treated as matches to any IUB ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
  2. CLUSTALW(1.6). A previous system used by ClustalW, in which matches score 1.0 and mismatches score 0. All matches for IUB symbols also score 0.

Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a file (just one matrix, not a series).

SINGLE MATRIX INPUT FORMAT: The format used for a single matrix is the same as that used by the BLAST program. The scores in the new weight matrix should be similarities. You can use negative as well as positive values if you wish, although the matrix will be automatically adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected. Any lines beginning with a "#" character are assumed to be comments. The first non-comment line should contain a list of amino acids in any order, using the 1 letter code, followed by a "*" character. This should be followed by a square matrix of scores, with one row and one column for each amino acid. The last row and column of the matrix (corresponding to the "*" character) contain the minimum score over the whole matrix.

MATRIX SERIES INPUT FORMAT: ClustalX uses different matrices depending on the mean percent identity of the sequences to be aligned. You can specify a series of matrices and the range of the percent identity for each matrix in a matrix series file. The file is automatically recognised by the word CLUSTAL_SERIES at the beginning of the file. Each matrix in the series is then specified on one line which should start with the word MATRIX. This is followed by the lower and upper limits of the sequence percent identities for which you want to apply the matrix. The final entry on the matrix line is the filename of a Blast format matrix file (see above for details of the single matrix file format).

Example.

    CLUSTAL_SERIES
     
    MATRIX 81 100 /us1/user/julie/matrices/blosum80
    MATRIX 61 80 /us1/user/julie/matrices/blosum62
    MATRIX 31 60 /us1/user/julie/matrices/blosum45
    MATRIX 0 30 /us1/user/julie/matrices/blosum30

PROTEIN GAP PARAMETERS

RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or increase the gap opening penalties at each position in the alignment or sequence. See the documentation for details. As an example, positions that are rich in glycine are more likely to have an adjacent gap than positions that are rich in valine.

HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a run (5 or more residues) of hydrophilic amino acids; these are likely to be loop or random coil regions where gaps are more common. The residues that are 'considered' to be hydrophilic can be entered in HYDROPHILIC RESIDUES.

GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close to each other. Gaps that are less than this distance apart are penalised more than other gaps. This does not prevent close gaps; it makes them less frequent, promoting a block-like appearance of the alignment.

END GAP SEPARATION treats end gaps just like internal gaps for the purposes of avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you turn this off, end gaps will be ignored for this purpose. This is useful when you wish to align fragments where the end gaps are not biologically meaningful.

>>HELP P <<

Profile and Structure Alignments

By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile alignments allow you to store alignments of your favourite sequences and add new sequences to them in small bunches at a time. A profile is simply an alignment of one or more sequences (e.g. an alignment output file from Clustal X). Each input can be a single sequence. One or both sets of input sequences may include secondary structure assignments or gap penalty masks to guide the alignment.

Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above the sequence display area. Then, use the ALIGNMENT menu to do profile and secondary structure alignments.

The profiles can be in any of the allowed input formats with "-" characters used to specify gaps (except for GCG/MSF where "." is used).

You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and LOAD PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2 profiles to each other. Secondary structure masks in either profile can be used to guide the alignment. This option compares all the sequences in profile 1 with all the sequences in profile 2 in order to build guide trees which will be used to calculate sequence weights, and select appropriate alignment parameters for the final profile alignment.

You can skip the first stage (pairwise alignments; guide trees) by using old guide tree files (ALIGN PROFILES FROM GUIDE TREES).

The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second profile and align them to the first profile, 1 at a time. This is useful to add some new sequences to an existing alignment, or to align a set of sequences to a known structure. In this case, the second profile set need not be pre-aligned.

You can skip the first stage (pairwise alignments; guide tree) by using an old guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE).

SAVE LOG FILE will write the alignment calculation scores to a file. The log filename is the same as the input sequence filename, with an extension ".log" appended.

The alignment parameters can be set using the ALIGNMENT PARAMETERS menu, Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options. These are EXACTLY the same parameters as used by the general, automatic multiple alignment procedure. The general multiple alignment procedure is simply a series of profile alignments. Carrying out a series of profile alignments on larger and larger groups of sequences, allows you to manually build up a complete alignment, if necessary editing intermediate alignments.

SECONDARY STRUCTURE PARAMETERS

Use this menu to set secondary structure options. If a solved structure is known, it can be used to guide the alignment by raising gap penalties within secondary structure elements, so that gaps will preferentially be inserted into unstructured surface loop regions. Alternatively, a user-specified gap penalty mask can be supplied for a similar purpose.

A gap penalty mask is a series of numbers between 1 and 9, one per position in the alignment. Each number specifies how much the gap opening penalty is to be raised at that position (raised by multiplying the basic gap opening penalty by the number) i.e. a mask figure of 1 at a position means no change in gap opening penalty; a figure of 4 means that the gap opening penalty is four times greater at that position, making gaps 4 times harder to open.

The format for gap penalty masks and secondary structure masks is explained in a separate help section.

>>HELP B <<

Secondary Structure / Gap Penalty Masks

The use of secondary structure-based penalties has been shown to improve the accuracy of sequence alignment. Clustal X now allows secondary structure/gap penalty masks to be supplied with the input sequences used during profile alignment. (NB. The secondary structure information is NOT used during multiple sequence alignment). The masks work by raising gap penalties in specified regions (typically secondary structure elements) so that gaps are preferentially opened in the less well conserved regions (typically surface loops).

The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control whether the input 2D-structure information or gap penalty masks will be used during the profile alignment.

The OUTPUT options control whether the secondary structure and gap penalty masks should be included in the Clustal X output alignments. Showing both is useful for understanding how the masks work. The 2D-structure information is itself useful in judging the alignment quality and in seeing how residue conservation patterns vary with secondary structure.

The HELIX and STRAND GAP PENALTY options provide the value for raising the gap penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL format, capital residues denote the A and B core structure notation. Basic gap penalties are multiplied by the amount specified.

The LOOP GAP PENALTY option provides the value for the gap penalty in Loops. By default this penalty is not raised. In CLUSTAL format, loops are specified by "." in the secondary structure notation.

The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap penalty at the ends of secondary structures. Ends of secondary structures are known to grow or shrink, comparing related structures. Therefore by default these are given intermediate values, lower than the core penalties. All secondary structure read in as lower case in CLUSTAL format gets the reduced terminal penalty.

The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure termini for the intermediate penalties. In the alignment output, these are indicated as lower case. For Alpha Helices, by default, the range spans the end-helical turn (3 residues). For Beta Strands, the default range spans the end residue and the adjacent loop residue, since sequence conservation often extends beyond the actual H-bonded Beta Strand.

Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input files. For many 3-D protein structures, secondary structure information is recorded in the feature tables of SWISS-PROT database entries. You should always check that the assignments are correct - some are quite inaccurate. Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.

    FT   HELIX       100    115
    FT   STRAND      118    119

The structure and penalty masks can also be read from CLUSTAL alignment format as comment lines beginning "!SS_" or "!GM_" e.g.

    !SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
    !GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
    HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK

Note that the mask itself is a set of numbers between 1 and 9 each of which is assigned to the residue(s) in the same column below.

In GDE flat file format, the masks are specified as text and the names must begin with "SS_" or "GM_".

Either a structure or penalty mask or both may be used. If both are included in an alignment, the user will be asked which is to be used.

>>HELP T <<

Phylogenetic Trees

Before calculating a tree, you must have an ALIGNMENT in memory. This can be input using the FILE menu, LOAD SEQUENCES option, or it can be the result of a full multiple alignment that you have just carried out and that is still in memory. Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!

The UPGMA algorithm has been added to allow faster tree construction. The user now has the choice of using Neighbour Joining or UPGMA. The default is still NJ, but the user can change this by setting the clustering parameter.

To calculate a tree, use the DRAW TREE option. This gives an UNROOTED tree and all branch lengths when using the NJ method. The root of the tree can only be inferred by using an outgroup (a sequence that you are certain branches at the outside of the tree... certain on biological grounds) OR if you assume a degree of constancy in the 'molecular clock', you can place the root in the 'middle' of the tree (roughly equidistant from all tips). The UPGMA algorithm generates a rooted tree.

BOOTSTRAP N-J TREE uses a method for deriving confidence values for the groupings in a tree (first adapted for trees by Joe Felsenstein). It involves making N random samples of sites from the alignment (N should be LARGE, e.g. 500 - 1000); drawing N trees (1 from each sample) and counting how many times each grouping from the original tree occurs in the sample trees. You can set N using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In practice, you should use a large number of bootstrap replicates (1000 is recommended, even if it means running the program for an hour on a slow computer). You can also supply a seed number for the random number generator here. Different runs with the same seed will give the same answer. See the documentation for more details.

EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where ANY of the sequences have a gap will be ignored. This means that 'like' will be compared to 'like' in all distances, which is highly desirable. It also automatically throws away the most ambiguous parts of the alignment, which are concentrated around gaps (usually). The disadvantage is that you may throw away much of the data if there are many gaps (which is why it is difficult for us to make it the default).

CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option makes no difference. For greater divergence, this option corrects for the fact that observed distances underestimate actual evolutionary distances. This is because, as sequences diverge, more than one substitution will happen at many sites. However, you only see one difference when you look at the present day sequences. Therefore, this option has the effect of stretching branch lengths in trees (especially long branches). The corrections used here (for DNA or proteins) are both due to Motoo Kimura. See the documentation for details.

Where possible, this option should be used. However, for VERY divergent sequences, the distances cannot be reliably corrected. You will be warned if this happens. Even if none of the distances in a data set exceed the reliable threshold, if you bootstrap the data, some of the bootstrap distances may randomly exceed the safe limit.

SAVE LOG FILE will write the tree calculation scores to a file. The log filename is the same as the input sequence filename, with an extension ".log" appended.

OUTPUT FORMAT OPTIONS

Four different formats are allowed. None of these displays the tree visually. You can display the tree using the NJPLOT program distributed with Clustal X OR get the PHYLIP package and use the tree drawing facilities there.

  1. CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances between the sequences and the number of alignment positions used for each. The tree is described at the end of the file. It lists the sequences that are joined at each alignment step and the branch lengths. After two sequences are joined, the pair is referred to later as a NODE. The number of a NODE is the number of the lowest sequence in that NODE.
  2. PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many phylogenetic analysis packages. It consists of a series of nested parentheses, describing the branching order, with the sequence names and branch lengths. It can be read by the NJPLOT program distributed with ClustalX. It can also be used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see the trees graphically. This is the same format used during multiple alignment for the guide trees. Some other packages that can read and display New Hampshire format are TreeTool, TreeView, and Phylowin.
  3. PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the pairwise distances in a format that can be used by the PHYLIP package. It used to be useful when one could not produce distances from protein sequences in the Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
  4. NEXUS FORMAT TREE. This format is used by several popular phylogeny programs, including PAUP and MacClade. The format is described fully in: Maddison, D. R., D. L. Swofford and W. P. Maddison. 1997. NEXUS: an extensible file format for systematic information. Systematic Biology 46:590-621.

BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on the tree branches of the phylip format output tree. The toggle allows them to be placed on the nodes, which is incorrect, but some display packages (e.g. TreeTool, TreeView and Phylowin) only support node labelling but not branch labelling. Care should be taken to note which branches and labels go together.

>>HELP C <<

Colors

Clustal X provides a versatile coloring scheme for the sequence alignment display. The sequences (or profiles) are colored automatically, when they are loaded. Sequences can be colored either by assigning a color to specific residues, or on the basis of an alignment consensus. In the latter case, the alignment consensus is calculated automatically, and the residues in each column are colored according to the consensus character assigned to that column. In this way, you can choose to highlight, for example, conserved hydrophilic or hydrophobic positions in the alignment.

The 'rules' used to color the alignment are specified in a COLOR PARAMETER FILE. Clustal X automatically looks for a file called "colprot.xml" for protein sequences or "coldna.xml" for DNA, in the installation directory. (Under UNIX, it then looks in your home directory.) Under Mac OS X you can locate these files by clicking on the ClustalX application while holding down the Ctrl key and selecting "Show package contents". The files are located in the "Contents/MacOS" folder.

By default, if no color parameter file is found, protein sequences are colored by residue as follows:

    Color                   Residue Code

    ORANGE                  GPST
    RED                     HKR
    BLUE                    FWY
    GREEN                   ILMV

In the case of DNA sequences, the default colors are as follows:

    Color                   Residue Code

    ORANGE                  A
    RED                     C
    BLUE                    T
    GREEN                   G

The default BACKGROUND COLORING option shows the sequence residues using a black character on a colored background. It can be switched off to show residues as a colored character on a white background.

Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color option looks first for the color parameter file (as described above) and, if no file is found, uses the default residue-specific colors.

You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE option. The format of the color parameter file is described below.

COLOR PARAMETER FILE

This file is an xml file divided into 3 sections:

  1. the names and RGB values of the colors (rgbindex)
  2. the rules for calculating the consensus (consensus)
  3. the rules for assigning colors to the residues (colorrules)

An example file is given here.

<?xml version="1.0" encoding="UTF-8"?>
<colorparam>
   <rgbindex>
      <color name = "RED" red = "229" green = "51" blue = "25"></color>
      ...
      <color name = "ORANGE" red = "229" green = "153" blue = "76"></color>
   </rgbindex>
   <consensus>
      <condition name = "%" cutoffpercent = "60" residues = "wlvimafcyhp"></condition>
      ...
       <condition name = "Y" cutoffpercent = "85" residues = "y"></condition>
   </consensus>
   <colorrules>
      <resrule residue = "g" colorname = "ORANGE" conditions = ""></resrule>
      ...
      <resrule residue = "r" colorname = "RED" conditions = "+KRQ"></resrule>
    </colorrules>
</colorparam>

The RGB index section is optional (identified by <rgbindex>). If this section exists, each color used in the file must be named and the RGB values specified (on a scale from 0 to 255). If the RGB index section is not found, the following set of hard-coded colors will be used.

  RED     229  51  25
  BLUE     25 127 229
  GREEN    25 204  25
  CYAN     25 178 178
  PINK    229 127 127
  MAGENTA 204  76 204
  YELLOW  204 204   0
  ORANGE  229 153  76

The consensus section is optional and is identified by the header <consensus>. It defines how the consensus is calculated.

The format of each consensus parameter is:

   <condition name = "C" cutoffpercent = "N" residues = "RESIDUE_LIST"></condition>
   where
       C             is a character used to identify the parameter.
       N             is an integer value used as the percentage cutoff point.
       RESIDUE_LIST  is a list of residues.

For example: <condition name = "#" cutoffpercent = "80" residues = "wlvimafcyhp"></condition>

will assign a consensus character "#" to any column in the alignment which contains more than 80% of the residues w,l,v,i,m,a,f,c,y,h and p.

The third section is identified by the header <colorrules>, and defines how colors are assigned to each residue in the alignment.

The color rules section has the following format:

   <resrule residue = "R" colorname = "COLOR" conditions = "RESIDUE_LIST"></resrule>
   where
      R             is a character used to denote a residue.
      COLOR         is one of the above defined colors.
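      RESIDUE_LIST  is a list of consensus characters (as named in the
                    consensus section); the color is applied only if the
                    column's consensus matches one of them. An empty list
                    applies the color regardless of the consensus.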

Examples:

<resrule residue = "g" colorname = "ORANGE" conditions = ""></resrule>

will color all glycines ORANGE, regardless of the consensus.

<resrule residue = "k" colorname = "RED" conditions = "+KRQ"></resrule>

will color RED any lysine which is found in a column with a consensus of +, K, R or Q.

>>HELP Q <<

Alignment Quality Analysis

QUALITY SCORES

Clustal X provides an indication of the quality of an alignment by plotting a 'conservation score' for each column of the alignment. A high score indicates a well-conserved column; a low score indicates low conservation. The quality curve is drawn below the alignment.

Two methods are also provided to indicate single residues or sequence segments which score badly in the alignment.

Low-scoring residues are expected to occur at a moderate frequency in all the sequences because of their steady divergence due to the natural processes of evolution. The most divergent sequences are likely to have the most outliers. However, the highlighted residues are especially useful in pointing to sequence misalignments. Note that clustering of highlighted residues is a strong indication of misalignment. This can arise due to various reasons, for example:

  1. Partial or total misalignments caused by a failure in the alignment algorithm. Usually only in difficult alignment cases.
  2. Partial or total misalignments because at least one of the sequences in the given set is partly or completely unrelated to the other sequences. It is up to the user to check that the set of sequences is alignable.
  3. Frameshift translation errors in a protein sequence causing local mismatched regions to be heavily highlighted. These are surprisingly common in database entries. If suspected, a 3-frame translation of the source DNA needs to be examined.

Occasionally, highlighted residues may point to regions of some biological significance. This might happen for example if a protein alignment contains a sequence which has acquired new functions relative to the main sequence set. It is important to exclude other explanations, such as error or the natural divergence of sequences, before invoking a biological explanation.

LOW-SCORING SEGMENTS

Unreliable regions in the alignment can be highlighted using the Low-Scoring Segments option. A sequence-weighted profile is used to indicate any segments in the sequences which score badly. Because the profile calculation may take some time, an option is provided to calculate LOW-SCORING SEGMENTS. The segment display can then be toggled on or off without having to repeat the time-consuming calculations.

For details of the low-scoring segment calculation, see the CALCULATION section below.

LOW-SCORING SEGMENT PARAMETERS

MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be hidden by increasing the minimum length of segments which will be displayed.

DNA MARKING SCALE is used to remove less significant segments from the highlighted display. Increase the scale to display more segments; decrease the scale to remove the least significant.

PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each amino acid to each other. The matrix is used to calculate the sequence-weighted profile scores. There are four 'in-built' Log-Odds matrices offered: the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix, which only gives a high score to identities and the most favoured conservative substitutions, may be more suitable when the sequences are closely related. For more divergent sequences, it is appropriate to use 'softer' matrices which give a high score to many other frequent substitutions. This option automatically recalculates the low-scoring segments.

DNA WEIGHT MATRIX: Two hard-coded matrices are available, and a user-defined matrix can also be read from a file:

  1. IUB. This is the default scoring matrix used by BESTFIT for the comparison of nucleic acid sequences. X's and N's are treated as matches to any IUB ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score 0.9.
  2. CLUSTALW(1.6). The previous system used by ClustalW, in which matches score 1.0 and mismatches score 0. All matches for IUB symbols also score 0.
  3. A new matrix can be read from a file on disk, if the filename consists only of lower case characters. The values in the new weight matrix should be similarities and should be NEGATIVE for infrequent substitutions.

INPUT FORMAT. The format used for a new matrix is the same as the BLAST program. Any lines beginning with a "#" character are assumed to be comments. The first non-comment line should contain a list of amino acids in any order, using the 1 letter code, followed by a "*" character. This should be followed by a square matrix of scores, with one row and one column for each amino acid. The last row and column of the matrix (corresponding to the "*" character) contain the minimum score over the whole matrix.

QUALITY SCORE PARAMETERS

You can customise the column 'quality scores' plotted underneath the alignment display using the following options.

SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to change the scale of the quality score plot.

RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be used to change the number of residue exceptions which are highlighted in the alignment display. (For an explanation of this cutoff, see the CALCULATION OF RESIDUE EXCEPTIONS section below.)

PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each amino acid to each other.

DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).

For more information about the weight matrices, see the help above for the Low-scoring Segments Weight Matrix.

For details of the quality score calculations, see the CALCULATION section below.

SHOW LOW-SCORING SEGMENTS

The low-scoring segment display can be toggled on or off. This option does not recalculate the profile scores.

SHOW EXCEPTIONAL RESIDUES

This option highlights individual residues which score badly in the alignment quality calculations. Residues which score exceptionally low are highlighted by using a white character on a grey background.

SAVE QUALITY SCORES TO FILE

The quality scores that are plotted underneath the alignment display can also be saved in a text file. Each column in the alignment is written on one line in the output file, with the value of the quality score at the end of the line. Only the sequences currently selected in the display are written to the file. One use for quality scores is to color residues in a protein structure by sequence conservation. In this way conserved surface residues can be highlighted to locate functional regions such as ligand-binding sites.

CALCULATION OF QUALITY SCORES

Suppose we have an alignment of m sequences of length n. Then, the alignment can be written as:

    A11 A12 A13 .......... A1n
    A21 A22 A23 .......... A2n
    .
    .
    Am1 Am2 Am3 .......... Amn

We also have a residue comparison matrix of size R where C(i,j) is the score for aligning residue i with residue j.

We want to calculate a score for the conservation of the jth position in the alignment.

To do this, we define an R-dimensional sequence space. For the jth position in the alignment, each sequence consists of a single residue which is assigned a point S in the space. S has R dimensions, and for sequence i, the rth dimension is defined as:

    Sr =    C(r,Aij)

We then calculate a consensus value for the jth position in the alignment. This value X also has R dimensions, and the rth dimension is defined as:

    Xr = (   SUM   (Fij * C(i,r)) ) / m
           1<=i<=R

where Fij is the count of residues i at position j in the alignment.

Now we can calculate the distance Di between each sequence i and the consensus position X in the R-dimensional space.

    Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
                  1<=r<=R

The quality score for the jth position in the alignment is defined as the mean of the sequence distances Di.

The score is normalised by multiplying by the percentage of sequences which have residues (and not gaps) at this position.

CALCULATION OF RESIDUE EXCEPTIONS

The jth residue of the ith sequence is considered as an exception if the distance Di of the sequence from the consensus value X is greater than (Upper Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff value will only display very significant exceptions; a low value will allow more, less significant, exceptions to be highlighted.

(NB. Sequences which contain gaps at this position are not included in the exception calculation.)

CALCULATION OF LOW-SCORING SEGMENTS

Suppose we have an alignment of m sequences of length n. Then, the alignment can be written as:

    A11 A12 A13 .......... A1n
    A21 A22 A23 .......... A2n
    .
    .
    Am1 Am2 Am3 .......... Amn

We also have a residue comparison matrix of size R where C(i,j) is the score for aligning residue i with residue j.

We calculate sequence weights by building a neighbour-joining tree, in which branch lengths are proportional to divergence. Summing the branches by branch ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and Henikoff et al., JMB, 243, 574 (1994).

To find the low-scoring segments in a sequence Si, we build a weighted profile of the remaining sequences in the alignment. Suppose we find residue r at position j in the sequence; then the score for the jth position in the sequence is defined as

    Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
                                   for residue r at position j in the
                                   alignment.

These residue scores are summed along the sequence in both forward and backward directions. If the sum of the scores is positive, then it is reset to zero. Segments which score negatively in both directions are considered as 'low-scoring' and will be highlighted in the alignment display.

>>HELP 9 <<

Command Line Parameters

DATA (sequences)

    -INFILE=file.ext                             :input sequences
    -PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)

VERBS (do things)

    -OPTIONS          :list the command line parameters
    -HELP  or -CHECK  :outline the command line parameters
     (or  -FULLHELP)
    -ALIGN            :do full multiple alignment
    -TREE             :calculate NJ tree
    -BOOTSTRAP(=n)    :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
    -CONVERT          :output the input sequences in a different file format

PARAMETERS (set things)

General settings:
    -INTERACTIVE   :read command line, then enter normal interactive menus
    -QUICKTREE     :use FAST algorithm for the alignment guide tree
    -TYPE=         :PROTEIN or DNA sequences
    -NEGATIVE      :protein alignment with negative values in matrix
    -OUTFILE=      :sequence alignment file name
    -OUTPUT=       :CLUSTAL, GCG, GDE, PHYLIP, PIR, NEXUS, FASTA
    -OUTORDER=     :INPUT or ALIGNED
    -CASE=         :LOWER or UPPER (for GDE output only)
    -SEQNOS=       :OFF or ON (for Clustal output only)
Fast Pairwise Alignments:
    -KTUPLE=n      :word size
    -TOPDIAGS=n    :number of best diags.
    -WINDOW=n      :window around best diags.
    -PAIRGAP=n     :gap penalty
    -SCORE=        :PERCENT or ABSOLUTE
Slow Pairwise Alignments:
    -PWMATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
    -PWDNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
    -PWGAPOPEN=f   :gap opening penalty
    -PWGAPEXT=f    :gap extension penalty
Multiple Alignments:
    -NEWTREE=      :file for new guide tree
    -USETREE=      :file for old guide tree
    -MATRIX=       :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
    -DNAMATRIX=    :DNA weight matrix=IUB, CLUSTALW or filename
    -GAPOPEN=f     :gap opening penalty
    -GAPEXT=f      :gap extension penalty
    -ENDGAPS       :no end gap separation pen.
    -GAPDIST=n     :gap separation pen. range
    -NOPGAP        :residue-specific gaps off
    -NOHGAP        :hydrophilic gaps off
    -HGAPRESIDUES= :list hydrophilic res.
    -MAXDIV=n      :% ident. for delay
    -TYPE=         :PROTEIN or DNA
    -TRANSWEIGHT=f :transitions weighting
    -ITERATION=    :NONE or TREE or ALIGNMENT
    -NUMITER=n     :maximum number of iterations to perform
Profile Alignments:
    -PROFILE       :Merge two alignments by profile alignment
    -NEWTREE1=     :file for new guide tree for profile1
    -NEWTREE2=     :file for new guide tree for profile2
    -USETREE1=     :file for old guide tree for profile1
    -USETREE2=     :file for old guide tree for profile2
Sequence to Profile Alignments:
    -SEQUENCES     :Sequentially add profile2 sequences to profile1 alignment
    -NEWTREE=      :file for new guide tree
    -USETREE=      :file for old guide tree
Structure Alignments:
    -NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
    -NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
    -SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
    -HELIXGAP=n    :gap penalty for helix core residues 
    -STRANDGAP=n   :gap penalty for strand core residues
    -LOOPGAP=n     :gap penalty for loop regions
    -TERMINALGAP=n :gap penalty for structure termini
    -HELIXENDIN=n  :number of residues inside helix to be treated as terminal
    -HELIXENDOUT=n :number of residues outside helix to be treated as terminal
    -STRANDENDIN=n :number of residues inside strand to be treated as terminal
    -STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
Trees:
    -OUTPUTTREE=nj OR phylip OR dist OR nexus
    -SEED=n        :seed number for bootstraps
    -KIMURA        :use Kimura's correction
    -TOSSGAPS      :ignore positions with gaps
    -BOOTLABELS=node OR branch :position of bootstrap values in tree display
    -CLUSTERING=   :NJ or UPGMA
>>HELP R <<

References

Version 2 of ClustalW and ClustalX is described in:

Larkin,M.A., Blackshields, G., Brown, N.P., Chenna, R., McGettigan, P.A., McWilliam, H., Valentin, F., Wallace, I.M., Wilm, A., Lopez, R., Thompson, J.D., Gibson, T.J., Higgins, D.G. (2007) Clustal W and Clustal X version 2.0. Bioinformatics, 23:2947-2948.

The ClustalX program is described in:

Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997) The ClustalX windows interface: flexible strategies for multiple sequence alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.

The ClustalW program is described in:

Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice. Nucleic Acids Research, 22:4673-4680.

The ClustalV program is described in:

Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for multiple sequence alignment. CABIOS 8,189-191.

The original Clustal program is described in the manuscripts:

Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence alignments on a microcomputer. CABIOS 5,151-153.

Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple sequence alignment on a microcomputer. Gene 73,237-244.

Some tips on using Clustal X:

Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998) Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.

Some tips on using Clustal W:

Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for multiple sequence alignments. Methods Enzymol., 266, 383-402.

You can get the latest version of the ClustalX program by anonymous ftp to:

    ftp://ftp.ebi.ac.uk/pub/software/clustalw2

Or, have a look at the following WWW sites:

    http://www.clustal.org
    http://www.ebi.ac.uk/Tools/clustalw2/
clustalx-2.1/clustalW/tree/dayhoff.h0000644000175000017500000000545611470725216017356 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef DAYHOFFPAMS2_H #define DAYHOFFPAMS2_H /* Mark tidy up Nov 2005 */ /* DAYHOFF.H Table of estimated PAMS (actual no. of substitutions per 100 residues) for a range of observed amino acid distances from 75.0% (the first entry in the array), in 0.1% increments, up to 93.0%. These values are used to correct for multiple hits in protein alignments. The values below are for observed distances above 74.9%. For values above 93%, an arbitrary value of 1000 PAMS (1000% substitution) is used. These values are derived from a Dayhoff model (1978) of amino acid substitution and assume average amino acid composition and that amino acids replace each other at the same rate as in the original Dayhoff model. Up to 75% observed distance, use Kimura's emprical formula to derive the correction. For 75% or greater, use this table. Kimura's formula is accurate up to about 75% and fails completely above 85%. 
*/ namespace clustalw { int dayhoff_pams[]={ 195, /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */ 196, /* 75.1% observed d; 196 PAMs estimated */ 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 226, 227, 228, 229, 230, 231, 232, 233, 234, 236, 237, 238, 239, 240, 241, 243, 244, 245, 246, 248, 249, 250, /* 250 PAMs = 80.3% observed d */ 252, 253, 254, 255, 257, 258, 260, 261, 262, 264, 265, 267, 268, 270, 271, 273, 274, 276, 277, 279, 281, 282, 284, 285, 287, 289, 291, 292, 294, 296, 298, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, 325, 328, 330, 332, 335, 337, 339, 342, 344, 347, 349, 352, 354, 357, 360, 362, 365, 368, 371, 374, 377, 380, 383, 386, 389, 393, 396, 399, 403, 407, 410, 414, 418, 422, 426, 430, 434, 438, 442, 447, 451, 456, 461, 466, 471, 476, 482, 487, 493, 498, 504, 511, 517, 524, 531, 538, 545, 553, 560, 569, 577, 586, 595, 605, 615, 626, 637, 649, 661, 675, 688, 703, 719, 736, 754, 775, 796, 819, 845, 874, 907, 945, /* 92.9% observed; 945 PAMs */ 988 /* 93.0% observed; 988 PAMs */ }; } #endif clustalx-2.1/clustalW/tree/UnRootedClusterTree.h0000644000175000017500000000127711470725216021654 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef UNROOTEDCLUSTERTREE_H #define UNROOTEDCLUSTERTREE_H #include "ClusterTree.h" namespace clustalw { class UnRootedClusterTree : private ClusterTree { public: UnRootedClusterTree(); void treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr); void treeFromDistMatrix(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string& phylipName); void bootstrapTree(TreeNames* treeNames, Alignment *alignPtr); private: PhyloTree* phyloTree; ClusterTreeOutput* outputTree; }; } #endif clustalx-2.1/clustalW/tree/UnRootedClusterTree.cpp0000644000175000017500000003700411470725216022204 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "UnRootedClusterTree.h" #include "../general/utils.h" #include "RandomGenerator.h" #include #include #include "../general/OutputFile.h" namespace clustalw { UnRootedClusterTree::UnRootedClusterTree() { } /* * The function treeFromAlignment is called to generate a tree from an alignment * without a distance matrix. It calculates the distance matrix from the alignment * quickly. */ // Note this function was called phylogenetic_tree before void UnRootedClusterTree::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr) { try { OutputFile phylipPhyTreeFile; OutputFile clustalPhyTreeFile; OutputFile distancesPhyTreeFile; OutputFile nexusPhyTreeFile; OutputFile pimFile; string path; int i, j; int overspill = 0; int totalDists; numSeqs = alignPtr->getNumSeqs(); // NOTE class variable /** * Check if numSeqs is ok */ if(!checkIfConditionsMet(numSeqs, 2)) { return; } firstSeq = 1; lastSeq = numSeqs; phyloTree = new PhyloTree; // The SeqInfo struct is passed to reduce the number of parameters passed! SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = numSeqs; outputTree = new ClusterTreeOutput(&info, 0); // No bootstrap! 
phyloTree->treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); TreeGroups saveTree(numSeqs + 1, vector(numSeqs + 1)); // NOTE at the moment there is only one type of clustering algorithm, but there will // be more! clusAlgorithm = new NJTree(); utilityObject->getPath(userParameters->getSeqName(), &path); /** * Open the required output files. */ if(!openFilesForTreeFromAlignment(&clustalPhyTreeFile, &phylipPhyTreeFile, &distancesPhyTreeFile, &nexusPhyTreeFile, &pimFile, treeNames, &path)) { return; // Problem opeing one of the files, cannot continue! } int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); bootPositions.resize(_lenFirstSeq + 2); for (j = 1; j <= _lenFirstSeq; ++j) { bootPositions[j] = j; } /** * Calculate quickDist and overspill */ overspill = calcQuickDistMatForAll(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), pimFile.getPtrToFile(), distancesPhyTreeFile.getPtrToFile(), alignPtr); // check if any distances overflowed the distance corrections if (overspill > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; overspillMessage(overspill, totalDists); } if (userParameters->getOutputTreeClustal()) { verbose = true; } // Turn on file output if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); clusAlgorithm->setVerbose(false); } for (i = 1; i < numSeqs + 1; i++) for (j = 1; j < numSeqs + 1; j++) { saveTree[i][j] = phyloTree->treeDesc[i][j]; } if (userParameters->getOutputTreePhylip()) { outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } for (i = 1; i < numSeqs + 1; i++) for (j = 1; j < numSeqs + 1; j++) { phyloTree->treeDesc[i][j] = saveTree[i][j]; } if (userParameters->getOutputTreeNexus()) { outputTree->printNexusTree(phyloTree, 
nexusPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } /** Free up resources!!!!! */ treeGaps.clear(); bootPositions.clear(); delete clusAlgorithm; delete phyloTree; delete outputTree; } catch(const exception& ex) { cerr << ex.what() << endl; utilityObject->error("Terminating program. Cannot continue\n"); exit(1); } } /* * Routine for producing unrooted NJ trees from seperately aligned * pairwise distances. This produces the GUIDE DENDROGRAMS in * PHYLIP format. */ void UnRootedClusterTree::treeFromDistMatrix(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string& phylipName) { OutputFile phylipPhyTreeFile; try { // Test to see if the inputs are valid if(seq1 < 1 || nSeqs < 1) { cerr << "Invalid inputs into treeFromDistMatrix \n" << "seq1 = " << seq1 << " nSeqs = " << nSeqs << "\n" << "Need to end program!\n"; exit(1); return; } PhyloTree phyloTree; float dist; string path; verbose = false; firstSeq = seq1; lastSeq = firstSeq + nSeqs - 1; SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = nSeqs; // Open up the phylipPhyTreeFile now! // NOTE that this is not exactly correct. This may cause a problem when outputing // a tree for each of the profiles. But then we can pass it in, maybe. utilityObject->getPath(userParameters->getSeqName(), &path); if(nSeqs >= 2) { string name = phylipName; if(!phylipPhyTreeFile.openFile(&name, "\nEnter name for new GUIDE TREE file ", &path, "dnd", "Guide tree")) { return; } phylipName = name; } else { return; } // Not sure about bootstrapping here! 
clusAlgorithm = new NJTree(); outputTree = new ClusterTreeOutput(&info, 0); ofstream* ptrToFile = phylipPhyTreeFile.getPtrToFile(); if (nSeqs == 2) { dist = (*distMat)(firstSeq, firstSeq + 1) / 2.0; if(ptrToFile->is_open()) { (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":" << setprecision(5) << dist << "," << alignPtr->getName(firstSeq + 1) << ":" << setprecision(5) << dist <<");\n"; } } else { int dimensions = lastSeq - firstSeq + 2; phyloTree.treeDesc.resize(dimensions, vector(dimensions)); /* debugging, also used when -OUTPUTTREE is used */ #if 0 ofstream debuglog("debug.treeFromDistMatrix.txt", ios::out); clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(&phyloTree, distMat, &info, &debuglog); #else clusAlgorithm->generateTree(&phyloTree, distMat, &info); #endif outputTree->printPhylipTree(&phyloTree, ptrToFile, alignPtr, distMat, &bootTotals); } delete clusAlgorithm; delete outputTree; //phylipPhyTreeFile.close(); } catch(const exception &ex) { cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n" << ex.what(); exit(1); } catch(...) { cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n"; exit(1); } } /* * Note before I had this function was accepting a distance matrix. But I think it * just uses the quickly generated one. So it doesnt need it anymore. But be careful !!!! 
*/ void UnRootedClusterTree::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr) { int i, j; int ranno; string path; OutputFile clustalPhyTreeFile; ofstream* ptrToClustalFile; OutputFile phylipPhyTreeFile; OutputFile nexusPhyTreeFile; try { phyloTree = new PhyloTree; PhyloTree sampleTree; PhyloTree standardTree; PhyloTree saveTree; int totalDists, overspill = 0, totalOverspill = 0; int nfails = 0; numSeqs = alignPtr->getNumSeqs(); firstSeq = 1; lastSeq = numSeqs; clusAlgorithm = new NJTree(); SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = numSeqs; /** * Check if numSeqs is ok */ if(!checkIfConditionsMet(numSeqs, 4)) { return; } if (!userParameters->getOutputTreeClustal() && !userParameters->getOutputTreePhylip() && !userParameters->getOutputTreeNexus()) { utilityObject->error("You must select either clustal or phylip or nexus tree output format"); return; } utilityObject->getPath(userParameters->getSeqName(), &path); if(!openFilesForBootstrap(&clustalPhyTreeFile, &phylipPhyTreeFile, &nexusPhyTreeFile, treeNames, &path)) { return; // There was a problem opening the output files. 
} int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); bootTotals.clear(); bootTotals.resize(numSeqs + 1); bootPositions.clear(); bootPositions.resize(_lenFirstSeq + 2); for (j = 1; j <= _lenFirstSeq; ++j) // First select all positions for { bootPositions[j] = j; } // the "standard" tree overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr); // check if any distances overflowed the distance corrections if (overspill > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; overspillMessage(overspill, totalDists); } treeGaps.clear(); if (userParameters->getOutputTreeClustal()) { verbose = true; } // Turn on screen output phyloTree->treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); // compute the standard tree if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); } ptrToClustalFile = clustalPhyTreeFile.getPtrToFile(); promptForBoolSeedAndNumTrials(); RandomGenerator randGenerator(userParameters->getBootRanSeed()); /** * Print bootstrap information to top of clustal bootstrap file! 
*/ if (userParameters->getOutputTreeClustal()) { printBootstrapHeaderToClustalFile(ptrToClustalFile); } verbose = false; // Turn OFF screen output clusAlgorithm->setVerbose(false); sampleTree.treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); if (userParameters->getMenuFlag()) { cout << "\n\nEach dot represents 10 trials\n\n"; } totalOverspill = 0; nfails = 0; int lenSeq1 = alignPtr->getSeqLength(1); for (i = 1; i <= userParameters->getBootNumTrials(); ++i) { for (j = 1; j <= alignPtr->getSeqLength(firstSeq); ++j) { // select alignment positions for ranno = randGenerator.addRand((unsigned long)lenSeq1) + 1; bootPositions[j] = ranno; // bootstrap sample } overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr, true); if (overspill > 0) { totalOverspill = totalOverspill + overspill; nfails++; } treeGaps.clear(); if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { // NOTE this is bad to pass in clustalPhyTreeFile clusAlgorithm->generateTree(&sampleTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); } sampleTree.leftBranch.clear(); sampleTree.rightBranch.clear(); compareTree(phyloTree, &sampleTree, &bootTotals, lastSeq - firstSeq + 1); if (userParameters->getMenuFlag()) { if (i % 10 == 0) { cout << "."; } if (i % 100 == 0) { cout << "\n"; } } } // check if any distances overflowed the distance corrections if (nfails > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; printErrorMessageForBootstrap(totalOverspill, totalDists, nfails); } bootPositions.clear(); outputTree = new ClusterTreeOutput(&info, userParameters->getBootstrapFormat()); /** * Print ClustalTree with bootTotals */ if (userParameters->getOutputTreeClustal()) { outputTree->printTree(phyloTree, clustalPhyTreeFile.getPtrToFile(), &bootTotals); } /** * Print phylip tree with boottotals. 
*/ if (userParameters->getOutputTreePhylip()) { // Save the old tree! saveTree.treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); saveTree.treeDesc.assign(phyloTree->treeDesc.begin(), phyloTree->treeDesc.end()); outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); // reassign the old values! phyloTree->treeDesc.assign(saveTree.treeDesc.begin(), saveTree.treeDesc.end()); } /** * print nexus tree with boottotals */ if (userParameters->getOutputTreeNexus()) { outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } delete phyloTree; delete clusAlgorithm; delete outputTree; } catch(const exception &ex) { } } } clustalx-2.1/clustalW/tree/UPGMA/upgmadata.h0000644000175000017500000000056411470725216020545 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef UPGMADATA_H #define UPGMADATA_H struct distMatRow { distMatRow(double* p, int n) { ptrToDistMatRow = p; numDists = n; } double* ptrToDistMatRow; int numDists; }; const int BLANKDIST = -1; const int INTERNAL = -1; #endif clustalx-2.1/clustalW/tree/UPGMA/UPGMAAlgorithm.h0000644000175000017500000000366711470725216021331 0ustar ddineenddineen#ifndef __UPGMAALGORITHM_H #define __UPGMAALGORITHM_H #include "Node.h" #include #include "../../general/clustalw.h" #include "../AlignmentSteps.h" #include "RootedGuideTree.h" #include #include namespace clustalw { using namespace std; class UPGMAAlgorithm { public: bool overwriteMatrix; UPGMAAlgorithm(); auto_ptr generateTree(RootedGuideTree* phyTree, DistMatrix* distMat, SeqInfo* seqInfo, bool overwrite, ofstream* tree = 0); void setVerbose(bool _verbose){verbose = _verbose;} private: Node **initialiseNodes(double *distanceMatrix, int firstSeq); Node *doUPGMA(Node **nodes, ofstream* tree); void printAllNodes(Node** nodes); void addAlignmentStep(vector* group1, vector* group2); Node** 
getNodeWithMinDist(Node** clusters); void recomputeNodeToJoin1DistMatRow(Node* nodeToJoin1, double** nodeToJoin2DistIter); void computeAllOtherDistsToNewNode(Node* nodeToJoin1, Node* nodeToJoin2, double** nodeToJoin2DistIter); void computeDistsUpToNodeToJoin2(Node* nToJoin1, Node* nToJoin2, double** nodeToJoin2DistIter); void computeDistsForNodesAfterNode2(Node* nToJoin2); void movePtrPastUnusedDistances(double** ptrToDist) { while(**ptrToDist < 0) { (*ptrToDist)++; } } double calcNewDist(double dist1, double dist2); //vector::iterator getIterToNodeWithMinDist(vector* nodesLeft); auto_ptr progSteps; int numSeqs; bool verbose; int orderNode1, orderNode2, orderNewNode; }; } #endif clustalx-2.1/clustalW/tree/UPGMA/UPGMAAlgorithm.cpp0000644000175000017500000003367711470725216021670 0ustar ddineenddineen#ifdef HAVE_CONFIG_H #include "config.h" #endif #include "UPGMAAlgorithm.h" #include #include #include #include #include "../../general/SymMatrix.h" #include "../../general/debuglogObject.h" #include "../../general/clustalw.h" #include "upgmadata.h" namespace clustalw { UPGMAAlgorithm::UPGMAAlgorithm() : overwriteMatrix(false), numSeqs(0), verbose(false), orderNode1(0), orderNode2(0), orderNewNode(0) {} auto_ptr UPGMAAlgorithm::generateTree(RootedGuideTree* phyTree, DistMatrix* distMat, SeqInfo* seqInfo, bool overwrite, ofstream* tree) { if (tree == 0 || !tree->is_open()) { verbose = false; } if (verbose) { (*tree) << "\n\n\t\t\tUPGMA Method\n" << "\n\n This is a ROOTED tree\n" << "\n Numbers in parentheses are branch lengths\n\n"; } progSteps.reset(new AlignmentSteps); Node** clusters; Node* root; numSeqs = seqInfo->numSeqs; const int sizeDistMat = ((numSeqs + 1) * (numSeqs + 2)) / 2; double* elements = overwrite ? 
distMat->getDistMatrix(seqInfo->firstSeq, seqInfo->numSeqs) : (double *)memcpy(new double[sizeDistMat], distMat->getDistMatrix(seqInfo->firstSeq, seqInfo->numSeqs), sizeDistMat * sizeof(double)); clusters = initialiseNodes(elements, seqInfo->firstSeq); root = doUPGMA(clusters, tree); phyTree->setRoot(root); delete [] clusters; if(!overwrite) { delete [] elements; } distMat->clearSubArray(); return progSteps; } Node** UPGMAAlgorithm::initialiseNodes(double* distanceMatrix, int fSeq) { int firstSeq = fSeq; Node** nodes = new Node*[numSeqs]; Node** nodeIter = nodes; *nodes = new Node(firstSeq, 0, 0); distanceMatrix++; // Move to first position in distanceMatrix. for(int elementIndex = 1, e = numSeqs; elementIndex < e; distanceMatrix += ++elementIndex) { Node* newcluster = new Node(elementIndex + firstSeq, distanceMatrix, elementIndex); (*nodeIter++)->next = newcluster; *nodeIter = newcluster; } return nodes; } void UPGMAAlgorithm::printAllNodes(Node** nodes) { int numNodes = 0; for(Node* nodeIter = *nodes; nodeIter; nodeIter = nodeIter->next) { numNodes++; cout << "Node " << numNodes << "\n"; nodeIter->printElements(); cout << "\n\n"; } cout << "There are " << numNodes << " nodes\n"; } Node* UPGMAAlgorithm::doUPGMA(Node** nodes, ofstream* tree) { if (tree == 0 || !tree->is_open()) { verbose = false; } string type1, type2; int step = 0; while((*nodes)->next) // While there is more than 1 node. 
{ step++; if (verbose) { (*tree) << "\n Cycle" << setw(4) << step << " = "; } Node** ptrNodeWithMin; /** * STEP 1 find node with min distance */ ptrNodeWithMin = getNodeWithMinDist(nodes); Node* nodeToJoin2 = *ptrNodeWithMin; const int indexToMinDist = nodeToJoin2->indexToMinDist; Node* const nodeToJoin1 = nodes[indexToMinDist]; orderNode1 = nodeToJoin1->size; orderNode2 = nodeToJoin2->size; orderNewNode = orderNode1 + orderNode2; /** * STEP 2 Recompute the dist Matrix row for nodeToJoin1 * example: Join nodes 2 and 6 * * 1 0 * 2 X 0 * 3 0 0 0 * 4 0 0 0 0 * 5 0 0 0 0 0 * 6 0 0 0 0 0 0 * 7 0 0 0 0 0 0 0 */ double* nodeToJoin2DistIter = nodeToJoin2->ptrToDistMatRow; if (indexToMinDist != 0) { recomputeNodeToJoin1DistMatRow(nodeToJoin1, &nodeToJoin2DistIter); } /** * STEP 3 Recompute all distances in column * example: Join nodes 2 and 6 * * 1 0 * 2 C 0 * 3 0 X 0 * 4 0 X 0 0 * 5 0 X 0 0 0 * 6 0 0 0 0 0 0 * 7 0 X 0 0 0 0 0 */ computeAllOtherDistsToNewNode(nodeToJoin1, nodeToJoin2, &nodeToJoin2DistIter); /** * STEP 4 Add the step to the progSteps. * This creates the multiple alignment steps. */ addAlignmentStep(&nodeToJoin1->allElements, &nodeToJoin2->allElements); double minDistance = (*ptrNodeWithMin)->minDist; double height = 0.0; height = minDistance / 2.0; if(verbose) { if(nodeToJoin1->allElements.size() > 1) { type1 = "NODE: "; } else { type1 = "SEQ: "; } if(nodeToJoin2->allElements.size() > 1) { type2 = "NODE: "; } else { type2 = "SEQ: "; } (*tree) << type1 << nodeToJoin1->allElements[0] << " (" << setw(9) << setprecision(5) << height << ") joins " << type2 << setw(4) << nodeToJoin2->allElements[0] << " (" << setw(9) << setprecision(5) << height << ")"; } /** * STEP 5 merge 2 nodes */ nodeToJoin1->merge(ptrNodeWithMin, height); } return *nodes; } /** * This function returns a Node object that has the minimum distance of * all the nodes left. 
*/ Node** UPGMAAlgorithm::getNodeWithMinDist(Node** nodes) { Node** ptrMinNode = NULL; double minDistance = numeric_limits::max(); //Start at node 1, check see if it points to something, then move on the next one. Node** nodeIter; for(nodeIter = &((*nodes)->next); *nodeIter; nodeIter = &(*nodeIter)->next) { if ((*nodeIter)->getMinDist() < minDistance) { minDistance = (*nodeIter)->getMinDist(); ptrMinNode = nodeIter; // ptrMinNode will hold a ptr to node with sm'st dist } } return ptrMinNode; } /** * This function is used to recompute the nodeToJoin1 dist mat row. Each row in the distance * matrix has the distances to all nodes before it, not after. For example the dist mat row * for Node 3 would have the distances to nodes 1 and 2. * 1 0 * 2 0 0 * 3 X X 0 Dists to node 1 and 2. * 4 0 0 0 0 * 5 0 0 0 0 0 * 6 0 0 0 0 0 0 * 7 0 0 0 0 0 0 0 */ void UPGMAAlgorithm::recomputeNodeToJoin1DistMatRow(Node* nodeToJoin1, double** nodeToJoin2DistIter) { double* nodeToJoin1DistIter = nodeToJoin1->ptrToDistMatRow; // calculate new distance *nodeToJoin1DistIter = calcNewDist(*nodeToJoin1DistIter, **nodeToJoin2DistIter); const double* minIndex1 = nodeToJoin1DistIter; nodeToJoin1DistIter++; (*nodeToJoin2DistIter)++; int numDistToUpdate = nodeToJoin1->numDists - 1; // For each of the distances in nodeToJoin1 while(numDistToUpdate > 0) { if (*nodeToJoin1DistIter >= 0) { // Calculate the average *nodeToJoin1DistIter = calcNewDist(*nodeToJoin1DistIter, **nodeToJoin2DistIter); if (*nodeToJoin1DistIter < *minIndex1) { minIndex1 = nodeToJoin1DistIter; } } nodeToJoin1DistIter++; (*nodeToJoin2DistIter)++; numDistToUpdate--; } // We have found the minimum distance nodeToJoin1->minDist = *minIndex1; nodeToJoin1->indexToMinDist = minIndex1 - nodeToJoin1->ptrToDistMatRow; } /** * This function is used to recompute all the other distances. It does this by calling * two other functions. We first compute all the distances until we get to node 2. * Then we call another function to do the rest. 
This is because nodes after node 2 may * have had EITHER node1 of node2 as their min distance. */ void UPGMAAlgorithm::computeAllOtherDistsToNewNode(Node* nodeToJoin1, Node* nodeToJoin2, double** nodeToJoin2DistIter) { computeDistsUpToNodeToJoin2(nodeToJoin1, nodeToJoin2, nodeToJoin2DistIter); computeDistsForNodesAfterNode2(nodeToJoin2); } /** * STEP 3A: * This function recomputes the distances in column until we get to nodeToJoin2 * example: Join nodes 2 and 6 * * 1 0 * 2 C 0 * 3 0 X 0 * 4 0 X 0 0 * 5 0 X 0 0 0 * 6 0 0 0 0 0 0 * 7 0 0 0 0 0 0 0 * * For each node until we get to nodeToJoin2 * If newdistance is less than the old min distance, set it to this. * else if its greater than old minDist and indexTominDist is the same, recompute min * else leave the min the same as it hasnt changed. */ void UPGMAAlgorithm::computeDistsUpToNodeToJoin2(Node* nodeToJoin1, Node* nodeToJoin2, double** nodeToJoin2DistIter) { const int indexToMinDist = nodeToJoin2->indexToMinDist; movePtrPastUnusedDistances(nodeToJoin2DistIter); Node* nodeIter; // For each node until we get to the second node we are joining for(nodeIter = nodeToJoin1->next; nodeIter != nodeToJoin2; nodeIter = nodeIter->next) { (*nodeToJoin2DistIter)++; // Skip the distance to the node we are joining with movePtrPastUnusedDistances(nodeToJoin2DistIter); double distToNode = nodeIter->ptrToDistMatRow[indexToMinDist]; double newDistToNode = calcNewDist(distToNode, **nodeToJoin2DistIter); nodeIter->ptrToDistMatRow[indexToMinDist] = newDistToNode; if (newDistToNode < nodeIter->minDist) { /** new value is smaller than current min. 
*/ nodeIter->minDist = newDistToNode; nodeIter->indexToMinDist = indexToMinDist; } else if ((newDistToNode > nodeIter->minDist) && (nodeIter->indexToMinDist == indexToMinDist)) { /** indexToMinDist was the min dist, but it is now a larger num, recompute */ nodeIter->findMinDist(); } } } /** * STEP 3B Recompute distance for nodes after nodeToJoin2 * example: Join nodes 2 and 6 * * 1 0 * 2 C 0 * 3 0 C 0 * 4 0 C 0 0 * 5 0 C 0 0 0 * 6 0 0 0 0 0 0 * 7 0 X 0 0 0 0 0 * * For each node until we get to the end. * if dist is less than minDist, set mindist to new distance, set index to index * else if dist is greater than mindist and the index was either nodetojoin1 or * nodetojoin2, recompute distance, set entry for nodetojoin2 to NOTUSED * else set the distance to unused. */ void UPGMAAlgorithm::computeDistsForNodesAfterNode2(Node* nodeToJoin2) { int indexToNode2 = nodeToJoin2->numDists; const int indexToMinDist = nodeToJoin2->indexToMinDist; Node* nodeIter; for(nodeIter = nodeToJoin2->next; nodeIter; nodeIter = nodeIter->next) { double &distUpdate = nodeIter->ptrToDistMatRow[indexToMinDist]; distUpdate = ((distUpdate * orderNode1) + (nodeIter->ptrToDistMatRow[indexToNode2] * orderNode2)) / orderNewNode; /* The comparison (distUpdate > nodeIter->minDist) is unsafe. * Specifically, we get different results for 32/64bit machines, * which leads to different branching in the if/else statement * and nasty behaviour down the line. * Using all of Pfam as a benchmark distUpdate can 'wobble' by 40 ULPs * (Unit of Least Precision) which is difficult to translate into * a maximum relative error, so we pick COMPARE_REL_EPSILON * phenomenologically: approx 2E-15 was the biggest we saw in Pfam, * but suggest 1E-14 (for good measure). * Using ULPs, eg, *(long long int*)&(distUpdate), * would be better (more elegant?) but gives problems * with aliasing and/or performance reduction. 
* FS, 2009-04-30 */ #define COMPARE_REL_EPSILON 1E-14 if ( (distUpdate < nodeIter->minDist) && ((nodeIter->minDist-distUpdate) / nodeIter->minDist > COMPARE_REL_EPSILON) ) { /** new distance is smaller */ nodeIter->minDist = distUpdate; nodeIter->indexToMinDist = indexToMinDist; } else if (((distUpdate > nodeIter->minDist) && ((distUpdate-nodeIter->minDist) / distUpdate > COMPARE_REL_EPSILON) && (nodeIter->indexToMinDist == indexToMinDist)) || (nodeIter->indexToMinDist == indexToNode2)) { /** if old min dist was to either nodeToJoin1 or nodeToJoin2 */ nodeIter->ptrToDistMatRow[indexToNode2] = BLANKDIST; nodeIter->findMinDist(); } else { nodeIter->ptrToDistMatRow[indexToNode2] = BLANKDIST; } } } void UPGMAAlgorithm::addAlignmentStep(vector* group1, vector* group2) { int sizeGroup1 = group1->size(); int sizeGroup2 = group2->size(); vector groups; groups.resize(numSeqs + 1, 0); int sizeGroup = groups.size(); for(int i = 0; i < sizeGroup1 && (*group1)[i] < sizeGroup; i++) { groups[(*group1)[i]] = 1; // Set to be apart of group1 } for(int i = 0; i < sizeGroup2 && (*group2)[i] < sizeGroup; i++) { groups[(*group2)[i]] = 2; // Set to be apart of group1 } progSteps->saveSet(&groups); } /** * At the moment we are only using average distance, UPGMA, but we can also use this * function to have different distance measures, min, max etc. */ double UPGMAAlgorithm::calcNewDist(double dist1, double dist2) { return ((dist1 * orderNode1) + (dist2 * orderNode2)) / orderNewNode; } } clustalx-2.1/clustalW/tree/UPGMA/RootedTreeOutput.h0000644000175000017500000000217411470725216022076 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * mark 8-5-2007: I removed the isLeafNode function. Changed both the traversal functions * to access Node's data members via functions. 
*/ #ifndef ROOTEDTREEOUTPUT_H #define ROOTEDTREEOUTPUT_H #include #include "RootedGuideTree.h" #include "../../general/clustalw.h" /** this is only used for upgma?! * */ namespace clustalw { class RootedTreeOutput { public: RootedTreeOutput(SeqInfo* seqInfo); void printPhylipTree(RootedGuideTree* tree, ofstream* ptrToFile, Alignment *alignPtr, DistMatrix* distMat); void printNexusTree(RootedGuideTree* tree, ofstream* ptrToFile, Alignment *alignPtr, DistMatrix* distMat); private: void phylipTraverse(ofstream* ptrToFile, Alignment *alignPtr, Node* tree); void nexusTraverse(ofstream* ptrToFile, Alignment *alignPtr, Node* tree); int firstSeq; int lastSeq; int numSeqs; }; } #endif clustalx-2.1/clustalW/tree/UPGMA/RootedTreeOutput.cpp0000644000175000017500000001054211470725216022427 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "RootedTreeOutput.h" namespace clustalw { RootedTreeOutput::RootedTreeOutput(SeqInfo* seqInfo) { firstSeq = seqInfo->firstSeq; lastSeq = seqInfo->lastSeq; numSeqs = seqInfo->numSeqs; } void RootedTreeOutput::printPhylipTree(RootedGuideTree* tree, ofstream* ptrToFile, Alignment *alignPtr, DistMatrix* distMat) { if(!ptrToFile || !ptrToFile->is_open()) { return; } //cerr << "\n******************* DEBUG AW RootedTreeOutput::printPhylipTree: we only get here when using UPGMA with >2 seqs. Why?\n"; // If we have only 2 sequences, use the distances in the distMat if (lastSeq - firstSeq + 1 == 2) { (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << "," << alignPtr->getName(firstSeq + 1) << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) ; // << ")"; // AW 2009-05-15: final ")" added here, but not tested // when is this ever used? 
tried // infile=test.aln -clustering=nj|upgma // on pairwise input, but this is only called for multiple // input. why? } else { // AW 2009-05-08: fixed bug 155 by removing unnecessary, extra // outer parenthesis from output phylipTraverse(ptrToFile, alignPtr, tree->getRoot()); } (*ptrToFile) << ";\n"; } void RootedTreeOutput::printNexusTree(RootedGuideTree* tree, ofstream* ptrToFile, Alignment *alignPtr, DistMatrix* distMat) { if(!ptrToFile || !ptrToFile->is_open()) { return; } (*ptrToFile) << "#NEXUS\n\n"; (*ptrToFile) << "BEGIN TREES;\n\n"; (*ptrToFile) << "\tTRANSLATE\n"; for(int j = 1; j < numSeqs; j++) { (*ptrToFile) << "\t\t" << j << "\t" << alignPtr->getName(j) <<",\n"; } (*ptrToFile) << "\t\t" << numSeqs << "\t" << alignPtr->getName(numSeqs) << "\n"; (*ptrToFile) << "\t\t;\n"; (*ptrToFile) << "\tUTREE PAUP_1= "; // IF we have only 2 seqs if (lastSeq - firstSeq + 1 == 2) { (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << "," << alignPtr->getName(firstSeq + 1) << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1); } else { (*ptrToFile) << "("; nexusTraverse(ptrToFile, alignPtr, tree->getRoot()); } (*ptrToFile) << ");\n"; (*ptrToFile) << "\nENDBLOCK;\n"; } /** * PRIVATE FUNCTIONS */ void RootedTreeOutput::phylipTraverse(ofstream* ptrToFile, Alignment *alignPtr, Node* t) { if(!ptrToFile) { return; } if(t != 0) { if(t->isLeafNode()) { if(alignPtr) { (*ptrToFile) << alignPtr->getName(t->getSeqNum()) << ":" << t->getHeight(); } else { (*ptrToFile) << t->getSeqNum() << ":" << t->getHeight(); } } else // Internal node { (*ptrToFile) << "(\n"; phylipTraverse(ptrToFile, alignPtr, t->getLeft()); (*ptrToFile) << ",\n"; phylipTraverse(ptrToFile, alignPtr, t->getRight()); (*ptrToFile) << "):" << t->getHeight(); } } } void RootedTreeOutput::nexusTraverse(ofstream* ptrToFile, Alignment *alignPtr, Node* t) { if(t != 0) { if(!t->isLeafNode()) // Internal node { (*ptrToFile) << 
"("; nexusTraverse(ptrToFile, alignPtr, t->getLeft()); (*ptrToFile) << ","; nexusTraverse(ptrToFile, alignPtr, t->getRight()); (*ptrToFile) << "):" << t->getHeight(); } else // Leaf node { if(alignPtr) { (*ptrToFile) << alignPtr->getName(t->getSeqNum()) << ":" << t->getHeight(); } else { (*ptrToFile) << t->getSeqNum() << ":" << t->getHeight(); } } } } } clustalx-2.1/clustalW/tree/UPGMA/RootedGuideTree.h0000644000175000017500000000175311470725216021635 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * mark May 8th 2007: removed makeEmpty(Node* t function), changed makeEmpty(). I also * made changes to any functions that accessed Node's data members */ #ifndef ROOTEDGUIDETREE_H #define ROOTEDGUIDETREE_H #include "Node.h" #include #include "../../alignment/Alignment.h" namespace clustalw { using namespace std; class RootedGuideTree { public: RootedGuideTree(); RootedGuideTree(Node* root); ~RootedGuideTree(); void setRoot(Node* r); void makeEmpty(); void calcSeqWeights(int firstSeq, int lastSeq, vector* seqWeights); Node* getRoot(){return root;} private: void orderNodes(); int calcOrderNode(Node* node); void calcWeights(vector* seqWeights); void doWeightCalc(float weightSoFar, vector* seqWeights, Node* t); Node* root; }; } #endif clustalx-2.1/clustalW/tree/UPGMA/RootedGuideTree.cpp0000644000175000017500000000712111470725216022163 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "RootedGuideTree.h" #include "../../general/userparams.h" using namespace std; namespace clustalw { RootedGuideTree::RootedGuideTree() : root(0){ } RootedGuideTree::RootedGuideTree(Node* r) : root(r) { } RootedGuideTree::~RootedGuideTree() { root->makeEmpty(); } void RootedGuideTree::setRoot(Node* r) { makeEmpty(); root = r; } void RootedGuideTree::makeEmpty() { root->makeEmpty(); } void RootedGuideTree::orderNodes() { calcOrderNode(root); } int RootedGuideTree::calcOrderNode(Node* node) { if(node != 0) { if(node->getLeft() == 0 && node->getRight() == 0) // Leaf Node { node->setOrder(1); return 1; } else // Internal node { node->setOrder(calcOrderNode(node->getLeft()) + calcOrderNode(node->getRight())); return node->getOrder(); } } return 0; } void RootedGuideTree::calcSeqWeights(int firstSeq, int lastSeq, vector* seqWeights) { if((int)seqWeights->size() < lastSeq - 1) { seqWeights->resize(lastSeq - 1); } int i = 0, _nSeqs = 0; int temp = 0, sum = 0; // // If there are more than three sequences.... // _nSeqs = lastSeq - firstSeq; if ((_nSeqs >= 2) && (userParameters->getDistanceTree() == true) && (userParameters->getNoWeights() == false)) { // // Calculate sequence weights based on Phylip tree. // orderNodes(); calcWeights(seqWeights); // // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR // sum = 0; for (i = firstSeq; i < lastSeq; i++) { sum += (*seqWeights)[i]; } if (sum == 0) { for (i = firstSeq; i < lastSeq; i++) { (*seqWeights)[i] = 1; } sum = i; } for (i = firstSeq; i < lastSeq; i++) { (*seqWeights)[i] = ((*seqWeights)[i] * INT_SCALE_FACTOR) / sum; if ((*seqWeights)[i] < 1) { (*seqWeights)[i] = 1; } } } else { // // Otherwise, use identity weights. // temp = INT_SCALE_FACTOR / _nSeqs; // AW 2009-07-09: goes wrong if we have more than // INT_SCALE_FACTOR seqs. 
if so, set to 1, just as above // same as in Tree.cpp if (temp < 1) temp = 1; for (i = firstSeq; i < lastSeq; i++) { (*seqWeights)[i] = temp; } } } void RootedGuideTree::calcWeights(vector* seqWeights) { vector weights; int sizeSeqWeights = seqWeights->size(); weights.resize(sizeSeqWeights, 0.0); doWeightCalc(0.0, &weights, root); for(int i = 0; i < sizeSeqWeights; i++) { (*seqWeights)[i] = static_cast(weights[i] * 100); } } void RootedGuideTree::doWeightCalc(float weightSoFar, vector* weights, Node* t) { if(t != 0) { if(t->getLeft() == 0 && t->getRight() == 0) // Leaf Node { (*weights)[t->getSeqNum() - 1] = weightSoFar; } else // Internal node { float w = weightSoFar + (t->getHeight() / t->getOrder()); doWeightCalc(w, weights, t->getLeft()); doWeightCalc(w, weights, t->getRight()); } } } } clustalx-2.1/clustalW/tree/UPGMA/RootedClusterTree.h0000644000175000017500000000141511470725216022214 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef ROOTEDCLUSTERTREE_H #define ROOTEDCLUSTERTREE_H #include "../ClusterTree.h" #include "RootedGuideTree.h" #include "../AlignmentSteps.h" namespace clustalw { class RootedClusterTree : private ClusterTree { public: //RootedClusterTree(); /** * NOTE these will have different signatures!!!! */ void treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr); auto_ptr treeFromDistMatrix(RootedGuideTree* phyloTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string& phylipName); }; } #endif clustalx-2.1/clustalW/tree/UPGMA/RootedClusterTree.cpp0000644000175000017500000001516611470725216022557 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "RootedClusterTree.h" #include "../../general/OutputFile.h" #include "UPGMAAlgorithm.h" #include "RootedTreeOutput.h" //#include "../RandomGenerator.h" namespace clustalw { auto_ptr RootedClusterTree::treeFromDistMatrix(RootedGuideTree* phyloTree,DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string& phylipName) { OutputFile phylipPhyTreeFile; auto_ptr progSteps; try { // Test to see if the inputs are valid if(seq1 < 1 || nSeqs < 1) { cerr << "Invalid inputs into treeFromDistMatrix \n" << "seq1 = " << seq1 << " nSeqs = " << nSeqs << "\n" << "Need to end program!\n"; exit(1); return progSteps; } float dist; string path; verbose = false; firstSeq = seq1; lastSeq = firstSeq + nSeqs - 1; SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = nSeqs; utilityObject->getPath(userParameters->getSeqName(), &path); if(nSeqs >= 2) { string name = phylipName; if(!phylipPhyTreeFile.openFile(&name, "\nEnter name for new GUIDE TREE file ", &path, "dnd", "Guide tree")) { return progSteps; } phylipName = name; } else { return progSteps; } RootedTreeOutput outputTree(&info); ofstream* ptrToFile = phylipPhyTreeFile.getPtrToFile(); if (nSeqs == 2) { dist = (*distMat)(firstSeq, firstSeq + 1) / 2.0; if(ptrToFile->is_open()) { (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":" << setprecision(5) << dist << "," << alignPtr->getName(firstSeq + 1) << ":" << setprecision(5) << dist <<");\n"; } progSteps.reset(new AlignmentSteps); vector groups; groups.resize(nSeqs + 1, 0); groups[1] = 1; groups[2] = 2; } else { UPGMAAlgorithm clusAlgorithm; progSteps = clusAlgorithm.generateTree(phyloTree, distMat, &info, false); outputTree.printPhylipTree(phyloTree, ptrToFile, alignPtr, distMat); } return progSteps; } catch(const exception &ex) { cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n" << ex.what(); exit(1); } catch(...) 
{ cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n"; exit(1); } } void RootedClusterTree::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr) { try { OutputFile phylipPhyTreeFile; OutputFile clustalPhyTreeFile; OutputFile distancesPhyTreeFile; OutputFile nexusPhyTreeFile; OutputFile pimFile; RootedGuideTree phyloTree; string path; int j; int overspill = 0; int totalDists; numSeqs = alignPtr->getNumSeqs(); // NOTE class variable /** * Check if numSeqs is ok */ if(!checkIfConditionsMet(numSeqs, 2)) { return; } firstSeq = 1; lastSeq = numSeqs; // The SeqInfo struct is passed to reduce the number of parameters passed! SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = numSeqs; RootedTreeOutput outputTree(&info); // No bootstrap! utilityObject->getPath(userParameters->getSeqName(), &path); /** * Open the required output files. */ if(!openFilesForTreeFromAlignment(&clustalPhyTreeFile, &phylipPhyTreeFile, &distancesPhyTreeFile, &nexusPhyTreeFile, &pimFile, treeNames, &path)) { return; // Problem opeing one of the files, cannot continue! 
} int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); bootPositions.clear(); bootPositions.resize(_lenFirstSeq + 2); for (j = 1; j <= _lenFirstSeq; ++j) { bootPositions[j] = j; } /** * Calculate quickDist and overspill */ overspill = calcQuickDistMatForAll(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), pimFile.getPtrToFile(), distancesPhyTreeFile.getPtrToFile(), alignPtr); // check if any distances overflowed the distance corrections if (overspill > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; overspillMessage(overspill, totalDists); } if (userParameters->getOutputTreeClustal()) { verbose = true; } // Turn on file output if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { UPGMAAlgorithm clusAlgorithm; clusAlgorithm.setVerbose(true); clusAlgorithm.generateTree(&phyloTree, quickDistMat.get(), &info, false, clustalPhyTreeFile.getPtrToFile()); clusAlgorithm.setVerbose(false); } if (userParameters->getOutputTreePhylip()) { outputTree.printPhylipTree(&phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get()); } if (userParameters->getOutputTreeNexus()) { outputTree.printNexusTree(&phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get()); } /** Free up resources!!!!! */ treeGaps.clear(); bootPositions.clear(); } catch(const exception& ex) { cerr << ex.what() << endl; utilityObject->error("Terminating program. 
Cannot continue\n"); exit(1); } } } clustalx-2.1/clustalW/tree/UPGMA/Node.h0000644000175000017500000000444111470725216017465 0ustar ddineenddineen#ifndef _NODE_H #define _NODE_H #include #include #include #include "upgmadata.h" namespace clustalw { using namespace std; class Node { public: Node(int seqNum, double *aptrToDistMatRow, int numDists); double getDist(int index){return ptrToDistMatRow[index];} void setDist(int index, double dist){ptrToDistMatRow[index] = dist;} double* getPtrToDistMatRow(){return ptrToDistMatRow;} void setDistMatRowToNull(){ptrToDistMatRow = 0;} int getNumDists(){return numDists;} double getMinDist(){return minDist;} int getIndexToMinDist(){return indexToMinDist;} void setMinDist(int index, double d){indexToMinDist = index; minDist = d;} int getOrder(){return order;} void setOrder(int o){order = o;} int getSeqNum(){return seqNum;} void setSeqNum(int sNum){seqNum = sNum;} void printNodeInfo(); void printDistMatRow(); void printMinDist(); string elementsToString(); Node* getLeft(){return left;} Node* getRight(){return right;} void setLeft(Node* l){left = l;} void setRight(Node* r){right = r;} double getHeight(){return height;} void setHeight(double h){height = h;} vector* getPtrToElements(){return &allElements;} int getFirstElement(){return allElements[0];} void clearElements(){allElements.clear();} int getFirstElem(){return allElements[0];} bool isLeafNode() { if(left == 0 && right == 0) { return true; } else { return false; } } void merge(Node **rightNode, double _height); void findMinDist(); void printElements(); void makeEmpty(); void makeEmpty(Node* t); /* Attributes */ Node *next; // For linked list of nodes Node *left, *right; int size; int seqNum; double height; vector allElements; // THis is for the groups!!! 
double *ptrToDistMatRow; double minDist; // minimal distance int indexToMinDist; // index of minimal distance int numDists; int order; }; } #endif clustalx-2.1/clustalW/tree/UPGMA/Node.cpp0000644000175000017500000000440611470725216020021 0ustar ddineenddineen#ifdef HAVE_CONFIG_H #include "config.h" #endif #include "Node.h" #include "../../general/VectorUtility.h" #include "../../general/debuglogObject.h" #include "../../general/clustalw.h" #include #include namespace clustalw { Node::Node(int _seqNum, double *dists, int numDist) : next(0), left(0), right(0), size(1), seqNum(_seqNum), height(0.0), ptrToDistMatRow(dists), minDist(numeric_limits::max()), indexToMinDist(-1), numDists(numDist), order(0) { allElements.resize(1); allElements[0] = seqNum; if (ptrToDistMatRow) { findMinDist(); } } void Node::merge(Node **rightNode, double _height) { left = new Node(*this); right = *rightNode; left->ptrToDistMatRow = 0; size = left->size + right->size; seqNum = -1; height = _height; left->height = height; right->height = height; vectorutils::mergeVectors(&allElements, &(right->allElements)); right->allElements.clear(); if (next == right) { next = right->next; } else { *rightNode = right->next; } } void Node::findMinDist() { double *distIterator = ptrToDistMatRow; double *minDistSoFar = distIterator++; // We search from the end of our area of the array for(int i = numDists; --i; distIterator++) // When --i gets to zero it will stop { if ((*distIterator >= 0) && (*distIterator < *minDistSoFar)) { minDistSoFar = distIterator; } } minDist = *minDistSoFar; indexToMinDist = minDistSoFar - ptrToDistMatRow; } void Node::printElements() { for(int i = 0; i < (int)allElements.size(); i++) { cout << " " << allElements[i]; } cout << "\n"; } string Node::elementsToString() { ostringstream elems; for(int i = 0; i < (int)allElements.size(); i++) { elems << " " << allElements[i]; } return elems.str(); } void Node::makeEmpty() { makeEmpty(this); } void Node::makeEmpty(Node* t) { if(t != 0) { 
makeEmpty(t->left); makeEmpty(t->right); delete t; } t = 0; } } clustalx-2.1/clustalW/tree/UPGMA/0000755000175000017500000000000011470725216016424 5ustar ddineenddineenclustalx-2.1/clustalW/tree/TreeInterface.h0000644000175000017500000002001611470725216020443 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function. * * */ #ifndef TREEINTERFACE_H #define TREEINTERFACE_H #include #include #include #include "AlignmentSteps.h" #include "../alignment/Alignment.h" #include "../general/clustalw.h" namespace clustalw { using namespace std; class Tree; class RootedGuideTree; class TreeInterface { public: /** * This function will be used to generate the phylogenetic tree from the distMat using * either UPGMA or NJ. It will then calculate the seqWeights and the steps. * Note the Distmat * will be in similarity form after this. It will no longer be distances!!!!! */ auto_ptr getWeightsAndStepsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); /** * This will be called by sequencesAlignToProfile and QTcalcWeightsForLowScoreSeg */ void getWeightsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); void getWeightsForQtLowScore(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); /** * This function will be called from doAlignUseOldTree */ auto_ptr getWeightsAndStepsFromTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success); /** * This function will be called from sequencesAlignToProfile, it doesnt calc the * steps. 
*/ int getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success); /** * This function is used to generate 2 guide trees for the profile align part. * IT must be done on its own. It calls calcPairwiseForProfileAlign in MSA. * It is called by profileAlign and removeFirstIterate. */ void getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success); /** * This function is used to generate the guide tree from the distance matrix. * It doesnt return * any seqWeights or AlignmentSteps. It will be used by doGuideTreeOnly */ void generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); /** * This function is to simply to call either the UPGMA or NJ version of * this function, and print out all the trees. It will be called * from phylogeneticTree in the clustal class. */ void treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr); /** * This function will be used to bootstrap the tree and output the results. * It will use either * UPGMA or NJ for the bootstrapping, depending on what is selected. 
*/ void bootstrapTree(TreeNames* treeNames, Alignment *alignPtr); private: auto_ptr getWeightsAndStepsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); auto_ptr getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success); int readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs); int getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success); void getWeightsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); void getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success); void generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); auto_ptr getWeightsAndStepsFromTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success); /** UPGMA functions */ auto_ptr getWeightsAndStepsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); auto_ptr generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); void getWeightsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success); void getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, 
vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success); }; } #endif clustalx-2.1/clustalW/tree/TreeInterface.cpp0000644000175000017500000005205311470725216021004 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function. * Mark 22-5-2007: Made a change to getWeightsForQtLowScore * Mark 23-5-2007: Made a change to getWeightsForQtLowScore */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "TreeInterface.h" #include #include "UnRootedClusterTree.h" #include "Tree.h" #include "../multipleAlign/MSA.h" #include "../general/userparams.h" #include "../general/debuglogObject.h" #include "UPGMA/RootedGuideTree.h" #include "UPGMA/RootedClusterTree.h" namespace clustalw { /** * This will be called by align! * */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("In getWeightsAndStepsFromDistMat\n"); } #endif if(userParameters->getClusterAlgorithm() == UPGMA) { return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } } /** * This will be called by sequencesAlignToProfile. This function will put the distMat into * a similarity matrix. 
*/ void TreeInterface::getWeightsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { if(userParameters->getClusterAlgorithm() == UPGMA) { getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } } /** * This function will be called by profileAlign */ void TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { if(userParameters->getClusterAlgorithm() == UPGMA) { getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName, p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2, success); } else { getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName, p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2, success); } } /** * This function will be called by doAlignUseOldGuideTree * */ auto_ptr TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success) { /** * This will only use the NJ. It will not use UPGMA */ return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, numSeqs, success); } /** * Called by sequencesAlignToProfile */ int TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { /** * This will only use the NJ. It will not use UPGMA */ return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs, success); } /** * This will be called by doGuideTreeOnly. This does not put the distMat into similarity. 
* */ void TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { /** * This function does not put distMat into similarities */ if(userParameters->getClusterAlgorithm() == UPGMA) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); } } void TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr) { if(userParameters->getClusterAlgorithm() == UPGMA) { RootedClusterTree clusterTree; clusterTree.treeFromAlignment(treeNames, alignPtr); } else { UnRootedClusterTree clusterTree; clusterTree.treeFromAlignment(treeNames, alignPtr); } } void TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr) { /** * NOTE We only do bootstrapping on the NJ tree. */ UnRootedClusterTree clusterTree; clusterTree.bootstrapTree(treeNames, alignPtr); } /** * Private functions!!!! * */ /** ******************* * * Neighbour joining functions */ /** * Note: After this function has been called the distance matrix will all be in similarities. 
* */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName, seqWeights, seq1, nSeqs, success); return progSteps; } auto_ptr TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { Tree groupTree; auto_ptr progSteps; if(nSeqs == 1) { utilityObject->info("Only 1 sequence, cannot do multiple alignment\n"); *success = false; return progSteps; } int status = 0; status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs); if(status == 0) { *success = false; return progSteps; } progSteps = groupTree.createSets(0, nSeqs); int _numSteps = progSteps->getNumSteps(); utilityObject->info("There are %d groups", _numSteps); // clear the memory used for the phylogenetic tree if (nSeqs >= 2) { groupTree.clearTree(NULL); } *success = true; return progSteps; } /** * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName. * It then calls the appropriate functions to calc the seqWeights and make sure the matrix * is in similarity mode. 
*/ int TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs) { int status = 0; if (nSeqs >= 2) { status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs); if (status == 0) { return status; } } groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights); status = groupTree->calcSimilarities(alignPtr, distMat); return status; } int TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { Tree groupTree; int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs); if(status == 0) { *success = false; } else { *success = true; } return status; } void TreeInterface::getWeightsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int _nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); int nSeqs = _nSeqs; int status = 0; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); Tree groupTree; status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName, seqWeights, seq1, nSeqs); if(status == 0) { *success = false; } else { *success = true; } } void TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { if(!useTree1) { if (profile1NumSeqs >= 2) { generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName, success); } } if(!useTree2) { if(numSeqs - profile1NumSeqs >= 2) { generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1, numSeqs - profile1NumSeqs, p2TreeName, success); } } if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) { *success = false; return; } // 
MSA->CALCPAIRWISE MSA* msaObj = new MSA(); int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat); if (count == 0) { *success = false; return; } Tree groupTree1, groupTree2; int status = 0; if (profile1NumSeqs >= 2) { status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs); if (status == 0) { *success = false; return; } } groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights); if (profile1NumSeqs >= 2) { groupTree1.clearTree(NULL); } if (numSeqs - profile1NumSeqs >= 2) { status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs); if (status == 0) { *success = false; return; } } groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); /* clear the memory for the phylogenetic tree */ if (numSeqs - profile1NumSeqs >= 2) { groupTree2.clearTree(NULL); } /** * Convert distances to similarities!!!!!! */ for (int i = 1; i < numSeqs; i++) { for (int j = i + 1; j <= numSeqs; j++) { (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0; (*distMat)(j, i) = (*distMat)(i, j); } } *success = true; } void TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); if (nSeqs >= 2) { UnRootedClusterTree* clusterTree = new UnRootedClusterTree; clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName); *phylipName = copyOfPhylipName; // AW: message outputted by OutputFile function // utilityObject->info("Guide tree file created: [%s]", // phylipName->c_str()); delete clusterTree; } *success = true; } auto_ptr TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success) { auto_ptr progSteps; Tree groupTree; if(numSeqs == 1) { utilityObject->info("Only 1 sequence, cannot do multiple alignment\n"); *success = false; return progSteps; } int status; status = 
readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, numSeqs); if (status == 0) { *success = false; return progSteps; } progSteps = groupTree.createSets(0, numSeqs); int _numSteps = progSteps->getNumSteps(); utilityObject->info("There are %d groups", _numSteps); // clear the memory used for the phylogenetic tree if (numSeqs >= 2) { groupTree.clearTree(NULL); } *success = true; return progSteps; } /** * UPGMA functions * */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; RootedGuideTree guideTree; progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, phylipName, success); guideTree.calcSeqWeights(0, nSeqs, seqWeights); distMat->makeSimilarityMatrix(); return progSteps; } auto_ptr TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; string copyOfPhylipName = string(*phylipName); if (nSeqs >= 2) { RootedClusterTree clusterTree; progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1, nSeqs, copyOfPhylipName); *phylipName = copyOfPhylipName; // AW: message outputted by OutputFile function // utilityObject->info("Guide tree file created: [%s]", // phylipName->c_str()); } *success = true; return progSteps; } void TreeInterface::getWeightsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } /** * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights * that will be used in a profile alignment. It also recalculates the distance matrix, and * then returns it as a similarity matrix. 
This function uses the NJ code to read in a tree * from a file if we are using a previous tree. This is because that part of the code is able * to do the finding of the root etc. */ void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { int status = 0; if(useTree1) { // Use the code to read in the tree and get the seqWeights Tree groupTree1; if (profile1NumSeqs >= 2) { status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs); if (status == 0) { *success = false; return; } } groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights); if (profile1NumSeqs >= 2) { groupTree1.clearTree(NULL); } } else { if (profile1NumSeqs >= 2) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs, p1TreeName, success); guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights); } } if(useTree2) { Tree groupTree2; if (numSeqs - profile1NumSeqs >= 2) { status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs); if (status == 0) { *success = false; return; } } groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); if (numSeqs - profile1NumSeqs >= 2) { groupTree2.clearTree(NULL); } } else { if(numSeqs - profile1NumSeqs >= 2) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1, numSeqs - profile1NumSeqs, p2TreeName, success); guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); } } if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) { *success = false; return; } MSA* msaObj = new MSA(); int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat); delete msaObj; if (count == 0) { *success = false; return; } /** * Convert distances to similarities!!!!!! 
*/ distMat->makeSimilarityMatrix(); *success = true; } /** * Mark 10-5-2007: Bug fix # 42 * The following function is to be used only for calculating the weights for the Qt * low scoring segments. */ void TreeInterface::getWeightsForQtLowScore(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); int _nSeqs = nSeqs; int status = 0; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success); Tree groupTree; status = 0; if (_nSeqs >= 2) { status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1, seq1 + _nSeqs-1); // mark 22-5-07 if(status == 0) { *success = false; return; } else { *success = true; } } groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07 } } clustalx-2.1/clustalW/tree/Tree.h0000644000175000017500000000424011470725216016623 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef TREE_H #define TREE_H #include #include #include #include #include "../alignment/Alignment.h" #include "AlignmentSteps.h" #include namespace clustalw { class Tree { public: /* Functions */ void calcSeqWeights(int firstSeq, int lastSeq, vector* sweight); int readTree(Alignment* alignPtr, const string& treeFileName, int firstSeq, int lastSeq); auto_ptr createSets(int firstSeq, int lastSeq); int calcSimilarities(Alignment* alignPtr, DistMatrix* distMat); void clearTree(TreeNode* p); /* Attributes */ private: /* Functions */ void createTree(TreeNode* ptree, TreeNode* parent, ifstream* file); void createNode(TreeNode* pptr, TreeNode* parent); TreeNode* insertNode(TreeNode* pptr); void clearTreeNodes(TreeNode* p); TreeNode* reRoot(TreeNode* ptree, int nseqs); TreeNode* insertRoot(TreeNode* p, float diff); float calcRootMean(TreeNode* root, float *maxDist); float calcMean(TreeNode* nptr, float *maxDist, int nSeqs); void orderNodes(); int calcWeight(int leaf); void skipSpace(ifstream* file); void groupSeqs(TreeNode* p, int *nextGroups, int nSeqs, AlignmentSteps* stepsPtr); void markGroup1(TreeNode* p, int *groups, int n); void markGroup2(TreeNode* p, int *groups, int n); TreeNode* avail(); void setInfo(TreeNode* p, TreeNode* parent, int pleaf, string pname, float pdist); void debugPrintAllNodes(int nseqs); /* Attributes */ AlignmentSteps progSteps; char charFromFile; ifstream file; TreeNode** lptr; TreeNode** olptr; TreeNode** nptr; TreeNode** ptrs; int nnodes; int ntotal; bool rootedTree; TreeNode* seqTree; TreeNode* root; int* groups; int numSeq; int numSets; const static int MAXERRS = 10; }; } #endif clustalx-2.1/clustalW/tree/Tree.cpp0000644000175000017500000006535111470725216017170 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "Tree.h" namespace clustalw { /** * * @param firstSeq * @param lastSeq * @param sweight */ void Tree::calcSeqWeights(int firstSeq, int lastSeq, vector* sweight) { if((int)sweight->size() < lastSeq - 1) { sweight->resize(lastSeq - 1); } int i, _nSeqs; int temp, sum; int *weight; /* * If there are more than three sequences.... */ _nSeqs = lastSeq - firstSeq; if ((_nSeqs >= 2) && (clustalw::userParameters->getDistanceTree() == true) && (clustalw::userParameters->getNoWeights() == false)) { /* * Calculate sequence weights based on Phylip tree. */ weight = new int[lastSeq + 1]; for (i = firstSeq; i < lastSeq; i++) { weight[i] = calcWeight(i); } /* * Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR */ sum = 0; for (i = firstSeq; i < lastSeq; i++) { sum += weight[i]; } if (sum == 0) { for (i = firstSeq; i < lastSeq; i++) { weight[i] = 1; } sum = i; } for (i = firstSeq; i < lastSeq; i++) { (*sweight)[i] = (weight[i] * INT_SCALE_FACTOR) / sum; if ((*sweight)[i] < 1) { (*sweight)[i] = 1; } } delete []weight; weight = NULL; } else { /* * Otherwise, use identity weights. */ temp = INT_SCALE_FACTOR / _nSeqs; // AW 2009-05-28: goes wrong if we have more than // INT_SCALE_FACTOR seqs. if so, set to 1, just as above if (temp < 1) temp = 1; for (i = firstSeq; i < lastSeq; i++) { (*sweight)[i] = temp; } } } /** * * @param alignPtr * @param treeFileName * @param firstSeq * @param lastSeq * @return */ int Tree::readTree(clustalw::Alignment* alignPtr, const string& treeFileName, int firstSeq, int lastSeq) { if(alignPtr == NULL || firstSeq < 0 || lastSeq < 1) { return 0; } char c; string name1 = "", name2 = ""; int i, j, k; bool found; numSeq = 0; nnodes = 0; ntotal = 0; rootedTree = true; // Need to check what happens if I try to open a file that doesnt exist! if(treeFileName.empty()) { return 0; // Cannot open, empty string! 
} else { file.open(treeFileName.c_str(), ifstream::in); if(!file.is_open()) { clustalw::utilityObject->error("Cannot open output file [%s]\n", treeFileName.c_str()); } } skipSpace(&file); charFromFile = file.get(); if (charFromFile != '(') { clustalw::utilityObject->error("Wrong format in tree file %s\n", treeFileName.c_str()); return 0; } file.seekg(0, ios::beg); // Put get pointer back to the begining! clustalw::userParameters->setDistanceTree(true); // Allocate memory for tree // Allocate memory. nptr = new TreeNode*[3 * (lastSeq - firstSeq + 1)]; ptrs = new TreeNode*[3 * (lastSeq - firstSeq + 1)]; lptr = new TreeNode*[lastSeq - firstSeq + 1]; olptr = new TreeNode*[lastSeq + 1]; seqTree = avail(); setInfo(seqTree, NULL, 0, string(""), 0.0); createTree(seqTree, NULL, &file); file.close(); if (numSeq != lastSeq - firstSeq) { stringstream ss; ss << "tree not compatible with alignment\n(" << lastSeq - firstSeq << " sequences in alignment and "<< numSeq <<" in tree\n"; string errorMes; ss >> errorMes; clustalw::utilityObject->error(errorMes.c_str()); return 0; } // If the tree is unrooted, reroot the tree - ie. minimise the difference // between the mean root->leaf distances for the left and right branches of // the tree. if (clustalw::userParameters->getDistanceTree() == false) { if (rootedTree == false) { clustalw::utilityObject->error("input tree is unrooted and has no distances.\nCannot align sequences"); return 0; } } if (rootedTree == false) { root = reRoot(seqTree, lastSeq - firstSeq + 1); } else { root = seqTree; } // calculate the 'order' of each node. int nameLength; string nameSeq; orderNodes(); if (numSeq >= 2) { // If there are more than three sequences.... 
// assign the sequence nodes (in the same order as in the alignment file) for (i = firstSeq; i < lastSeq; i++) { nameLength = alignPtr->getName(i + 1).length(); nameSeq = alignPtr->getName(i + 1); name1 = ""; name2 = ""; if (nameLength > clustalw::MAXNAMES) { stringstream ss; ss << "name " << nameSeq << " is too long for PHYLIP tree format (max " << clustalw::MAXNAMES << " chars)"; string msg; ss >> msg; clustalw::utilityObject->warning(msg.c_str());// Mark change 17-5-07 } for (k = 0; k < nameLength && k < clustalw::MAXNAMES; k++) { c = nameSeq[k]; if ((c > 0x40) && (c < 0x5b)) { c = c | 0x20; } if (c == ' ') { c = '_'; } name2 += c; } found = false; for (j = 0; j < numSeq; j++) { name1 = ""; for (k = 0; k < (int)lptr[j]->name.length() && k < clustalw::MAXNAMES; k++) { c = lptr[j]->name[k]; if ((c > 0x40) && (c < 0x5b)) { c = c | 0x20; } name1 += c; } if (name1.compare(name2) == 0) { olptr[i] = lptr[j]; found = true; } } if (found == false) { utilityObject->error("tree not compatible with alignment:\n %s not found\n", name2.c_str()); return 0; } } } return (1); } /** * * @param firstSeq * @param lastSeq * @return */ auto_ptr Tree::createSets(int firstSeq, int lastSeq) { auto_ptr progAlignSteps; progAlignSteps.reset(new AlignmentSteps); int i, j, _nSeqs; numSets = 0; _nSeqs = lastSeq - firstSeq; if (_nSeqs >= 2) { // If there are more than three sequences.... groups = new int[_nSeqs + 1]; groupSeqs(root, groups, _nSeqs, progAlignSteps.get()); delete []groups; groups = NULL; } else { groups = new int[_nSeqs + 1]; for (i = 0; i < _nSeqs - 1; i++) { for (j = 0; j < _nSeqs; j++) if (j <= i) { groups[j] = 1; } else if (j == i + 1) { groups[j] = 2; } else { groups[j] = 0; } progAlignSteps->saveSet(_nSeqs, groups); } delete []groups; groups = NULL; } return progAlignSteps; } /** * calcSimilarities changes the distMat. 
* @param alignPtr * @param distMat * @return */ int Tree::calcSimilarities(clustalw::Alignment* alignPtr, clustalw::DistMatrix* distMat) { int depth = 0, i, j, k, n; bool found; int nerrs, seq1[MAXERRS], seq2[MAXERRS]; TreeNode *p, **pathToRoot; float dist; float *distToNode, badDist[MAXERRS]; double **dmat; ostringstream err1; char reply; int nSeqs = alignPtr->getNumSeqs(); pathToRoot = new TreeNode*[nSeqs]; distToNode = new float[nSeqs]; dmat = new double*[nSeqs]; for (i = 0; i < nSeqs; i++) { dmat[i] = new double[nSeqs]; } if (nSeqs >= 2) { /* * for each leaf, determine all nodes between the leaf and the root; */ for (i = 0; i < nSeqs; i++) { depth = 0;dist = 0.0; p = olptr[i]; while (p != NULL) { pathToRoot[depth] = p; dist += p->dist; distToNode[depth] = dist; p = p->parent; depth++; } /* * for each pair.... */ for (j = 0; j < i; j++) { p = olptr[j]; dist = 0.0; /* * find the common ancestor. */ found = false; n = 0; while ((found == false) && (p->parent != NULL)) { for (k = 0; k < depth; k++) if (p->parent == pathToRoot[k]) { found = true; n = k; } dist += p->dist; p = p->parent; } dmat[i][j] = dist + distToNode[n - 1]; } } nerrs = 0; for (i = 0; i < nSeqs; i++) { dmat[i][i] = 0.0; for (j = 0; j < i; j++) { if (dmat[i][j] < 0.01) { dmat[i][j] = 0.01; } if (dmat[i][j] > 1.0) { if (dmat[i][j] > 1.1 && nerrs < MAXERRS) { seq1[nerrs] = i; seq2[nerrs] = j; badDist[nerrs] = dmat[i][j]; nerrs++; } dmat[i][j] = 1.0; } } } if (nerrs > 0 && !userParameters->getGui()) { string errMess = "The following sequences are too divergent to be aligned:\n"; for (i = 0; i < nerrs && i < 5; i++) { err1 << " " << alignPtr->getName(seq1[i] + 1) << " and " << alignPtr->getName(seq2[i] + 1) << " (distance " << setprecision(3) << badDist[i] << ")\n"; } errMess += err1.str(); errMess += "(All distances should be between 0.0 and 1.0)\n"; errMess += "This may not be fatal but you have been warned!\n"; errMess += "SUGGESTION: Remove one or more problem sequences and try again"; if 
(clustalw::userParameters->getInteractive()) { reply = clustalw::utilityObject->promptForYesNo(errMess.c_str(), "Continue "); } else { reply = 'y'; } if ((reply != 'y') && (reply != 'Y')) { return 0; } } } else { for (i = 0; i < nSeqs; i++) { for (j = 0; j < i; j++) { dmat[i][j] = (*distMat)(i + 1, j + 1); } } } delete []pathToRoot; delete []distToNode; double value; for (i = 0; i < nSeqs; i++) { distMat->SetAt(i + 1, i + 1, 0.0); for (j = 0; j < i; j++) { value = 100.0 - (dmat[i][j]) * 100.0; distMat->SetAt(i + 1, j + 1, value); distMat->SetAt(j + 1, i + 1, value); } } for (i = 0; i < nSeqs; i++) { delete [] dmat[i]; } delete []dmat; return 1; } /** ************************************************************************* * Private functions!!!!!!!!!!!!!!! * ****************************************************************************/ /** * * @param ptree * @param parent * @param file */ void Tree::createTree(clustalw::TreeNode* ptree, clustalw::TreeNode* parent, ifstream* file) { TreeNode* p; int i, type; float dist; string name; // is this a node or a leaf ? skipSpace(file); charFromFile = file->get(); if (charFromFile == '(') { // this must be a node.... 
type = NODE; name = ""; ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; createNode(ptree, parent); p = ptree->left; createTree(p, ptree, file); if (charFromFile == ',') { p = ptree->right; createTree(p, ptree, file); if (charFromFile == ',') { ptree = insertNode(ptree); ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; p = ptree->right; createTree(p, ptree, file); rootedTree = false; } } skipSpace(file); charFromFile = file->get(); } // ...otherwise, this is a leaf else { type = LEAF; ptrs[ntotal++] = lptr[numSeq++] = ptree; // get the sequence name name = ""; name += charFromFile; charFromFile = file->get(); i = 1; while ((charFromFile != ':') && (charFromFile != ',') && (charFromFile != ')')) { if (i < MAXNAMES) { name += charFromFile; i++; } charFromFile = file->get(); } if (charFromFile != ':') { clustalw::userParameters->setDistanceTree(false); dist = 0.0; } } // get the distance information dist = 0.0; if (charFromFile == ':') { skipSpace(file); (*file) >> dist; skipSpace(file); charFromFile = file->get(); } setInfo(ptree, parent, type, name, dist); } /** * * @param pptr * @param parent */ void Tree::createNode(TreeNode* pptr, TreeNode* parent) { TreeNode* t; pptr->parent = parent; t = avail(); pptr->left = t; t = avail(); pptr->right = t; } /** * * @param pptr * @return */ TreeNode* Tree::insertNode(TreeNode* pptr) { TreeNode* newnode; newnode = avail(); createNode(newnode, pptr->parent); newnode->left = pptr; pptr->parent = newnode; setInfo(newnode, pptr->parent, NODE, "", 0.0); return newnode; } /** * * @param p */ void Tree::clearTree(clustalw::TreeNode* p) { clearTreeNodes(p); delete [] nptr; nptr = NULL; delete [] ptrs; ptrs = NULL; delete [] lptr; lptr = NULL; delete [] olptr; olptr = NULL; } /** * * @param p */ void Tree::clearTreeNodes(clustalw::TreeNode* p) { if (p == NULL) { p = root; } if (p->left != NULL) { clearTreeNodes(p->left); } if (p->right != NULL) { clearTreeNodes(p->right); } p->left = NULL; p->right = NULL; delete p; p = 
NULL; } /** * * @param * @param * @return */ void Tree::debugPrintAllNodes(int nseqs) { clustalw::TreeNode *p; int i; float diff, maxDist; cerr << "\nDEBUG: reportAllNodes\n"; for (i = 0; i < ntotal; i++) { p = ptrs[i]; // ios::sync_with_stdio(); // same design as TreeNode if (p->parent == NULL) diff = calcRootMean(p, &maxDist); else diff = calcMean(p, &maxDist, nseqs); fprintf(stdout, "i=%d p=%p: parent=%p left=%p right=%p dist=%f diff=%f\n", i, (void*)p, (void*)p->parent, (void*)p->left, (void*)p->right, p->dist, diff); } } /** * * @param ptree * @param nseqs * @return */ clustalw::TreeNode* Tree::reRoot(clustalw::TreeNode* ptree, int nseqs) { clustalw::TreeNode *p, *rootNode, *rootPtr; float diff, minDiff = 0.0, minDepth = 1.0, maxDist; int i; bool first = true; // find the difference between the means of leaf->node // distances on the left and on the right of each node rootPtr = ptree; for (i = 0; i < ntotal; i++) { p = ptrs[i]; if (p->parent == NULL) { /* AW Bug 94: p->parent must be chosen as rootNode (non-optimized executables (-O0) never do), otherwise insertRoot fails. Is p->parent == NULL valid at all? Simplest thing for now is to continue here. Tree code needs serious dismantling anyway. See debugPrintAllNodes */ continue; //diff = calcRootMean(p, &maxDist); } else { diff = calcMean(p, &maxDist, nseqs); } if ((diff == 0) || ((diff > 0) && (diff < 2 *p->dist))) { if ((maxDist < minDepth) || (first == true)) { first = false; rootPtr = p; minDepth = maxDist; minDiff = diff; } } } // insert a new node as the ancestor of the node which produces the shallowest // tree. 
/* AW Bug 94: could also be prevented here */ if (rootPtr == ptree) { minDiff = rootPtr->left->dist + rootPtr->right->dist; rootPtr = rootPtr->right; } rootNode = insertRoot(rootPtr, minDiff); diff = calcRootMean(rootNode, &maxDist); return rootNode; } /** * * @param p * @param diff * @return */ clustalw::TreeNode* Tree::insertRoot(clustalw::TreeNode* p, float diff) { clustalw::TreeNode *newp, *prev, *q, *t; float dist, prevDist, td; newp = avail(); if (p->parent==NULL) { // AW bug 94: question remains if access here should be handled differently cerr << "\n\n*** INTERNAL ERROR: Tree::insertRoot: TreeNode p->parent is NULL\n"; cerr << "To help us fix this bug, please send sequence file and used options to clustalw@ucd.ie\n"; exit(1); } t = p->parent; prevDist = t->dist; p->parent = newp; dist = p->dist; p->dist = diff / 2; if (p->dist < 0.0) { p->dist = 0.0; } if (p->dist > dist) { p->dist = dist; } t->dist = dist - p->dist; newp->left = t; newp->right = p; newp->parent = NULL; newp->dist = 0.0; newp->leaf = NODE; if (t->left == p) { t->left = t->parent; } else { t->right = t->parent; } prev = t; q = t->parent; t->parent = newp; while (q != NULL) { if (q->left == prev) { q->left = q->parent; q->parent = prev; td = q->dist; q->dist = prevDist; prevDist = td; prev = q; q = q->left; } else { q->right = q->parent; q->parent = prev; td = q->dist; q->dist = prevDist; prevDist = td; prev = q; q = q->right; } } /* * remove the old root node */ q = prev; if (q->left == NULL) { dist = q->dist; q = q->right; q->dist += dist; q->parent = prev->parent; if (prev->parent->left == prev) { prev->parent->left = q; } else { prev->parent->right = q; } prev->right = NULL; } else { dist = q->dist; q = q->left; q->dist += dist; q->parent = prev->parent; if (prev->parent->left == prev) { prev->parent->left = q; } else { prev->parent->right = q; } prev->left = NULL; } return (newp); } /** * * @param root * @param maxDist * @return */ float Tree::calcRootMean(clustalw::TreeNode* root, float 
*maxDist) { float dist, leftSum = 0.0, rightSum = 0.0, leftMean, rightMean, diff; clustalw::TreeNode* p; int i; int numLeft, numRight; int direction; // for each leaf, determine whether the leaf is left or right of the root. dist = (*maxDist) = 0; numLeft = numRight = 0; for (i = 0; i < numSeq; i++) { p = lptr[i]; dist = 0.0; while (p->parent != root) { dist += p->dist; p = p->parent; } if (p == root->left) { direction = LEFT; } else { direction = RIGHT; } dist += p->dist; if (direction == LEFT) { leftSum += dist; numLeft++; } else { rightSum += dist; numRight++; } if (dist > (*maxDist)) { *maxDist = dist; } } leftMean = leftSum / numLeft; rightMean = rightSum / numRight; diff = leftMean - rightMean; return diff; } /** * * @param nptr * @param maxDist * @param nSeqs * @return */ float Tree::calcMean(clustalw::TreeNode* nptr, float *maxDist, int nSeqs) { float dist, leftSum = 0.0, rightSum = 0.0, leftMean, rightMean, diff; clustalw::TreeNode* p; clustalw::TreeNode** pathToRoot; float *distToNode; int depth = 0, i, j, n = 0; int numLeft, numRight; int direction; bool found; pathToRoot = new clustalw::TreeNode*[nSeqs]; distToNode = new float[nSeqs]; // determine all nodes between the selected node and the root; depth = 0; (*maxDist) = dist = 0.0; numLeft = numRight = 0; p = nptr; while (p != NULL) { pathToRoot[depth] = p; dist += p->dist; distToNode[depth] = dist; p = p->parent; depth++; } // for each leaf, determine whether the leaf is left or right of the node. // (RIGHT = descendant, LEFT = not descendant) for (i = 0; i < numSeq; i++) { p = lptr[i]; if (p == nptr) { direction = RIGHT; dist = 0.0; } else { direction = LEFT; dist = 0.0; // find the common ancestor. 
found = false; n = 0; while ((found == false) && (p->parent != NULL)) { for (j = 0; j < depth; j++) if (p->parent == pathToRoot[j]) { found = true; n = j; } dist += p->dist; p = p->parent; } if (p == nptr) { direction = RIGHT; } } if (direction == LEFT) { leftSum += dist; leftSum += distToNode[n - 1]; numLeft++; } else { rightSum += dist; numRight++; } if (dist > (*maxDist)) { *maxDist = dist; } } delete [] distToNode; distToNode = NULL; delete [] pathToRoot; pathToRoot = NULL; leftMean = leftSum / numLeft; rightMean = rightSum / numRight; diff = leftMean - rightMean; return (diff); } /** * */ void Tree::orderNodes() { int i; clustalw::TreeNode* p; for (i = 0; i < numSeq; i++) { p = lptr[i]; while (p != NULL) { p->order++; p = p->parent; } } } /** * * @param leaf * @return */ int Tree::calcWeight(int leaf) { clustalw::TreeNode* p; float weight = 0.0; p = olptr[leaf]; while (p->parent != NULL) { weight += p->dist / p->order; p = p->parent; } weight *= 100.0; return ((int)weight); } /** * skipSpace is used to skip all the spaces at the begining of a file. The next read * will give a character other than a space. 
* @param file */ void Tree::skipSpace(ifstream* file) { char c; do { c = file->get(); } while (isspace(c)); file->putback(c); } /** * * @param p * @param nextGroups * @param nSeqs * @param stepsPtr */ void Tree::groupSeqs(clustalw::TreeNode* p, int *nextGroups, int nSeqs, AlignmentSteps* stepsPtr) { int i; int *tmpGroups; tmpGroups = new int[nSeqs + 1]; for (i = 0; i < nSeqs; i++) { tmpGroups[i] = 0; } if (p->left != NULL) { if (p->left->leaf == NODE) { groupSeqs(p->left, nextGroups, nSeqs, stepsPtr); for (i = 0; i < nSeqs; i++) if (nextGroups[i] != 0) { tmpGroups[i] = 1; } } else { markGroup1(p->left, tmpGroups, nSeqs); } } if (p->right != NULL) { if (p->right->leaf == NODE) { groupSeqs(p->right, nextGroups, nSeqs, stepsPtr); for (i = 0; i < nSeqs; i++) if (nextGroups[i] != 0) { tmpGroups[i] = 2; } } else { markGroup2(p->right, tmpGroups, nSeqs); } stepsPtr->saveSet(nSeqs, tmpGroups); } for (i = 0; i < nSeqs; i++) { nextGroups[i] = tmpGroups[i]; } delete [] tmpGroups; tmpGroups = NULL; } /** * * @param p * @param groups * @param n */ void Tree::markGroup1(clustalw::TreeNode* p, int *groups, int n) { int i; for (i = 0; i < n; i++) { if (olptr[i] == p) { groups[i] = 1; } else { groups[i] = 0; } } } /** * * @param p * @param groups * @param n */ void Tree::markGroup2(clustalw::TreeNode* p, int *groups, int n) { int i; for (i = 0; i < n; i++) { if (olptr[i] == p) { groups[i] = 2; } else if (groups[i] != 0) { groups[i] = 1; } } } /** * * @return */ clustalw::TreeNode* Tree::avail() { clustalw::TreeNode* p; p = new TreeNode; p->left = NULL; p->right = NULL; p->parent = NULL; p->dist = 0.0; p->leaf = 0; p->order = 0; return (p); } /** * * @param p * @param parent * @param pleaf * @param pname * @param pdist */ void Tree::setInfo(TreeNode* p, TreeNode* parent, int pleaf, string pname, float pdist) { p->parent = parent; p->leaf = pleaf; p->dist = pdist; p->order = 0; p->name = pname; if (p->leaf == true) { p->left = NULL; p->right = NULL; } } } 
clustalx-2.1/clustalW/tree/RandomGenerator.h0000644000175000017500000000144211470725216021014 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /* * * RandomGenerator * * -linear and additive congruential random number generators * (see R. Sedgewick, Algorithms, Chapter 35) * * Implementation: R. Fuchs, EMBL Data Library, 1991 * */ #ifndef RANDOMGENERATOR_H #define RANDOMGENERATOR_H namespace clustalw { class RandomGenerator { public: /* Functions */ RandomGenerator(unsigned long s); unsigned long addRand(unsigned long r); /* Attributes */ private: /* Functions */ unsigned long mult(unsigned long p, unsigned long q); /* Attributes */ unsigned long j; unsigned long a[55]; unsigned long m; unsigned long m1; }; } #endif clustalx-2.1/clustalW/tree/RandomGenerator.cpp0000644000175000017500000000213011470725216021342 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "RandomGenerator.h" namespace clustalw { /** * The contructor initialises the random algorithm. * @param s * @return */ RandomGenerator::RandomGenerator(unsigned long s) : m(100000000), m1(10000) { a[0] = s; j = 0; do { ++j; a[j] = (mult(31, a[j - 1]) + 1) % m; } while (j < 54); } /** * additive congruential method. 
* @param r * @return unsigned long random number in the range 0 to r-1 */ unsigned long RandomGenerator::addRand(unsigned long r) { int x, y; j = (j + 1) % 55; x = (j + 23) % 55; y = (j + 54) % 55; a[j] = (a[x] + a[y]) % m; return (((a[j] / m1) *r) / m1); } /** * * @param p * @param q * @return */ unsigned long RandomGenerator::mult(unsigned long p, unsigned long q) { unsigned long p1, p0, q1, q0; p1 = p / m1; p0 = p % m1; q1 = q / m1; q0 = q % m1; return ((((p0 *q1 + p1 * q0) % m1) *m1 + p0 * q0) % m); } } clustalx-2.1/clustalW/tree/NJTree.h0000644000175000017500000000227011470725216017054 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef NJTREE_H #define NJTREE_H #include #include #include "ClusterTreeAlgorithm.h" #include "../general/userparams.h" namespace clustalw { class NJTree : public ClusterTreeAlgorithm { public: NJTree(): verbose(false){}; virtual ~NJTree(){}; /** calculate an NJ tree * * @param phyTree the tree structure * @param distMat distance matrix * @param seqInfo holding sequence number info * @param log ofstream to log info to (used by -outputtree) * */ virtual void generateTree(clustalw::PhyloTree* phyTree, clustalw::DistMatrix* distMat, clustalw::SeqInfo* seqInfo, ofstream* log = 0); /** be verbose during tree generation * * if set to true, generateTree will need a log ofstream */ virtual void setVerbose(bool choice){verbose = choice;}; private: vector av; vector tkill; bool verbose; }; } #endif clustalx-2.1/clustalW/tree/NJTree.cpp0000644000175000017500000006253611470725216017422 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "NJTree.h" namespace clustalw { /**************************************************************************** * [ Improvement ideas in fast_nj_tree() ] by DDBJ & FUJITSU Limited. 
* written by Tadashi Koike * (takoike@genes.nig.ac.jp) ******************* * : Store the value of sum of the score to temporary array, * and use again and again. * * In the main cycle, these are calculated again and again : * diq = sum of distMat[n][ii] (n:1 to lastSeq-firstSeq+1), * djq = sum of distMat[n][jj] (n:1 to lastSeq-firstSeq+1), * dio = sum of distMat[n][mini] (n:1 to lastSeq-firstSeq+1), * djq = sum of distMat[n][minj] (n:1 to lastSeq-firstSeq+1) * // 'lastSeq' and 'firstSeq' are both constant values // * and the result of above calculations is always same until * a best pair of neighbour nodes is joined. * * So, we change the logic to calculate the sum[i] (=sum of distMat[n][i] * (n:1 to lastSeq-firstSeq+1)) and store it to array, before * beginning to find a best pair of neighbour nodes, and after that * we use them again and again. * * tmat[i][j] * 1 2 3 4 5 * +---+---+---+---+---+ * 1 | | | | | | * +---+---+---+---+---+ * 2 | | | | | | 1) calculate sum of distMat[n][i] * +---+---+---+---+---+ (n: 1 to lastSeq-firstSeq+1) * 3 | | | | | | 2) store that sum value to sum[i] * +---+---+---+---+---+ * 4 | | | | | | 3) use sum[i] during finding a best * +---+---+---+---+---+ pair of neibour nodes. * 5 | | | | | | * +---+---+---+---+---+ * | | | | | * V V V V V Calculate sum , and store it to sum[i] * +---+---+---+---+---+ * sum[i] | | | | | | * +---+---+---+---+---+ * * At this time, we thought that we use upper triangle of the matrix * because distMat[i][j] is equal to distMat[j][i] and distMat[i][i] is equal * to zero. Therefore, we prepared sum_rows[i] and sum_cols[i] instead * of sum[i] for storing the sum value. * * distMat[i][j] * 1 2 3 4 5 sum_cols[i] * +---+---+---+---+---+ +---+ * 1 | # | # | # | # | --> | | ... sum of distMat[1][2..5] * + - +---+---+---+---+ +---+ * 2 | # | # | # | --> | | ... sum of distMat[2][3..5] * + - + - +---+---+---+ +---+ * 3 | # | # | --> | | ... sum of distMat[3][4..5] * + - + - + - +---+---+ +---+ * 4 | # | --> | | ... 
sum of distMat[4][5] * + - + - + - + - +---+ +---+ * 5 | --> | | ... zero * + - + - + - + - + - + +---+ * | | | | | * V V V V V Calculate sum , sotre to sum[i] * +---+---+---+---+---+ * sum_rows[i] | | | | | | * +---+---+---+---+---+ * | | | | | * | | | | +----- sum of distMat[1..4][5] * | | | +--------- sum of distMat[1..3][4] * | | +------------- sum of distMat[1..2][3] * | +----------------- sum of distMat[1][2] * +--------------------- zero * * And we use (sum_rows[i] + sum_cols[i]) instead of sum[i]. * ******************* * : We manage valid nodes with chain list, instead of * tkill[i] flag array. * * In original logic, invalid(killed?) nodes after nodes-joining * are managed with tkill[i] flag array (set to 1 when killed). * By this method, it is conspicuous to try next node but skip it * at the latter of finding a best pair of neighbor nodes. * * So, we thought that we managed valid nodes by using a chain list * as below: * * 1) declare the list structure. * struct { * sint n; // entry number of node. * void *prev; // pointer to previous entry. * void *next; // pointer to next entry. * } * 2) construct a valid node list. * * +-----+ +-----+ +-----+ +-----+ +-----+ * NULL<-|prev |<---|prev |<---|prev |<---|prev |<- - - -|prev | * | 0 | | 1 | | 2 | | 3 | | n | * | next|--->| next|--->| next|--->| next|- - - ->| next|->NULL * +-----+ +-----+ +-----+ +-----+ +-----+ * * 3) when finding a best pair of neighbor nodes, we use * this chain list as loop counter. * * 4) If an entry was killed by node-joining, this chain list is * modified to remove that entry. * * EX) remove the entry No 2. * +-----+ +-----+ +-----+ +-----+ * NULL<-|prev |<---|prev |<--------------|prev |<- - - -|prev | * | 0 | | 1 | | 3 | | n | * | next|--->| next|-------------->| next|- - - ->| next|->NULL * +-----+ +-----+ +-----+ +-----+ * +-----+ * NULL<-|prev | * | 2 | * | next|->NULL * +-----+ * * By this method, speed is up at the latter of finding a best pair of * neighbor nodes. 
* ******************* * : Cut the frequency of division. * * At comparison between 'total' and 'tmin' in the main cycle, total is * divided by (2.0*fnseqs2) before comparison. If N nodes are available, * that division happen (N*(N-1))/2 order. * * We thought that the comparison relation between tmin and total/(2.0*fnseqs2) * is equal to the comparison relation between (tmin*2.0*fnseqs2) and total. * Calculation of (tmin*2.0*fnseqs2) is only one time. so we stop dividing * a total value and multiply tmin and (tmin*2.0*fnseqs2) instead. * ******************* * : some transformation of the equation (to cut operations). * * We transform an equation of calculating 'total' in the main cycle. * */ void NJTree::generateTree(clustalw::PhyloTree* phyTree, clustalw::DistMatrix* distMat, clustalw::SeqInfo* seqInfo, ofstream* log) { if (log == NULL) { verbose = false; } register int i; int l[4], nude, k; int nc, mini, minj, j, ii, jj; double fnseqs, fnseqs2 = 0, sumd; double diq, djq, dij, dio, djo, da; double tmin, total, dmin; double bi, bj, b1, b2, b3, branch[4]; int typei, typej; /* 0 = node; 1 = OTU */ int firstSeq = seqInfo->firstSeq; int lastSeq = seqInfo->lastSeq; int numSeqs = seqInfo->numSeqs; /* IMPROVEMENT 1, STEP 0 : declare variables */ double *sumCols, *sumRows, *join; sumCols = new double[numSeqs + 1]; sumRows = new double[numSeqs + 1]; join = new double[numSeqs + 1]; /* IMPROVEMENT 2, STEP 0 : declare variables */ int loop_limit; typedef struct _ValidNodeID { int n; struct _ValidNodeID *prev; struct _ValidNodeID *next; } ValidNodeID; ValidNodeID *tvalid, *lpi, *lpj, *lpii, *lpjj, *lp_prev, *lp_next; /* * correspondence of the loop counter variables. * i .. lpi->n, ii .. lpii->n * j .. lpj->n, jj .. lpjj->n */ fnseqs = (double)lastSeq - firstSeq + 1; /*********************** First initialisation ***************************/ if (verbose) { (*log) << "\n\n\t\t\tNeighbor-joining Method\n" << "\n Saitou, N. and Nei, M. 
(1987)" << " The Neighbor-joining Method:" << "\n A New Method for Reconstructing Phylogenetic Trees." << "\n Mol. Biol. Evol., 4(4), 406-425\n" << "\n\n This is an UNROOTED tree\n" << "\n Numbers in parentheses are branch lengths\n\n"; } if (fnseqs == 2) { if (verbose) { (*log) << "Cycle 1 = SEQ: 1 (" << setw(9) << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ") joins SEQ: 2 (" << setw(9) << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ")"; } return ; } mini = minj = 0; /* IMPROVEMENT 1, STEP 1 : Allocate memory */ /* IMPROVEMENT 1, STEP 2 : Initialize arrays to 0 */ phyTree->leftBranch.resize(numSeqs + 2, 0.0); phyTree->rightBranch.resize(numSeqs + 2, 0.0); tkill.resize(numSeqs + 1, 0); av.resize(numSeqs + 1, 0.0); /* IMPROVEMENT 2, STEP 1 : Allocate memory */ tvalid = new ValidNodeID[numSeqs + 1]; /* tvalid[0] is special entry in array. it points a header of valid entry list */ tvalid[0].n = 0; tvalid[0].prev = NULL; tvalid[0].next = &tvalid[1]; /* IMPROVEMENT 2, STEP 2 : Construct and initialize the entry chain list */ for (i = 1, loop_limit = lastSeq - firstSeq + 1, lpi = &tvalid[1], lp_prev = &tvalid[0], lp_next = &tvalid[2]; i <= loop_limit; ++i, ++lpi, ++lp_prev, ++lp_next) { (*distMat)(i, i) = av[i] = 0.0; tkill[i] = 0; lpi->n = i; lpi->prev = lp_prev; lpi->next = lp_next; } tvalid[loop_limit].next = NULL; /* * IMPROVEMENT 1, STEP 3 : Calculate the sum of score value that * is sequence[i] to others. 
*/ double matValue; sumd = 0.0; for (lpj = tvalid[0].next; lpj != NULL; lpj = lpj->next) { double tmp_sum = 0.0; j = lpj->n; /* calculate sumRows[j] */ for (lpi = tvalid[0].next; lpi->n < j; lpi = lpi->next) { i = lpi->n; matValue = (*distMat)(i, j); tmp_sum = tmp_sum + matValue; } sumRows[j] = tmp_sum; tmp_sum = 0.0; /* Set lpi to that lpi->n is greater than j */ if ((lpi != NULL) && (lpi->n == j)) { lpi = lpi->next; } /* calculate sumCols[j] */ for (; lpi != NULL; lpi = lpi->next) { i = lpi->n; tmp_sum += (*distMat)(j, i); } sumCols[j] = tmp_sum; } /*********************** Enter The Main Cycle ***************************/ for (nc = 1, loop_limit = (lastSeq - firstSeq + 1-3); nc <= loop_limit; ++nc) { sumd = 0.0; /* IMPROVEMENT 1, STEP 4 : use sum value */ for (lpj = tvalid[0].next; lpj != NULL; lpj = lpj->next) { sumd += sumCols[lpj->n]; } /* IMPROVEMENT 3, STEP 0 : multiply tmin and 2*fnseqs2 */ fnseqs2 = fnseqs - 2.0; /* Set fnseqs2 at this point. */ tmin = 99999.0 * 2.0 * fnseqs2; /*.................compute SMATij values and find the smallest one ........*/ mini = minj = 0; /* jj must starts at least 2 */ if ((tvalid[0].next != NULL) && (tvalid[0].next->n == 1)) { lpjj = tvalid[0].next->next; } else { lpjj = tvalid[0].next; } for (; lpjj != NULL; lpjj = lpjj->next) { jj = lpjj->n; for (lpii = tvalid[0].next; lpii->n < jj; lpii = lpii->next) { ii = lpii->n; diq = djq = 0.0; /* IMPROVEMENT 1, STEP 4 : use sum value */ diq = sumCols[ii] + sumRows[ii]; djq = sumCols[jj] + sumRows[jj]; /* * always ii < jj in this point. Use upper * triangle of score matrix. */ dij = (*distMat)(ii, jj); /* * IMPROVEMENT 3, STEP 1 : fnseqs2 is * already calculated. 
*/ /* fnseqs2 = fnseqs - 2.0 */ /* IMPROVEMENT 4 : transform the equation */ /*-------------------------------------------------------------------* * OPTIMIZE of expression 'total = d2r + fnseqs2*dij + dr*2.0' * * total = d2r + fnseq2*dij + 2.0*dr * * = d2r + fnseq2*dij + 2(sumd - dij - d2r) * * = d2r + fnseq2*dij + 2*sumd - 2*dij - 2*d2r * * = fnseq2*dij + 2*sumd - 2*dij - 2*d2r + d2r * * = fnseq2*dij + 2*sumd - 2*dij - d2r * * = fnseq2*dij + 2*sumd - 2*dij - (diq + djq - 2*dij) * * = fnseq2*dij + 2*sumd - 2*dij - diq - djq + 2*dij * * = fnseq2*dij + 2*sumd - 2*dij + 2*dij - diq - djq * * = fnseq2*dij + 2*sumd - diq - djq * *-------------------------------------------------------------------*/ total = fnseqs2 * dij + 2.0 * sumd - diq - djq; /* * IMPROVEMENT 3, STEP 2 : abbrevlate * the division on comparison between * total and tmin. */ /* total = total / (2.0*fnseqs2); */ if (total < tmin) { tmin = total; mini = ii; minj = jj; } } } /* MEMO: always ii < jj in avobe loop, so mini < minj */ /*.................compute branch lengths and print the results ........*/ dio = djo = 0.0; /* IMPROVEMENT 1, STEP 4 : use sum value */ dio = sumCols[mini] + sumRows[mini]; djo = sumCols[minj] + sumRows[minj]; dmin = (*distMat)(mini, minj); dio = (dio - dmin) / fnseqs2; djo = (djo - dmin) / fnseqs2; bi = (dmin + dio - djo) *0.5; bj = dmin - bi; bi = bi - av[mini]; bj = bj - av[minj]; #if 0 (*log) << endl << "*** cycle " << nc << endl; (*log) << "dmin = " << setw(9) << right << setprecision(9) << dmin << endl; (*log) << "dio = " << setw(9) << right << setprecision(9) << dio << endl; (*log) << "djo = " << setw(9) << right << setprecision(9) << djo << endl; (*log) << "bi = " << setw(9) << right << setprecision(9) << bi << endl; (*log) << "bj = " << setw(9) << right << setprecision(9) << bj << endl; (*log) << "mini = " << setw(9) << mini << endl; (*log) << "minj = " << setw(9) << minj << endl; (*log) << "av[minj] = " << setw(9) << right << setprecision(9) << av[minj] << endl; (*log) 
<< "av[minj] = " << setw(9) << right << setprecision(9) << av[minj] << endl; #endif if (av[mini] > 0.0) { typei = 0; } else { typei = 1; } if (av[minj] > 0.0) { typej = 0; } else { typej = 1; } if (verbose) { (*log) << "\n Cycle" << setw(4) << nc << " = "; } /* set (tiny? (AW&FS)) negative branch lengths to zero. Also set any tiny positive branch lengths to zero. */ if (fabs(bi) < 0.0001) { bi = 0.0; } if (fabs(bj) < 0.0001) { bj = 0.0; } if (verbose) { if (typei == 0) { (*log) << "Node:" << setw(4) << mini << " (" << setw(9) << setprecision(5) << bi << ") joins "; } else { (*log) << " SEQ:" << setw(4) << mini << " (" << setw(9) << setprecision(5) << bi << ") joins "; } if (typej == 0) { (*log) << "Node:" << setw(4) << minj << " (" << setw(9) << setprecision(5) << bj << ")"; } else { (*log) << " SEQ:" << setw(4) << minj << " (" << setw(9) << setprecision(5) << bj << ")"; } (*log) << "\n"; } phyTree->leftBranch[nc] = bi; phyTree->rightBranch[nc] = bj; for (i = 1; i <= lastSeq - firstSeq + 1; i++) { phyTree->treeDesc[nc][i] = 0; } if (typei == 0) { for (i = nc - 1; i >= 1; i--) if (phyTree->treeDesc[i][mini] == 1) { for (j = 1; j <= lastSeq - firstSeq + 1; j++) if (phyTree->treeDesc[i][j] == 1) { phyTree->treeDesc[nc][j] = 1; } break; } } else { phyTree->treeDesc[nc][mini] = 1; } if (typej == 0) { for (i = nc - 1; i >= 1; i--) if (phyTree->treeDesc[i][minj] == 1) { for (j = 1; j <= lastSeq - firstSeq + 1; j++) if (phyTree->treeDesc[i][j] == 1) { phyTree->treeDesc[nc][j] = 1; } break; } } else { phyTree->treeDesc[nc][minj] = 1; } /* Here is where the -0.00005 branch lengths come from for 3 or more identical seqs. */ /* if(dmin <= 0.0) dmin = 0.0001; */ if (dmin <= 0.0) { dmin = 0.000001; } av[mini] = dmin * 0.5; /*........................Re-initialisation................................*/ fnseqs = fnseqs - 1.0; tkill[minj] = 1; /* IMPROVEMENT 2, STEP 3 : Remove tvalid[minj] from chain list. 
*/ /* [ Before ] * +---------+ +---------+ +---------+ * |prev |<-------|prev |<-------|prev |<--- * | n | | n(=minj)| | n | * | next|------->| next|------->| next|---- * +---------+ +---------+ +---------+ * * [ After ] * +---------+ +---------+ * |prev |<--------------------------|prev |<--- * | n | | n | * | next|-------------------------->| next|---- * +---------+ +---------+ * +---------+ * NULL---|prev | * | n(=minj)| * | next|---NULL * +---------+ */ (tvalid[minj].prev)->next = tvalid[minj].next; if (tvalid[minj].next != NULL) { (tvalid[minj].next)->prev = tvalid[minj].prev; } tvalid[minj].prev = tvalid[minj].next = NULL; /* IMPROVEMENT 1, STEP 5 : re-calculate sum values. */ for (lpj = tvalid[0].next; lpj != NULL; lpj = lpj->next) { double tmp_di = 0.0; double tmp_dj = 0.0; j = lpj->n; /* * subtrace a score value related with 'minj' from * sum arrays . */ if (j < minj) { tmp_dj = (*distMat)(j, minj); sumCols[j] -= tmp_dj; } else if (j > minj) { tmp_dj = (*distMat)(minj, j); sumRows[j] -= tmp_dj; } /* nothing to do when j is equal to minj. */ /* * subtrace a score value related with 'mini' from * sum arrays . */ if (j < mini) { tmp_di = (*distMat)(j, mini); sumCols[j] -= tmp_di; } else if (j > mini) { tmp_di = (*distMat)(mini, j); sumRows[j] -= tmp_di; } /* nothing to do when j is equal to mini. */ /* * calculate a score value of the new inner node. * then, store it temporary to join[] array. */ join[j] = (tmp_dj + tmp_di) *0.5; } /* * 1) * Set the score values (stored in join[]) into the matrix, * row/column position is 'mini'. * 2) * Add a score value of the new inner node to sum arrays. */ for (lpj = tvalid[0].next; lpj != NULL; lpj = lpj->next) { j = lpj->n; if (j < mini) { distMat->SetAt(j, mini, join[j]); sumCols[j] += join[j]; } else if (j > mini) { distMat->SetAt(mini, j, join[j]); sumRows[j] += join[j]; } /* nothing to do when j is equal to mini. */ } /* Re-calculate sumRows[mini],sumCols[mini]. 
*/ sumCols[mini] = sumRows[mini] = 0.0; /* calculate sumRows[mini] */ da = 0.0; for (lpj = tvalid[0].next; lpj->n < mini; lpj = lpj->next) { da = da + join[lpj->n]; } sumRows[mini] = da; /* skip if 'lpj->n' is equal to 'mini' */ if ((lpj != NULL) && (lpj->n == mini)) { lpj = lpj->next; } /* calculate sumCols[mini] */ da = 0.0; for (; lpj != NULL; lpj = lpj->next) { da = da + join[lpj->n]; } sumCols[mini] = da; /* * Clean up sumRows[minj], sumCols[minj] and score matrix * related with 'minj'. */ sumCols[minj] = sumRows[minj] = 0.0; for (j = 1; j <= lastSeq - firstSeq + 1; ++j) { distMat->SetAt(minj, j, 0.0); distMat->SetAt(j, minj, 0.0); join[j] = 0.0; } } /** end main cycle **/ /******************************Last Cycle (3 Seqs. left)********************/ nude = 1; for (lpi = tvalid[0].next; lpi != NULL; lpi = lpi->next) { l[nude] = lpi->n; ++nude; } b1 = ((*distMat)(l[1], l[2]) + (*distMat)(l[1], l[3]) - (*distMat)(l[2], l[3])) *0.5; b2 = (*distMat)(l[1], l[2]) - b1; b3 = (*distMat)(l[1], l[3]) - b1; branch[1] = b1 - av[l[1]]; branch[2] = b2 - av[l[2]]; branch[3] = b3 - av[l[3]]; /* Reset tiny negative and positive branch lengths to zero */ if (fabs(branch[1]) < 0.0001) { branch[1] = 0.0; } if (fabs(branch[2]) < 0.0001) { branch[2] = 0.0; } if (fabs(branch[3]) < 0.0001) { branch[3] = 0.0; } phyTree->leftBranch[lastSeq - firstSeq + 1-2] = branch[1]; phyTree->leftBranch[lastSeq - firstSeq + 1-1] = branch[2]; phyTree->leftBranch[lastSeq - firstSeq + 1] = branch[3]; for (i = 1; i <= lastSeq - firstSeq + 1; i++) { phyTree->treeDesc[lastSeq - firstSeq + 1-2][i] = 0; } if (verbose) { (*log) << "\n Cycle" << setw(4) << nc << " (Last cycle, trichotomy):\n"; } for (i = 1; i <= 3; ++i) { if (av[l[i]] > 0.0) { if (verbose) { (*log) << "\n\t\t Node:" << setw(4) << l[i] <<" (" << setw(9) << setprecision(5) << branch[i] << ") "; } for (k = lastSeq - firstSeq + 1-3; k >= 1; k--) if (phyTree->treeDesc[k][l[i]] == 1) { for (j = 1; j <= lastSeq - firstSeq + 1; j++) if 
(phyTree->treeDesc[k][j] == 1) { phyTree->treeDesc[lastSeq - firstSeq + 1-2][j] = i; } break; } } else { if (verbose) { (*log) << "\n\t\t SEQ:" << setw(4) << l[i] << " (" << setw(9) << setprecision(5) << branch[i] << ") "; } phyTree->treeDesc[lastSeq - firstSeq + 1-2][l[i]] = i; } if (i < 3) { if (verbose) { (*log) << "joins"; } } } if (verbose) { (*log) << "\n"; } /* IMPROVEMENT 2, STEP 4 : release memory area */ delete [] tvalid; delete [] sumCols; delete [] sumRows; delete [] join; } } clustalx-2.1/clustalW/tree/ClusterTreeOutput.h0000644000175000017500000000272011470725216021407 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef CLUSTERTREEOUTPUT_H #define CLUSTERTREEOUTPUT_H #include #include #include "../alignment/Alignment.h" #include "../general/clustalw.h" namespace clustalw { class ClusterTreeOutput { public: ClusterTreeOutput(clustalw::SeqInfo* seqInfo, int boot); void printNexusTree(clustalw::PhyloTree* phyloTree, ofstream* tree, clustalw::Alignment *alignPtr, clustalw::DistMatrix* distMat, vector* bootTotals); void printTree(clustalw::PhyloTree* phyloTree, ofstream* tree, vector* totals); void printPhylipTree(clustalw::PhyloTree* phyloTree, ofstream* tree, clustalw::Alignment *alignPtr, clustalw::DistMatrix* distMat, vector* bootTotals); void printTreeDesc(clustalw::PhyloTree* phyloTree); private: ClusterTreeOutput(); // Dont allow contruction with default!!!! int twoWaySplit(clustalw::PhyloTree* phyloTree, ofstream* tree, int startRow, int flag, clustalw::Alignment *alignPtr, vector* bootTotals); int twoWaySplitNexus(clustalw::PhyloTree* phyloTree, ofstream* tree, int startRow, int flag, clustalw::Alignment *alignPtr, vector* bootTotals); /* Attributes! 
*/ int firstSeq; int lastSeq; int numSeqs; int bootstrap; }; } #endif clustalx-2.1/clustalW/tree/ClusterTreeOutput.cpp0000644000175000017500000003111011470725216021735 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ClusterTreeOutput.h" namespace clustalw { ClusterTreeOutput::ClusterTreeOutput(SeqInfo* seqInfo, int boot) : bootstrap(boot) { firstSeq = seqInfo->firstSeq; lastSeq = seqInfo->lastSeq; numSeqs = seqInfo->numSeqs; } void ClusterTreeOutput::printTreeDesc(PhyloTree* phyloTree) { for(int i = 1; i <= numSeqs; i++) { for(int j = 1; j <= numSeqs; j++) { cout << " " << phyloTree->treeDesc[i][j]; } cout << "\n"; } } /** * The function printPhylipTree is used to print out the unrooted clustered tree in * phylip format. * @param phyloTree A pointer to the PhyloTree struct that contains the description of the * tree. * @param tree The file to print the phylip tree to. Must be open! * @param alignPtr The alignment object. Needed for the names. * @param distMat The distance matrix that was used for the generation of the cluster. * @param bootTotals Holds the bootstrap values. 
Only used if the tree has been bootstrapped */ void ClusterTreeOutput::printPhylipTree(PhyloTree* phyloTree, ofstream* tree, Alignment *alignPtr, DistMatrix* distMat, vector* bootTotals) { int oldRow; if (lastSeq - firstSeq + 1 == 2) { (*tree) << "(" << alignPtr->getName(firstSeq) << ":" << fixed <getName(firstSeq + 1) << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ");"; return ; } (*tree) << "(\n"; oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 1, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 2]; if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ",\n"; oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 2, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 1]; if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ",\n"; oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 3, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq - firstSeq + 1]; if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ")"; if (bootstrap == BS_NODE_LABELS) { (*tree) << "TRICHOTOMY"; } (*tree) << ";\n"; } int ClusterTreeOutput::twoWaySplit(PhyloTree* phyloTree, ofstream* tree, int startRow, int flag, Alignment *alignPtr, vector* bootTotals) { int row, newRow = 0, oldRow, col, testCol = 0; bool singleSeq; if (startRow != lastSeq - firstSeq + 1-2) { (*tree) << "(\n"; } for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if (phyloTree->treeDesc[startRow][col] == flag) { testCol = col; break; } } singleSeq = true; for (row = startRow - 1; row >= 1; row--) if 
(phyloTree->treeDesc[row][testCol] == 1) { singleSeq = false; newRow = row; break; } if (singleSeq) { phyloTree->treeDesc[startRow][testCol] = 0; (*tree) << alignPtr->getName(testCol + firstSeq - 1); if (startRow == lastSeq - firstSeq + 1 - 2) { return (0); } (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow] << ",\n"; } else { for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if ((phyloTree->treeDesc[startRow][col] == 1) && (phyloTree->treeDesc[newRow][col] == 1)) { phyloTree->treeDesc[startRow][col] = 0; } } oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals); if (startRow == lastSeq - firstSeq + 1-2) { return (newRow); } (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow]; if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ",\n"; } for (col = 1; col <= lastSeq - firstSeq + 1; col++) if (phyloTree->treeDesc[startRow][col] == flag) { testCol = col; break; } singleSeq = true; newRow = 0; for (row = startRow - 1; row >= 1; row--) if (phyloTree->treeDesc[row][testCol] == 1) { singleSeq = false; newRow = row; break; } if (singleSeq) { phyloTree->treeDesc[startRow][testCol] = 0; (*tree) << alignPtr->getName(testCol + firstSeq - 1); (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow] <<")\n"; } else { for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if ((phyloTree->treeDesc[startRow][col] == 1) && (phyloTree->treeDesc[newRow][col] == 1)) { phyloTree->treeDesc[startRow][col] = 0; } } oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow]; if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ")\n"; } if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0)) { (*tree) << (*bootTotals)[startRow]; } return 
(startRow); } void ClusterTreeOutput::printNexusTree(PhyloTree* phyloTree, ofstream* tree, Alignment *alignPtr, DistMatrix* distMat, vector* bootTotals) { int i; int oldRow; (*tree) << "#NEXUS\n\n"; (*tree) << "BEGIN TREES;\n\n"; (*tree) << "\tTRANSLATE\n"; for(i = 1; i < numSeqs; i++) { (*tree) << "\t\t" << i << "\t" << alignPtr->getName(i) <<",\n"; } (*tree) << "\t\t" << numSeqs << "\t" << alignPtr->getName(numSeqs) <<"\n"; (*tree) << "\t\t;\n"; (*tree) << "\tUTREE PAUP_1= "; if(lastSeq - firstSeq + 1 == 2) { (*tree) << "(" << firstSeq << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << "," << firstSeq + 1 << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ")"; } else { (*tree) << "("; oldRow = twoWaySplitNexus(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 1, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq-firstSeq+1-2]; if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow]>0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ","; oldRow = twoWaySplitNexus(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 2, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq-firstSeq+1-1]; if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ","; oldRow = twoWaySplitNexus(phyloTree, tree, lastSeq-firstSeq+1-2, 3, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[lastSeq-firstSeq+1]; if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ")"; if (bootstrap == BS_NODE_LABELS) { (*tree) << "TRICHOTOMY"; } (*tree) << ";"; } (*tree) << "\nENDBLOCK;\n"; } int ClusterTreeOutput::twoWaySplitNexus(PhyloTree* phyloTree, ofstream* tree, int startRow, int flag, Alignment *alignPtr, vector* bootTotals) { 
int row, newRow = 0, oldRow, col, testCol = 0; bool singleSeq; if (startRow != lastSeq - firstSeq + 1 - 2) { (*tree) << "("; } for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if (phyloTree->treeDesc[startRow][col] == flag) { testCol = col; break; } } singleSeq = true; for (row = startRow - 1; row >= 1; row--) if (phyloTree->treeDesc[row][testCol] == 1) { singleSeq = false; newRow = row; break; } if (singleSeq) { phyloTree->treeDesc[startRow][testCol] = 0; (*tree) << testCol + firstSeq - 1;; if (startRow == lastSeq - firstSeq + 1-2) { return (0); } (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow] << ","; } else { for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if ((phyloTree->treeDesc[startRow][col] == 1) && (phyloTree->treeDesc[newRow][col] == 1)) { phyloTree->treeDesc[startRow][col] = 0; } } oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals); if (startRow == lastSeq - firstSeq + 1-2) { return (newRow); } (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow]; if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ","; } for (col = 1; col <= lastSeq - firstSeq + 1; col++) if (phyloTree->treeDesc[startRow][col] == flag) { testCol = col; break; } singleSeq = true; newRow = 0; for (row = startRow - 1; row >= 1; row--) if (phyloTree->treeDesc[row][testCol] == 1) { singleSeq = false; newRow = row; break; } if (singleSeq) { phyloTree->treeDesc[startRow][testCol] = 0; (*tree) << testCol + firstSeq - 1; (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow] << ")"; } else { for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if ((phyloTree->treeDesc[startRow][col] == 1) && (phyloTree->treeDesc[newRow][col] == 1)) { phyloTree->treeDesc[startRow][col] = 0; } } oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals); (*tree) << ":" << fixed << setprecision(5) << 
phyloTree->rightBranch[startRow]; if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0)) { (*tree) << "[" << (*bootTotals)[oldRow] << "]"; } (*tree) << ")"; } if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0)) { (*tree) << (*bootTotals)[startRow]; } return (startRow); } void ClusterTreeOutput::printTree(PhyloTree* phyloTree, ofstream* tree, vector* totals) { int row, col; (*tree) << "\n"; for (row = 1; row <= lastSeq - firstSeq + 1 - 3; row++) { (*tree) << " \n"; for (col = 1; col <= lastSeq - firstSeq + 1; col++) { if (phyloTree->treeDesc[row][col] == 0) { (*tree) << "*"; } else { (*tree) << "."; } } if ((*totals)[row] > 0) { (*tree) << setw(7) << (*totals)[row]; } } (*tree) << " \n"; for (col = 1; col <= lastSeq - firstSeq + 1; col++) { (*tree) << setw(1) << phyloTree->treeDesc[lastSeq - firstSeq + 1 -2][col]; } (*tree) << "\n"; } } clustalx-2.1/clustalW/tree/ClusterTreeAlgorithm.h0000644000175000017500000000113411470725216022033 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef CLUSTERTREEALGORITHM_H #define CLUSTERTREEALGORITHM_H #include #include #include #include "../general/clustalw.h" namespace clustalw { class ClusterTreeAlgorithm { public: virtual ~ClusterTreeAlgorithm(){}; virtual void generateTree(clustalw::PhyloTree* phyTree, clustalw::DistMatrix* distMat, clustalw::SeqInfo* seqInfo, ofstream* tree = 0) = 0; virtual void setVerbose(bool choice) = 0; }; } #endif clustalx-2.1/clustalW/tree/ClusterTree.h0000644000175000017500000000672111470725216020173 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This is the interface class for the clustering part of the code. The 3 public functions * are the 3 things that can be done. 
1) Generate a tree from alignment (treeFromAlignment), * 2) Generate a tree from a distance matrix (treeFromDistMatrix), or * 3) Bootstrap a tree (bootstrapTree). **/ #ifndef CLUSTALTREEBASE_H #define CLUSTALTREEBASE_H #include #include #include #include #include #include "../alignment/Alignment.h" #include "NJTree.h" #include "ClusterTreeOutput.h" #include "../general/OutputFile.h" #include "ClusterTreeAlgorithm.h" #include namespace clustalw { class OutputFile; class ClusterTree { public: /* Functions */ ClusterTree(); /* Attributes */ protected: // This is because we will have a derived class that needs these. /* Functions */ void overspillMessage(int overspill,int total_dists); void treeGapDelete(clustalw::Alignment *alignPtr); int dnaDistanceMatrix(ofstream* treeFile, clustalw::Alignment *alignPtr); int protDistanceMatrix(ofstream* treeFile, clustalw::Alignment *alignPtr); bool isAmbiguity(int c); void calcPercIdentity(ofstream* pfile, clustalw::Alignment *alignPtr); void compareTree(clustalw::PhyloTree* tree1, clustalw::PhyloTree* tree2, vector* hits, int n); //string getOutputFileName(const string prompt, string path, // const string fileExtension); bool transition(int base1, int base2); void distanceMatrixOutput(ofstream* outFile, clustalw::DistMatrix* matToPrint, clustalw::Alignment *alignPtr); bool openFilesForBootstrap(clustalw::OutputFile* clustalFile, clustalw::OutputFile* phylipFile, clustalw::OutputFile* nexusFile, clustalw::TreeNames* treeNames, string* path); bool openFilesForTreeFromAlignment(clustalw::OutputFile* clustalFile, clustalw::OutputFile* phylipFile, clustalw::OutputFile* distFile, clustalw::OutputFile* nexusFile, clustalw::OutputFile* pimFile, clustalw::TreeNames* treeNames, string* path); int calcQuickDistMatForAll(ofstream* clustalFile, ofstream* phylipFile, ofstream* nexusFile, ofstream* pimFile, ofstream* distFile, clustalw::Alignment* alignPtr); int calcQuickDistMatForSubSet(ofstream* clustalFile, ofstream* phylipFile, ofstream* 
nexusFile, clustalw::Alignment* alignPtr, bool inBootLoop = false); void printBootstrapHeaderToClustalFile(ofstream* clustalFile); void promptForBoolSeedAndNumTrials(); void printErrorMessageForBootstrap(int totalOverspill, int totalDists, int nfails); bool checkIfConditionsMet(int numSeqs, int min); /* Attributes */ ClusterTreeAlgorithm* clusAlgorithm; auto_ptr quickDistMat; vector bootTotals; vector bootPositions; bool verbose; vector treeGaps; int numSeqs; int firstSeq; int lastSeq; string bootstrapPrompt; string bootstrapFileTypeMsg; }; } #endif clustalx-2.1/clustalW/tree/ClusterTree.cpp0000644000175000017500000010121611470725216020521 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ClusterTree.h" #include "../general/utils.h" #include "dayhoff.h" #include "RandomGenerator.h" #include #include #include "../general/OutputFile.h" namespace clustalw { ClusterTree::ClusterTree() : numSeqs(0), firstSeq(0), lastSeq(0) { bootstrapPrompt = "\nEnter name for bootstrap output file "; bootstrapFileTypeMsg = "Bootstrap output"; } /** **************************************************************************************** * The rest are private functions. 
* * * * * * * *******************************************************************************************/ void ClusterTree::distanceMatrixOutput(ofstream* outFile, clustalw::DistMatrix* matToPrint, clustalw::Alignment *alignPtr) { if(outFile == NULL || !outFile->is_open()) { clustalw::utilityObject->error("Cannot output the distance matrix, file is not open\n"); return; } int i, j; int _maxNames = alignPtr->getMaxNames(); (*outFile) << setw(6) << lastSeq - firstSeq + 1; for (i = 1; i <= lastSeq - firstSeq + 1; i++) { (*outFile) << "\n" << left << setw(_maxNames) << alignPtr->getName(i) << " "; for (j = 1; j <= lastSeq - firstSeq + 1; j++) { (*outFile) << " " << setw(6) << setprecision(3) << fixed << (*matToPrint)(i, j); if (j % 8 == 0) { if (j != lastSeq - firstSeq + 1) { (*outFile) << "\n"; } if (j != lastSeq - firstSeq + 1) { (*outFile) << " "; } } } } } void ClusterTree::overspillMessage(int overspill,int totalDists) { std::ostringstream ssOverSpill; std::ostringstream ssTotalDists; string message; ssOverSpill << overspill; message += ssOverSpill.str(); message += " of the distances out of a total of "; ssTotalDists << totalDists; message += ssTotalDists.str(); message += "\n were out of range for the distance correction.\n" "\n SUGGESTIONS: 1) remove the most distant sequences" "\n or 2) use the PHYLIP package" "\n or 3) turn off the correction." "\n Note: Use option 3 with caution! With this degree" "\n of divergence you will have great difficulty" "\n getting robust and reliable trees.\n\n"; clustalw::utilityObject->warning(message.c_str()); } /* * The function treeGapDelete flags all the positions that have a gap in any sequence. * */ // NOTE there is something wrong with using _lenFirstSeq. But this is what old clustal does. 
void ClusterTree::treeGapDelete(clustalw::Alignment *alignPtr) { int seqn; int posn; int _maxAlnLength = alignPtr->getMaxAlnLength(); int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); int _gapPos1 = clustalw::userParameters->getGapPos1(); int _gapPos2 = clustalw::userParameters->getGapPos2(); treeGaps.resize(_maxAlnLength + 1); for (posn = 1; posn <= _lenFirstSeq; ++posn) { treeGaps[posn] = 0; for (seqn = 1; seqn <= lastSeq - firstSeq + 1; ++seqn) { const vector* _seqM = alignPtr->getSequence(seqn + firstSeq - 1); if(posn > alignPtr->getSeqLength(seqn + firstSeq - 1)) { break; // Dont read locations that cannot be read! } if (((*_seqM)[posn] == _gapPos1) || ((*_seqM)[posn] == _gapPos2)) { treeGaps[posn] = 1; break; } } } } int ClusterTree::dnaDistanceMatrix(ofstream* treeFile, clustalw::Alignment *alignPtr) { int m, n; int j, i; int res1, res2; int overspill = 0; double p, q, e, a, b, k; treeGapDelete(alignPtr); // flag positions with gaps (tree_gaps[i] = 1 ) if (verbose) { (*treeFile) << "\n"; (*treeFile) << "\n DIST = percentage divergence (/100)"; (*treeFile) << "\n p = rate of transition (A <-> G; C <-> T)"; (*treeFile) << "\n q = rate of transversion"; (*treeFile) << "\n Length = number of sites used in comparison"; (*treeFile) << "\n"; if (clustalw::userParameters->getTossGaps()) { (*treeFile) << "\n All sites with gaps (in any sequence) deleted!"; (*treeFile) << "\n"; } if (clustalw::userParameters->getKimura()) { (*treeFile) << "\n Distances corrected by Kimura's 2 parameter model:"; (*treeFile) << "\n\n Kimura, M. (1980)"; (*treeFile) << " A simple method for estimating evolutionary "; (*treeFile) << "rates of base"; (*treeFile) << "\n substitutions through comparative studies of "; (*treeFile) << "nucleotide sequences."; (*treeFile) << "\n J. Mol. 
Evol., 16, 111-120."; (*treeFile) << "\n\n"; } } int _numSeqs = alignPtr->getNumSeqs(); quickDistMat.reset(new clustalw::DistMatrix(_numSeqs + 1)); int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); int _gapPos1 = clustalw::userParameters->getGapPos1(); int _gapPos2 = clustalw::userParameters->getGapPos2(); int lenSeqM, lenSeqN; // for every pair of sequence for (m = 1; m < lastSeq - firstSeq + 1; ++m) { const vector* _seqM = alignPtr->getSequence(m + firstSeq - 1); lenSeqM = alignPtr->getSeqLength(m + firstSeq - 1); for (n = m + 1; n <= lastSeq - firstSeq + 1; ++n) { const vector* _seqN = alignPtr->getSequence(n + firstSeq - 1); lenSeqN = alignPtr->getSeqLength(n + firstSeq - 1); p = q = e = 0.0; quickDistMat->SetAt(m, n, 0.0); quickDistMat->SetAt(n ,m, 0.0); for (i = 1; i <= _lenFirstSeq; ++i) { j = bootPositions[i]; if (clustalw::userParameters->getTossGaps() && (treeGaps[j] > 0)) { goto skip; } // gap position /** ******************************************************************************* * BUG!!!!!!! NOTE this was found for protein. Presuming the same here * * NOTE: the following if statements were coded in so as to produce * * the same distance results as the old clustal. Old clustal compares * * up to the length of the first sequence. If this is longer than the * * other sequences, then the -3 and 0's are compared at the end of the * * array. These should not be compared, but I need to stick to this to * * produce the same results as the old version for testing! 
* **********************************************************************************/ if(j > lenSeqM) { if(j == lenSeqM + 1) { res1 = -3; } else { res1 = 0; } } else { res1 = (*_seqM)[j]; } if(j > lenSeqN) { if(j == lenSeqN + 1) { res2 = -3; } else { res2 = 0; } } else { res2 = (*_seqN)[j]; } if ((res1 == _gapPos1) || (res1 == _gapPos2) || (res2 == _gapPos1) || (res2 == _gapPos2)) { goto skip; } // gap in a seq if (!clustalw::userParameters->getUseAmbiguities()) { if (isAmbiguity(res1) || isAmbiguity(res2)) { goto skip; } } // ambiguity code in a seq e = e+1.0; if (res1 != res2) { if (transition(res1, res2)) { p = p + 1.0; } else { q = q + 1.0; } } skip: ; } // Kimura's 2 parameter correction for multiple substitutions if (!clustalw::userParameters->getKimura()) { if (e == 0) { cerr << "\n WARNING: sequences " << m << " and " << n << " are non-overlapping\n"; k = 0.0; p = 0.0; q = 0.0; } else { k = (p + q) / e; if (p > 0.0) { p = p / e; } else { p = 0.0; } if (q > 0.0) { q = q / e; } else { q = 0.0; } } quickDistMat->SetAt(m, n, k); quickDistMat->SetAt(n ,m, k); if (verbose) { (*treeFile) << setw(4) << m << " vs." << setw(4) << n << ": DIST = " << setw(4) << fixed << setprecision(4) << k << "; p = " << fixed << setprecision(4) << p << "; q = " << fixed << setprecision(4) << q << "; length = " << setw(6) << fixed << setprecision(0) << e << "\n"; } } else { if (e == 0) { cerr << "\n WARNING: sequences " << m << " and " << n << " are non-overlapping\n";; p = 0.0; q = 0.0; } else { if (p > 0.0) { p = p / e; } else { p = 0.0; } if (q > 0.0) { q = q / e; } else { q = 0.0; } } if (((2.0 *p) + q) == 1.0) { a = 0.0; } else { a = 1.0 / (1.0 - (2.0 *p) - q); } if (q == 0.5) { b = 0.0; } else { b = 1.0 / (1.0 - (2.0 *q)); } // watch for values going off the scale for the correction. 
if ((a <= 0.0) || (b <= 0.0)) { overspill++; k = 3.5; // arbitrary high score } else { k = 0.5 * log(a) + 0.25 * log(b); } quickDistMat->SetAt(m, n, k); quickDistMat->SetAt(n ,m, k); if (verbose) // if screen output { (*treeFile) << setw(4) << m << " vs." << setw(4) << n << ": DIST = " << fixed << setprecision(4) << k << "; p = " << fixed << setprecision(4) << p << "; q = " << fixed << setprecision(4) << q << "; length = " << setw(6) << fixed << setprecision(0) << e << "\n"; } } } } return overspill; // return the number of off-scale values } int ClusterTree::protDistanceMatrix(ofstream* treeFile, clustalw::Alignment *alignPtr) { int m, n; int j, i; int res1, res2; int overspill = 0; double p, e, k, tableEntry; treeGapDelete(alignPtr); // flag positions with gaps (tree_gaps[i] = 1 ) if (verbose) { (*treeFile) << "\n"; (*treeFile) << "\n DIST = percentage divergence (/100)"; (*treeFile) << "\n Length = number of sites used in comparison"; (*treeFile) << "\n\n"; if (clustalw::userParameters->getTossGaps()) { (*treeFile) << "\n All sites with gaps (in any sequence) deleted"; (*treeFile) << "\n"; } if (clustalw::userParameters->getKimura()) { (*treeFile) << "\n Distances up to 0.75 corrected by Kimura's empirical method:"; (*treeFile) << "\n\n Kimura, M. (1983)"; (*treeFile) << " The Neutral Theory of Molecular Evolution."; (*treeFile) << "\n Page 75. 
Cambridge University Press, Cambridge, England."; (*treeFile) << "\n\n"; } } int _numSeqs = alignPtr->getNumSeqs(); int _lenSeq1 = alignPtr->getSeqLength(1); quickDistMat.reset(new clustalw::DistMatrix(_numSeqs + 1)); int _gapPos1 = clustalw::userParameters->getGapPos1(); int _gapPos2 = clustalw::userParameters->getGapPos2(); int lenSeqM, lenSeqN; // for every pair of sequence for (m = 1; m < _numSeqs; ++m) { const vector* _seqM = alignPtr->getSequence(m); lenSeqM = alignPtr->getSeqLength(m); for (n = m + 1; n <= _numSeqs; ++n) { const vector* _seqN = alignPtr->getSequence(n); lenSeqN = alignPtr->getSeqLength(n); p = e = 0.0; quickDistMat->SetAt(m, n, 0.0); quickDistMat->SetAt(n ,m, 0.0); for (i = 1; i <= _lenSeq1; ++i) // It may be this here! { j = bootPositions[i]; if (clustalw::userParameters->getTossGaps() && (treeGaps[j] > 0)) { goto skip; } // gap position /** ******************************************************************************* * BUG!!!!!!! * * NOTE: the following if statements were coded in so as to produce * * the same distance results as the old clustal. Old clustal compares * * up to the length of the first sequence. If this is longer than the * * other sequences, then the -3 and 0's are compared at the end of the * * array. These should not be compared, but I need to stick to this to * * produce the same results as the old version for testing! 
* **********************************************************************************/ if(j > lenSeqM) { if(j == lenSeqM + 1) { res1 = -3; } else { res1 = 0; } } else { res1 = (*_seqM)[j]; } if(j > lenSeqN) { if(j == lenSeqN + 1) { res2 = -3; } else { res2 = 0; } } else { res2 = (*_seqN)[j]; } if ((res1 == _gapPos1) || (res1 == _gapPos2) || (res2 == _gapPos1) || (res2 == _gapPos2)) { goto skip; } // gap in a seq e = e + 1.0; if (res1 != res2) { p = p + 1.0; } skip: ; } if (p <= 0.0) { k = 0.0; } else { k = p / e; } if (clustalw::userParameters->getKimura()) { if (k < 0.75) { // use Kimura's formula if (k > 0.0) { k = - log(1.0 - k - (k * k / 5.0)); } } else { if (k > 0.930) { overspill++; k = 10.0; // arbitrarily set to 1000% } else // dayhoff_pams is from dayhoff.h file { tableEntry = (k * 1000.0) - 750.0; k = (double)dayhoff_pams[(int)tableEntry]; k = k / 100.0; } } } quickDistMat->SetAt(m, n, k); quickDistMat->SetAt(n ,m, k); if (verbose) { (*treeFile) << setw(4) << m << " vs." << setw(4) << n << " DIST = " << fixed << setprecision(4) << k << "; length = " << setw(6) << setprecision(0) << e << "\n"; } } } return overspill; } bool ClusterTree::isAmbiguity(int c) { int i; char codes[] = "ACGTU"; if (clustalw::userParameters->getUseAmbiguities() == true) { return false; } for (i = 0; i < 5; i++) if (clustalw::userParameters->getAminoAcidCode(c) == codes[i]) { return false; } return true; } /* * The function calcPercIdentity calculates the percent identity of the sequences * and outputs it to a the file pfile. NOTE this is not used at the moment. It was in * the old code, but there was no way to access it from the menu. This may change. 
*/ void ClusterTree::calcPercIdentity(ofstream* pfile, clustalw::Alignment *alignPtr) { clustalw::DistMatrix percentMat; float ident; int nmatch; int val1, val2; int i,j,k, length_longest; int length_shortest; int rs = 0, rl = 0; // findout sequence length, longest and shortest; length_longest = 0; length_shortest = 0; int _numSeqs = alignPtr->getNumSeqs(); int _seqLength; for (i = 1; i <= _numSeqs; i++) { _seqLength = alignPtr->getSeqLength(i); if (length_longest < _seqLength) { length_longest = _seqLength; rs = i; } if (length_shortest > _seqLength) { length_shortest = _seqLength; rl = i; } } percentMat.ResizeRect(_numSeqs + 1); nmatch = 0; int _lenSeqI, _lenSeqJ; int _maxAA = clustalw::userParameters->getMaxAA(); for (i = 1; i <= _numSeqs; i++) { const vector* _seqI = alignPtr->getSequence(i); _lenSeqI = alignPtr->getSeqLength(i); for (j = i; j <= _numSeqs; j++) { const vector* _seqJ = alignPtr->getSequence(j); _lenSeqJ = alignPtr->getSeqLength(j); cout << "\n " << alignPtr->getName(j) << " "; ident = 0; nmatch = 0; for(k = 1; k <= length_longest; k++) { if((k > _lenSeqI) || (k > _lenSeqJ)) { break; } val1 = (*_seqI)[k]; val2 = (*_seqJ)[k]; if (((val1 < 0) || (val1 > _maxAA)) || ((val2 < 0) || (val2 > _maxAA))) { continue; // residue = '-'; } if (val1 == val2) { ident++ ; nmatch++; } else { nmatch++ ; } } ident = ident/nmatch * 100.0 ; percentMat.SetAt(i, j, ident); percentMat.SetAt(j, i, ident); } } int _maxNameSize = alignPtr->getMaxNames(); (*pfile) << "#\n#\n# Percent Identity Matrix - created by Clustal" << clustalw::userParameters->getRevisionLevel() << " \n#\n#\n"; for(i = 1; i <= _numSeqs; i++) { (*pfile) << "\n " << right << setw(5) << i << ": "; (*pfile) << left << setw(_maxNameSize) << alignPtr->getName(i); for(j = 1; j <= _numSeqs; j++) { (*pfile) << setw(8) << right << fixed << setprecision(0) << percentMat(i, j); } } (*pfile) << "\n"; } void ClusterTree::compareTree(PhyloTree* tree1, PhyloTree* tree2, vector* hits, int n) { int i, j, k; int nhits1, 
nhits2; for (i = 1; i <= n - 3; i++) { for (j = 1; j <= n - 3; j++) { nhits1 = 0; nhits2 = 0; for (k = 1; k <= n; k++) { if (tree1->treeDesc[i][k] == tree2->treeDesc[j][k]) { nhits1++; } if (tree1->treeDesc[i][k] != tree2->treeDesc[j][k]) { nhits2++; } } if ((nhits1 == lastSeq - firstSeq + 1) || (nhits2 == lastSeq - firstSeq + 1)) { (*hits)[i]++; } } } } /** * NOTE this will go into the OutputFile class and will not be needed here anymore. * */ /*string ClusterTree::getOutputFileName(const string prompt, string path, const string fileExtension) { string temp; string _fileName; // Will return this name. string message; _fileName = path + fileExtension; if(_fileName.compare(clustalw::userParameters->getSeqName()) == 0) { cout << "Output file name is the same as input file.\n"; if (clustalw::userParameters->getMenuFlag()) { message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]: "; clustalw::utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } } else if (clustalw::userParameters->getMenuFlag()) { message = prompt + " [" + _fileName + "]"; clustalw::utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } return _fileName; }*/ bool ClusterTree::transition(int base1, int base2) { // assumes that the bases of DNA sequences have been translated as // a,A = 0; c,C = 1; g,G = 2; t,T,u,U = 3; N = 4; // a,A = 0; c,C = 2; g,G = 6; t,T,u,U =17; // A <--> G and T <--> C are transitions; all others are transversions. if (((base1 == 0) && (base2 == 6)) || ((base1 == 6) && (base2 == 0))) { return true; } // A <--> G if (((base1 == 17) && (base2 == 2)) || ((base1 == 2) && (base2 == 17))) { return true; } // T <--> C return false; } /** * This function is used to open all the bootstrap tree files. It opens them with the * correct message prompt. 
*/ bool ClusterTree::openFilesForBootstrap(clustalw::OutputFile* clustalFile, clustalw::OutputFile* phylipFile, clustalw::OutputFile* nexusFile, clustalw::TreeNames* treeNames, string* path) { if (clustalw::userParameters->getOutputTreeClustal()) { if(!clustalFile || !clustalFile->openFile(&(treeNames->clustalName), bootstrapPrompt, path, "njb", bootstrapFileTypeMsg)) { return false; } } if (clustalw::userParameters->getOutputTreePhylip()) { if(!phylipFile || !phylipFile->openFile(&(treeNames->phylipName), bootstrapPrompt, path, "phb", bootstrapFileTypeMsg)) { return false; } } if (clustalw::userParameters->getOutputTreeNexus()) { if(!nexusFile || !nexusFile->openFile(&(treeNames->nexusName), bootstrapPrompt, path, "treb", bootstrapFileTypeMsg)) { return false; } } return true; } bool ClusterTree::openFilesForTreeFromAlignment(clustalw::OutputFile* clustalFile, clustalw::OutputFile* phylipFile, clustalw::OutputFile* distFile, clustalw::OutputFile* nexusFile, clustalw::OutputFile* pimFile, clustalw::TreeNames* treeNames, string* path) { if (clustalw::userParameters->getOutputTreeClustal()) { if(!clustalFile || !clustalFile->openFile(&(treeNames->clustalName), "\nEnter name for CLUSTAL tree output file ", path, "nj", "Phylogenetic tree")) { return false; } } if (clustalw::userParameters->getOutputTreePhylip()) { if(!phylipFile || !phylipFile->openFile(&(treeNames->phylipName), "\nEnter name for PHYLIP tree output file ", path, "ph", "Phylogenetic tree")) { return false; } } if (clustalw::userParameters->getOutputTreeDistances()) { if(!distFile || !distFile->openFile(&(treeNames->distName), "\nEnter name for distance matrix output file ", path, "dst", "Distance matrix")) { return false; } } if (clustalw::userParameters->getOutputTreeNexus()) { if(!nexusFile || !nexusFile->openFile(&(treeNames->nexusName), "\nEnter name for NEXUS tree output file ", path, "tre", "Nexus tree")) { return false; } } if (clustalw::userParameters->getOutputPim()) { if(!pimFile || 
!pimFile->openFile(&(treeNames->pimName), "\nEnter name for % Identity matrix output file ", path, "pim", "perc identity")) { return false; } } return true; } int ClusterTree::calcQuickDistMatForAll(ofstream* clustalFile, ofstream* phylipFile, ofstream* nexusFile, ofstream* pimFile, ofstream* distFile, clustalw::Alignment* alignPtr) { int overspill = 0; bool _DNAFlag = clustalw::userParameters->getDNAFlag(); overspill = calcQuickDistMatForSubSet(clustalFile, phylipFile, nexusFile, alignPtr); if (pimFile && clustalw::userParameters->getOutputPim()) { verbose = false; // Turn off file output if (_DNAFlag) { calcPercIdentity(pimFile, alignPtr); } else { calcPercIdentity(pimFile, alignPtr); } } if (distFile && clustalw::userParameters->getOutputTreeDistances()) { verbose = false; // Turn off file output if (_DNAFlag) { overspill = dnaDistanceMatrix(distFile, alignPtr); } else { overspill = protDistanceMatrix(distFile, alignPtr); } distanceMatrixOutput(distFile, quickDistMat.get(), alignPtr); } return overspill; } int ClusterTree::calcQuickDistMatForSubSet(ofstream* clustalFile, ofstream* phylipFile, ofstream* nexusFile, clustalw::Alignment* alignPtr, bool inBootLoop) { int overspill = 0; bool _DNAFlag = clustalw::userParameters->getDNAFlag(); if (clustalFile && clustalw::userParameters->getOutputTreeClustal()) { if(!inBootLoop) { verbose = true; // Turn on file output } else { verbose = false; // Turn off when we are in the loop in bootstrap! 
} if (_DNAFlag) { overspill = dnaDistanceMatrix(clustalFile, alignPtr); } else { overspill = protDistanceMatrix(clustalFile, alignPtr); } } if (phylipFile && clustalw::userParameters->getOutputTreePhylip()) { verbose = false; // Turn off file output if (_DNAFlag) { overspill = dnaDistanceMatrix(phylipFile, alignPtr); } else { overspill = protDistanceMatrix(phylipFile, alignPtr); } } if (nexusFile && clustalw::userParameters->getOutputTreeNexus()) { verbose = false; // Turn off file output if (_DNAFlag) { overspill = dnaDistanceMatrix(nexusFile, alignPtr); } else { overspill = protDistanceMatrix(nexusFile, alignPtr); } } return overspill; } void ClusterTree::printBootstrapHeaderToClustalFile(ofstream* clustalFile) { if(clustalFile) { (*clustalFile) << "\n\n\t\t\tBootstrap Confidence Limits\n\n"; (*clustalFile) << "\n Random number generator seed = " << setw(7) << clustalw::userParameters->getBootRanSeed() << "\n"; (*clustalFile) << "\n Number of bootstrap trials = " << setw(7) << clustalw::userParameters->getBootNumTrials() << "\n"; (*clustalFile) << "\n\n Diagrammatic representation of the above tree: \n"; (*clustalFile) << "\n Each row represents 1 tree cycle;"; (*clustalFile) << " defining 2 groups.\n"; (*clustalFile) << "\n Each column is 1 sequence; "; (*clustalFile) << "the stars in each line show 1 group; "; (*clustalFile) << "\n the dots show the other\n"; (*clustalFile) << "\n Numbers show occurences in bootstrap samples."; } } void ClusterTree::promptForBoolSeedAndNumTrials() { if (clustalw::userParameters->getMenuFlag()) { unsigned int tempSeed; tempSeed = clustalw::utilityObject->getInt( "\n\nEnter seed no. 
for random number generator ", 1, 1000, clustalw::userParameters->getBootRanSeed()); clustalw::userParameters->setBootRanSeed(tempSeed); clustalw::userParameters->setBootNumTrials( clustalw::utilityObject->getInt("\n\nEnter number of bootstrap trials ", 1, 10000, clustalw::userParameters->getBootNumTrials())); } } void ClusterTree::printErrorMessageForBootstrap(int totalOverspill, int totalDists, int nfails) { cerr << "\n"; cerr << "\n WARNING: " << totalOverspill << " of the distances out of a total of " << totalDists << " times" << clustalw::userParameters->getBootNumTrials(); cerr << "\n were out of range for the distance correction."; cerr << "\n This affected " << nfails << " out of " << clustalw::userParameters->getBootNumTrials() << " bootstrap trials."; cerr << "\n This may not be fatal but you have been warned!" << "\n"; cerr << "\n SUGGESTIONS: 1) turn off the correction"; cerr << "\n or 2) remove the most distant sequences"; cerr << "\n or 3) use the PHYLIP package.\n\n"; if (clustalw::userParameters->getMenuFlag()) { string dummy; clustalw::utilityObject->getStr(string("Press [RETURN] to continue"), dummy); } } bool ClusterTree::checkIfConditionsMet(int numSeqs, int min) { if (clustalw::userParameters->getEmpty()) { clustalw::utilityObject->error("You must load an alignment first"); return false; } if (numSeqs < min) { clustalw::utilityObject->error("Alignment has only %d sequences", numSeqs); return false; } return true; } } clustalx-2.1/clustalW/tree/AlignmentSteps.h0000644000175000017500000000231411470725216020661 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The AlignmentSteps class is used to hold the progressive alignment steps that have * been calculated from the guide tree. * * Note: I have pushed an empty vector onto steps, so that the steps will match up * with the old sets array. 
* ***************************************************************************************/ #ifndef ALIGNMENTSTEPS_H #define ALIGNMENTSTEPS_H #include #include #include using namespace std; namespace clustalw { class AlignmentSteps { public: /* Functions */ AlignmentSteps() : numSteps(0){steps.push_back(vector());}; // Empty vector void saveSet(int n, int *groups); void saveSet(vector* groups); int getNumSteps(); string getNextStep(); void printAlignSteps(); const vector >* getSteps(){return &steps;}; vector* getStep(int i){return &steps[i];}; void clear(); /* Attributes */ private: /* Functions */ /* Attributes */ vector > steps; int numSteps; }; } #endif clustalx-2.1/clustalW/tree/AlignmentSteps.cpp0000644000175000017500000000212511470725216021214 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "AlignmentSteps.h" namespace clustalw { void AlignmentSteps::saveSet(int n, int *groups) { vector tempVec; tempVec.resize(n + 1); tempVec[0] = 0; for(int i = 1; i < n + 1; i++) { tempVec[i] = groups[i - 1]; } steps.push_back(tempVec); numSteps++; } void AlignmentSteps::saveSet(vector* groups) { steps.push_back(*groups); numSteps++; } int AlignmentSteps::getNumSteps() { return numSteps; } void AlignmentSteps::printAlignSteps() { int rows = steps.size(); for(int i = 1; i < rows; i++) { for(int j = 1; j < (int)steps[i].size(); j++) { cout << " " << steps[i][j]; } cout << "\n"; } cout << "\n\n"; } void AlignmentSteps::clear() { int size = steps.size(); for(int i = 0; i < size; i++) { steps[i].clear(); } steps.clear(); steps.push_back(vector()); numSteps = 0; } } clustalx-2.1/clustalW/tree/0000755000175000017500000000000011470725216015553 5ustar ddineenddineenclustalx-2.1/clustalW/substitutionMatrix/matrices.h0000644000175000017500000011602011470725216022535 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson 
and Toby Gibson. */ #ifndef MATRICES_H #define MATRICES_H namespace clustalw { short blosum30mt[]={ 4, 0, 5, -3, -2, 17, 0, 5, -3, 9, 0, 0, 1, 1, 6, -2, -3, -3, -5, -4, 10, 0, 0, -4, -1, -2, -3, 8, -2, -2, -5, -2, 0, -3, -3, 14, 0, -2, -2, -4, -3, 0, -1, -2, 6, 0, 0, -3, 0, 2, -1, -1, -2, -2, 4, -1, -1, 0, -1, -1, 2, -2, -1, 2, -2, 4, 1, -2, -2, -3, -1, -2, -2, 2, 1, 2, 2, 6, 0, 4, -1, 1, -1, -1, 0, -1, 0, 0, -2, 0, 8, -1, -2, -3, -1, 1, -4, -1, 1, -3, 1, -3, -4, -3, 11, 1, -1, -2, -1, 2, -3, -2, 0, -2, 0, -2, -1, -1, 0, 8, -1, -2, -2, -1, -1, -1, -2, -1, -3, 1, -2, 0, -2, -1, 3, 8, 1, 0, -2, 0, 0, -1, 0, -1, -1, 0, -2, -2, 0, -1, -1, -1, 4, 1, 0, -2, -1, -2, -2, -2, -2, 0, -1, 0, 0, 1, 0, 0, -3, 2, 5, 1, -2, -2, -2, -3, 1, -3, -3, 4, -2, 1, 0, -2, -4, -3, -1, -1, 1, 5, -5, -5, -2, -4, -1, 1, 1, -5, -3, -2, -2, -3, -7, -3, -1, 0, -3, -5, -3, 20, 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, -2, -1, -4, -3, -6, -1, -2, 3, -3, 0, -1, -1, 3, -1, -4, -2, -1, 0, -2, -1, 1, 5, -1, 9, 0, 0, 0, 0, 5, -4, -2, 0, -3, 1, -1, -1, -1, 0, 4, 0, -1, -1, -3, -1, 0, -2, 4}; /* short blosum35mt[]={ 5, -1, 5, -2, -2, 15, -1, 5, -3, 8, -1, 0, -1, 2, 6, -2, -2, -4, -3, -3, 8, 0, 0, -3, -2, -2, -3, 7, -2, 0, -4, 0, -1, -3, -2, 12, -1, -2, -4, -3, -3, 1, -3, -3, 5, 0, 0, -2, -1, 1, -1, -1, -2, -2, 5, -2, -2, -2, -2, -1, 2, -3, -2, 2, -2, 5, 0, -2, -4, -3, -2, 0, -1, 1, 1, 0, 3, 6, -1, 4, -1, 1, -1, -1, 1, 1, -1, 0, -2, -1, 7, -2, -1, -4, -1, 0, -4, -2, -1, -1, 0, -3, -3, -2, 10, 0, 0, -3, -1, 2, -4, -2, -1, -2, 0, -2, -1, 1, 0, 7, -1, -1, -3, -1, -1, -1, -2, -1, -3, 2, -2, 0, -1, -2, 2, 8, 1, 0, -3, -1, 0, -1, 1, -1, -2, 0, -2, -1, 0, -2, 0, -1, 4, 0, -1, -1, -1, -1, -1, -2, -2, -1, 0, 0, 0, 0, 0, 0, -2, 2, 5, 0, -2, -2, -2, -2, 1, -3, -4, 4, -2, 2, 1, -2, -3, -3, -1, -1, 1, 5, -2, -3, -5, -3, -1, 1, -1, -4, -1, 0, 0, 1, -2, -4, -1, 0, -2, -2, -2, 16, 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, -1, -1, -1, -2, -5, -2, -1, 3, -2, 0, 0, -1, 0, 0, 
-2, -3, 0, 0, -1, -2, 0, 3, -1, 8, -1, 0, -2, 1, 5, -3, -2, -1, -3, 1, -2, -2, 0, 0, 4, 0, 0, -1, -2, -1, 0, -1, 4}; */ short blosum40mt[]={ 5, -1, 5, -2, -2, 16, -1, 6, -2, 9, -1, 1, -2, 2, 7, -3, -3, -2, -4, -3, 9, 1, -1, -3, -2, -3, -3, 8, -2, 0, -4, 0, 0, -2, -2, 13, -1, -3, -4, -4, -4, 1, -4, -3, 6, -1, 0, -3, 0, 1, -3, -2, -1, -3, 6, -2, -3, -2, -3, -2, 2, -4, -2, 2, -2, 6, -1, -3, -3, -3, -2, 0, -2, 1, 1, -1, 3, 7, -1, 4, -2, 2, -1, -3, 0, 1, -2, 0, -3, -2, 8, -2, -2, -5, -2, 0, -4, -1, -2, -2, -1, -4, -2, -2, 11, 0, 0, -4, -1, 2, -4, -2, 0, -3, 1, -2, -1, 1, -2, 8, -2, -1, -3, -1, -1, -2, -3, 0, -3, 3, -2, -1, 0, -3, 2, 9, 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -3, -2, 1, -1, 1, -1, 5, 0, 0, -1, -1, -1, -1, -2, -2, -1, 0, -1, -1, 0, 0, -1, -2, 2, 6, 0, -3, -2, -3, -3, 0, -4, -4, 4, -2, 2, 1, -3, -3, -3, -2, -1, 1, 5, -3, -4, -6, -5, -2, 1, -2, -5, -3, -2, -1, -2, -4, -4, -1, -2, -5, -4, -3, 19, 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -2, -1, -1, 0, 0, -1, -2, -1, -2, -3, -4, -3, -2, 4, -3, 2, 0, -1, 0, 1, -2, -3, -1, -1, -2, -1, -1, 3, -1, 9, -1, 2, -3, 1, 5, -4, -2, 0, -4, 1, -2, -2, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5}; short blosum45mt[]={ 5, -1, 4, -1, -2, 12, -2, 5, -3, 7, -1, 1, -3, 2, 6, -2, -3, -2, -4, -3, 8, 0, -1, -3, -1, -2, -3, 7, -2, 0, -3, 0, 0, -2, -2, 10, -1, -3, -3, -4, -3, 0, -4, -3, 5, -1, 0, -3, 0, 1, -3, -2, -1, -3, 5, -1, -3, -2, -3, -2, 1, -3, -2, 2, -3, 5, -1, -2, -2, -3, -2, 0, -2, 0, 2, -1, 2, 6, -1, 4, -2, 2, 0, -2, 0, 1, -2, 0, -3, -2, 6, -1, -2, -4, -1, 0, -3, -2, -2, -2, -1, -3, -2, -2, 9, -1, 0, -3, 0, 2, -4, -2, 1, -2, 1, -2, 0, 0, -1, 6, -2, -1, -3, -1, 0, -2, -2, 0, -3, 3, -2, -1, 0, -2, 1, 7, 1, 0, -1, 0, 0, -2, 0, -1, -2, -1, -3, -2, 1, -1, 0, -1, 4, 0, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, 0, -3, -1, -3, -3, 0, -3, -3, 3, -2, 1, 1, -3, -3, -3, -2, -1, 0, 5, -2, -4, -5, -4, -3, 1, -2, -3, -2, -2, -2, -2, -4, -3, -2, -2, -4, -3, -3, 15, 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, 0, 0, -1, -2, -1, -2, -2, -3, -2, -2, 3, -3, 2, 0, -1, 0, 0, -2, -3, -1, -1, -2, -1, -1, 3, -1, 8, -1, 2, -3, 1, 4, -3, -2, 0, -3, 1, -2, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 4}; /* short blosum50mt[]={ 5, -2, 5, -1, -3, 13, -2, 5, -4, 8, -1, 1, -3, 2, 6, -3, -4, -2, -5, -3, 8, 0, -1, -3, -1, -3, -4, 8, -2, 0, -3, -1, 0, -1, -2, 10, -1, -4, -2, -4, -4, 0, -4, -4, 5, -1, 0, -3, -1, 1, -4, -2, 0, -3, 6, -2, -4, -2, -4, -3, 1, -4, -3, 2, -3, 5, -1, -3, -2, -4, -2, 0, -3, -1, 2, -2, 3, 7, -1, 4, -2, 2, 0, -4, 0, 1, -3, 0, -4, -2, 7, -1, -2, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10, -1, 0, -3, 0, 2, -4, -2, 1, -3, 2, -2, 0, 0, -1, 7, -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 7, 1, 0, -1, 0, -1, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, 0, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -3, -3, -3, -3, -2, 0, 5, -3, -5, -5, -5, -3, 1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15, -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, -1, -3, -1, -2, -3, -3, -3, -2, 4, -3, 2, -1, -2, -1, 0, -2, -3, -1, -1, -2, -2, -1, 2, -1, 8, -1, 2, -3, 1, 5, -4, -2, 0, -3, 1, -3, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5}; short blosum55mt[]={ 5, -2, 5, 0, -4, 13, -2, 5, -4, 8, -1, 1, -4, 2, 7, -3, -5, -3, -5, -4, 9, 0, -1, -3, -2, -3, -4, 8, -2, 0, -4, -1, -1, -1, -2, 11, -2, -4, -2, -4, -4, 0, -5, -4, 6, -1, 0, -4, -1, 1, -4, -2, 0, -4, 6, -2, -4, -2, -5, -4, 1, -5, -3, 2, -3, 6, -1, -3, -2, -4, -3, 0, -3, -2, 2, -2, 3, 8, -2, 4, -3, 2, 0, -4, 0, 1, -4, 0, -4, -3, 8, -1, -2, -3, -2, -1, -5, -3, -3, -3, -1, -4, -3, -2, 10, -1, 0, -4, 0, 2, -4, -2, 1, -4, 2, -3, 0, 0, -1, 7, -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 8, 2, 0, -1, 0, 0, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, 0, -1, -1, -1, -1, -3, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 2, 6, 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -4, -3, -3, -3, -2, 0, 5, -4, -5, -4, -5, -3, 2, -3, -3, -3, -4, -3, 
-2, -5, -5, -2, -3, -4, -3, -4, 15, -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1, -2, -3, -3, -3, -2, 4, -4, 2, -1, -2, -1, -1, -2, -4, -1, -2, -2, -2, -2, 3, -1, 9, -1, 2, -4, 1, 5, -4, -3, 0, -4, 1, -3, -2, 0, -1, 4, 0, 0, -1, -3, -3, -1, -2, 5}; short blosum62mt[]={ 4, -2, 4, 0, -3, 9, -2, 4, -3, 6, -1, 1, -4, 2, 5, -2, -3, -2, -3, -3, 6, 0, -1, -3, -1, -2, -3, 6, -2, 0, -3, -1, 0, -1, -2, 8, -1, -3, -1, -3, -3, 0, -4, -3, 4, -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4}; */ short blosum62mt2[]={ 8, -4, 8, 0, -6, 18, -4, 8, -6, 12, -2, 2, -8, 4, 10, -4, -6, -4, -6, -6, 12, 0, -2, -6, -2, -4, -6, 12, -4, 0, -6, -2, 0, -2, -4, 16, -2, -6, -2, -6, -6, 0, -8, -6, 8, -2, 0, -6, -2, 2, -6, -4, -2, -6, 10, -2, -8, -2, -8, -6, 0, -8, -6, 4, -4, 8, -2, -6, -2, -6, -4, 0, -6, -4, 2, -2, 4, 10, -4, 6, -6, 2, 0, -6, 0, 2, -6, 0, -6, -4, 12, -2, -4, -6, -2, -2, -8, -4, -4, -6, -2, -6, -4, -4, 14, -2, 0, -6, 0, 4, -6, -4, 0, -6, 2, -4, 0, 0, -2, 10, -2, -2, -6, -4, 0, -6, -4, 0, -6, 4, -4, -2, 0, -4, 2, 10, 2, 0, -2, 0, 0, -4, 0, -2, -4, 0, -4, -2, 2, -2, 0, -2, 8, 0, -2, -2, -2, -2, -4, -4, -4, -2, -2, -2, -2, 0, -2, -2, -2, 2, 10, 0, -6, -2, 
-6, -4, -2, -6, -6, 6, -4, 2, 2, -6, -4, -4, -6, -4, 0, 8, -6, -8, -4, -8, -6, 2, -4, -4, -6, -6, -4, -2, -8, -8, -4, -6, -6, -4, -6, 22, 0, -2, -4, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -4, -2, -2, 0, 0, -2, -4, -2, -4, -6, -4, -6, -4, 6, -6, 4, -2, -4, -2, -2, -4, -6, -2, -4, -4, -4, -2, 4, -2, 14, -2, 2, -6, 2, 8, -6, -4, 0, -6, 2, -6, -2, 0, -2, 6, 0, 0, -2, -4, -6, -2, -4, 8}; /* short blosum65mt[]={ 4, -2, 4, 0, -3, 9, -2, 4, -4, 6, -1, 1, -4, 2, 5, -2, -3, -2, -4, -3, 6, 0, -1, -3, -1, -2, -3, 6, -2, 0, -3, -1, 0, -1, -2, 8, -1, -3, -1, -3, -3, 0, -4, -3, 4, -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -4, -1, -4, -3, 0, -4, -3, 2, -3, 4, -1, -3, -2, -3, -2, 0, -3, -2, 1, -2, 2, 6, -2, 3, -3, 1, 0, -3, -1, 1, -3, 0, -4, -2, 6, -1, -2, -3, -2, -1, -4, -2, -2, -3, -1, -3, -3, -2, 8, -1, 0, -3, 0, 2, -3, -2, 1, -3, 1, -2, 0, 0, -1, 6, -1, -1, -4, -2, 0, -3, -2, 0, -3, 2, -2, -2, 0, -2, 1, 6, 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -3, -2, 1, -1, 0, -1, 4, 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -3, -1, -3, -3, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -4, -2, -5, -3, 1, -3, -2, -2, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 10, -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -2, -1, -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -1, 2, -1, 7, -1, 1, -4, 1, 4, -3, -2, 0, -3, 1, -3, -2, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4}; short blosum70mt[]={ 4, -2, 4, -1, -4, 9, -2, 4, -4, 6, -1, 1, -4, 1, 5, -2, -4, -2, -4, -4, 6, 0, -1, -3, -2, -2, -4, 6, -2, -1, -4, -1, 0, -1, -2, 8, -2, -4, -1, -4, -4, 0, -4, -4, 4, -1, -1, -4, -1, 1, -3, -2, -1, -3, 5, -2, -4, -2, -4, -3, 0, -4, -3, 2, -3, 4, -1, -3, -2, -3, -2, 0, -3, -2, 1, -2, 2, 6, -2, 3, -3, 1, 0, -3, -1, 0, -4, 0, -4, -2, 6, -1, -2, -3, -2, -1, -4, -3, -2, -3, -1, -3, -3, -2, 8, -1, 0, -3, -1, 2, -3, -2, 1, -3, 1, -2, 0, 0, -2, 6, -2, -1, -4, -2, 0, -3, -3, 0, -3, 2, -3, -2, -1, -2, 1, 6, 1, 0, -1, 0, 0, -3, -1, -1, -3, 0, -3, -2, 
0, -1, 0, -1, 4, 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 1, 5, 0, -3, -1, -4, -3, -1, -4, -3, 3, -3, 1, 1, -3, -3, -2, -3, -2, 0, 4, -3, -4, -3, -5, -4, 1, -3, -2, -3, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 11, -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1, -2, -3, -3, -4, -3, 3, -4, 2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -2, 2, -2, 7, -1, 0, -4, 1, 4, -4, -2, 0, -3, 1, -3, -2, 0, -1, 3, 0, 0, -1, -3, -3, -1, -2, 4}; short blosum75mt[]={ 4, -2, 4, -1, -4, 9, -2, 4, -4, 6, -1, 1, -5, 1, 5, -3, -4, -2, -4, -4, 6, 0, -1, -3, -2, -3, -4, 6, -2, -1, -4, -1, 0, -2, -2, 8, -2, -4, -1, -4, -4, 0, -5, -4, 4, -1, -1, -4, -1, 1, -4, -2, -1, -3, 5, -2, -4, -2, -4, -4, 0, -4, -3, 1, -3, 4, -1, -3, -2, -4, -2, 0, -3, -2, 1, -2, 2, 6, -2, 3, -3, 1, -1, -4, -1, 0, -4, 0, -4, -3, 6, -1, -2, -4, -2, -1, -4, -3, -2, -3, -1, -3, -3, -3, 8, -1, 0, -3, -1, 2, -4, -2, 1, -3, 1, -3, 0, 0, -2, 6, -2, -1, -4, -2, 0, -3, -3, 0, -3, 2, -3, -2, -1, -2, 1, 6, 1, 0, -1, -1, 0, -3, -1, -1, -3, 0, -3, -2, 0, -1, 0, -1, 5, 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 1, 5, 0, -4, -1, -4, -3, -1, -4, -4, 3, -3, 1, 1, -3, -3, -2, -3, -2, 0, 4, -3, -5, -3, -5, -4, 1, -3, -2, -3, -4, -2, -2, -4, -5, -2, -3, -3, -3, -3, 11, -1, -2, -2, -2, -1, -2, -2, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1, -2, -3, -3, -4, -3, 3, -4, 2, -2, -2, -1, -2, -3, -4, -2, -2, -2, -2, -2, 2, -2, 7, -1, 0, -4, 1, 4, -4, -2, 0, -4, 1, -3, -2, 0, -2, 3, 0, 0, -1, -3, -3, -1, -3, 4}; */ short blosum80mt[]={ 7, -3, 6, -1, -6, 13, -3, 6, -7, 10, -2, 1, -7, 2, 8, -4, -6, -4, -6, -6, 10, 0, -2, -6, -3, -4, -6, 9, -3, -1, -7, -2, 0, -2, -4, 12, -3, -6, -2, -7, -6, -1, -7, -6, 7, -1, -1, -6, -2, 1, -5, -3, -1, -5, 8, -3, -7, -3, -7, -6, 0, -7, -5, 2, -4, 6, -2, -5, -3, -6, -4, 0, -5, -4, 2, -3, 3, 9, -3, 5, -5, 2, -1, -6, -1, 1, -6, 0, -6, -4, 9, -1, -4, -6, -3, -2, -6, -5, -4, -5, -2, -5, -4, -4, 12, -2, -1, -5, -1, 3, -5, -4, 1, -5, 2, -4, 
-1, 0, -3, 9, -3, -2, -6, -3, -1, -5, -4, 0, -5, 3, -4, -3, -1, -3, 1, 9, 2, 0, -2, -1, -1, -4, -1, -2, -4, -1, -4, -3, 1, -2, -1, -2, 7, 0, -1, -2, -2, -2, -4, -3, -3, -2, -1, -3, -1, 0, -3, -1, -2, 2, 8, -1, -6, -2, -6, -4, -2, -6, -5, 4, -4, 1, 1, -5, -4, -4, -4, -3, 0, 7, -5, -8, -5, -8, -6, 0, -6, -4, -5, -6, -4, -3, -7, -7, -4, -5, -6, -5, -5, 16, -1, -3, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -2, -4, -5, -5, -6, -5, 4, -6, 3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3, 3, -3, 11, -2, 0, -7, 1, 6, -6, -4, 0, -6, 1, -5, -3, -1, -2, 5, 0, -1, -2, -4, -5, -1, -4, 6}; /* short blosum85mt[]={ 5, -2, 4, -1, -4, 9, -2, 4, -5, 7, -1, 0, -5, 1, 6, -3, -4, -3, -4, -4, 7, 0, -1, -4, -2, -3, -4, 6, -2, -1, -5, -2, -1, -2, -3, 8, -2, -5, -2, -5, -4, -1, -5, -4, 5, -1, -1, -4, -1, 0, -4, -2, -1, -3, 6, -2, -5, -2, -5, -4, 0, -5, -3, 1, -3, 4, -2, -4, -2, -4, -3, -1, -4, -3, 1, -2, 2, 7, -2, 4, -4, 1, -1, -4, -1, 0, -4, 0, -4, -3, 7, -1, -3, -4, -2, -2, -4, -3, -3, -4, -2, -4, -3, -3, 8, -1, -1, -4, -1, 2, -4, -3, 1, -4, 1, -3, 0, 0, -2, 6, -2, -2, -4, -2, -1, -4, -3, 0, -4, 2, -3, -2, -1, -2, 1, 6, 1, 0, -2, -1, -1, -3, -1, -1, -3, -1, -3, -2, 0, -1, -1, -1, 5, 0, -1, -2, -2, -1, -3, -2, -2, -1, -1, -2, -1, 0, -2, -1, -2, 1, 5, -1, -4, -1, -4, -3, -1, -4, -4, 3, -3, 0, 0, -4, -3, -3, -3, -2, 0, 5, -3, -5, -4, -6, -4, 0, -4, -3, -3, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11, -1, -2, -3, -2, -1, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -1, -3, -2, -3, -4, -3, -4, -4, 3, -5, 2, -2, -3, -2, -2, -3, -4, -2, -3, -2, -2, -2, 2, -2, 7, -1, 0, -5, 1, 4, -4, -3, 0, -4, 1, -4, -2, -1, -2, 4, 0, -1, -1, -3, -4, -1, -3, 4}; short blosum90mt[]={ 5, -2, 4, -1, -4, 9, -3, 4, -5, 7, -1, 0, -6, 1, 6, -3, -4, -3, -5, -5, 7, 0, -2, -4, -2, -3, -5, 6, -2, -1, -5, -2, -1, -2, -3, 8, -2, -5, -2, -5, -4, -1, -5, -4, 5, -1, -1, -4, -1, 0, -4, -2, -1, -4, 6, -2, -5, -2, -5, -4, 0, -5, -4, 1, -3, 5, -2, -4, -2, -4, -3, -1, -4, -3, 1, -2, 2, 7, -2, 4, -4, 
1, -1, -4, -1, 0, -4, 0, -4, -3, 7, -1, -3, -4, -3, -2, -4, -3, -3, -4, -2, -4, -3, -3, 8, -1, -1, -4, -1, 2, -4, -3, 1, -4, 1, -3, 0, 0, -2, 7, -2, -2, -5, -3, -1, -4, -3, 0, -4, 2, -3, -2, -1, -3, 1, 6, 1, 0, -2, -1, -1, -3, -1, -2, -3, -1, -3, -2, 0, -2, -1, -1, 5, 0, -1, -2, -2, -1, -3, -3, -2, -1, -1, -2, -1, 0, -2, -1, -2, 1, 6, -1, -4, -2, -5, -3, -2, -5, -4, 3, -3, 0, 0, -4, -3, -3, -3, -2, -1, 5, -4, -6, -4, -6, -5, 0, -4, -3, -4, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11, -1, -2, -3, -2, -2, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -2, -3, -2, -3, -4, -4, -4, -4, 3, -5, 1, -2, -3, -2, -2, -3, -4, -3, -3, -3, -2, -3, 2, -2, 8, -1, 0, -5, 0, 4, -4, -3, 0, -4, 1, -4, -2, -1, -2, 4, 0, -1, -1, -3, -4, -1, -3, 4}; */ short pam20mt[]={ 6, -5, 6, -8,-14, 10, -4, 6,-16, 8, -3, 0,-16, 2, 8, -9,-12,-15,-17,-16, 9, -3, -4,-11, -4, -5,-10, 7, -8, -2, -8, -5, -6, -7,-10, 9, -6, -7, -7, -9, -6, -3,-13,-11, 9, -8, -3,-16, -6, -5,-16, -8, -8, -7, 7, -7,-10,-17,-15,-10, -4,-12, -7, -2, -9, 7, -6,-12,-16,-13, -8, -5,-10,-13, -2, -3, 0, 11, -5, 6,-13, 1, -3,-10, -4, -1, -6, -2, -8,-11, 8, -2, -8, -9, -9, -7,-11, -7, -5,-10, -8, -8, -9, -7, 8, -5, -4,-16, -4, 0,-15, -8, 0, -9, -4, -6, -5, -5, -4, 9, -8, -9, -9,-12,-11,-10,-11, -3, -6, -1,-10, -5, -7, -5, -2, 9, -1, -2, -4, -5, -5, -7, -3, -7, -8, -5, -9, -6, -1, -3, -6, -4, 7, -1, -4, -9, -6, -7,-10, -7, -8, -3, -4, -8, -5, -3, -5, -7, -8, 0, 7, -3, -9, -7, -9, -8, -9, -7, -7, 1,-10, -3, -2, -9, -7, -8, -9, -8, -4, 7, -16,-11,-18,-17,-19, -6,-17, -8,-16,-14, -7,-15, -9,-16,-15, -3, -6,-15,-18, 13, -4, -6,-11, -7, -6, -9, -6, -6, -6, -6, -7, -6, -4, -6, -6, -7, -4, -5, -6,-13, -6, -9, -7, -5,-13, -9, 1,-16, -4, -7,-10, -8,-13, -5,-16,-14,-11, -8, -7, -8, -6, -9, 10, -4, -1,-16, 0, 6,-16, -6, -2, -7, -5, -8, -6, -4, -5, 7, -5, -6, -7, -8,-17, -6,-11, 6}; short pam60mt[]={ 5, -2, 5, -5, -9, 9, -2, 5,-10, 7, -1, 2,-10, 3, 7, -6, -8, -9,-11,-10, 8, 0, -2, -7, -2, -2, -7, 6, -5, 0, -6, -2, -3, -4, -6, 8, -3, -4, -4, 
-5, -4, -1, -7, -6, 7, -5, -1,-10, -2, -3,-10, -5, -4, -4, 6, -4, -7,-11, -9, -7, -1, -8, -4, 0, -6, 6, -3, -6,-10, -7, -5, -2, -6, -7, 1, 0, 2, 10, -2, 5, -7, 2, 0, -6, -1, 1, -4, 0, -5, -6, 6, 0, -4, -6, -5, -3, -7, -4, -2, -6, -4, -5, -6, -4, 7, -3, -1,-10, -1, 2, -9, -5, 2, -5, -1, -3, -2, -2, -1, 7, -5, -5, -6, -6, -6, -7, -7, 0, -4, 2, -6, -2, -3, -2, 0, 8, 1, 0, -1, -2, -2, -5, 0, -4, -4, -2, -6, -4, 1, 0, -3, -2, 5, 1, -2, -5, -3, -4, -6, -3, -5, -1, -2, -5, -2, -1, -2, -4, -4, 1, 6, -1, -5, -4, -6, -4, -5, -4, -5, 3, -6, -1, 0, -5, -4, -5, -5, -4, -1, 6, -10, -8,-12,-11,-12, -3,-11, -5,-10, -8, -4, -9, -6,-10, -9, 0, -4, -9,-11, 13, -2, -3, -6, -3, -3, -5, -3, -3, -3, -3, -4, -3, -2, -3, -3, -4, -2, -2, -3, -8, -3, -6, -5, -2, -8, -7, 3,-10, -2, -4, -7, -5, -7, -3,-10, -8, -8, -5, -5, -5, -3, -5, 9, -2, 1,-10, 2, 5,-10, -3, 0, -4, -2, -5, -4, -1, -2, 6, -2, -3, -4, -5,-11, -3, -7, 5}; short pam120mt[]={ 3, 0, 4, -3, -6, 9, 0, 4, -7, 5, 0, 3, -7, 3, 5, -4, -5, -6, -7, -7, 8, 1, 0, -4, 0, -1, -5, 5, -3, 1, -4, 0, -1, -3, -4, 7, -1, -3, -3, -3, -3, 0, -4, -4, 6, -2, 0, -7, -1, -1, -7, -3, -2, -3, 5, -3, -4, -7, -5, -4, 0, -5, -3, 1, -4, 5, -2, -4, -6, -4, -3, -1, -4, -4, 1, 0, 3, 8, -1, 3, -5, 2, 1, -4, 0, 2, -2, 1, -4, -3, 4, 1, -2, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2, 6, -1, 0, -7, 1, 2, -6, -3, 3, -3, 0, -2, -1, 0, 0, 6, -3, -2, -4, -3, -3, -5, -4, 1, -2, 2, -4, -1, -1, -1, 1, 6, 1, 0, 0, 0, -1, -3, 1, -2, -2, -1, -4, -2, 1, 1, -2, -1, 3, 1, 0, -3, -1, -2, -4, -1, -3, 0, -1, -3, -1, 0, -1, -2, -2, 2, 4, 0, -3, -3, -3, -3, -3, -2, -3, 3, -4, 1, 1, -3, -2, -3, -3, -2, 0, 5, -7, -6, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6, 1, -2, -6, -8, 12, -1, -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -2, -4, -3, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, -3, 8, -1, 2, -7, 3, 4, -6, -2, 1, -3, -1, -3, -2, 0, -1, 4, -1, -1, -2, -3, -7, -1, -5, 4}; /* short pam160mt[]={ 2, 0, 3, -2, -4, 9, 0, 3, 
-5, 4, 0, 2, -5, 3, 4, -3, -4, -5, -6, -5, 7, 1, 0, -3, 0, 0, -4, 4, -2, 1, -3, 0, 0, -2, -3, 6, -1, -2, -2, -3, -2, 0, -3, -3, 5, -2, 0, -5, 0, -1, -5, -2, -1, -2, 4, -2, -4, -6, -4, -3, 1, -4, -2, 2, -3, 5, -1, -3, -5, -3, -2, 0, -3, -3, 2, 0, 3, 7, 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 3, 1, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -2, -1, 5, -1, 1, -5, 1, 2, -5, -2, 2, -2, 0, -2, -1, 0, 0, 5, -2, -1, -3, -2, -2, -4, -3, 1, -2, 3, -3, -1, -1, -1, 1, 6, 1, 0, 0, 0, 0, -3, 1, -1, -2, -1, -3, -2, 1, 1, -1, -1, 2, 1, 0, -2, -1, -1, -3, -1, -2, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -2, -2, -3, -2, -2, -2, -2, 3, -3, 1, 1, -2, -2, -2, -3, -1, 0, 4, -5, -5, -7, -6, -7, -1, -7, -3, -5, -4, -2, -4, -4, -5, -5, 1, -2, -5, -6, 12, 0, -1, -3, -1, -1, -3, -1, -1, -1, -1, -2, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, -3, -3, 0, -4, -4, 5, -5, 0, -2, -4, -2, -3, -2, -5, -4, -4, -3, -3, -3, -1, -3, 8, 0, 2, -5, 2, 3, -5, -1, 1, -2, 0, -3, -2, 1, -1, 3, 0, -1, -1, -2, -6, -1, -4, 3}; short pam250mt[]={ 2, 0, 3, -2, -4, 12, 0, 3, -5, 4, 0, 3, -5, 3, 4, -3, -4, -4, -6, -5, 9, 1, 0, -3, 1, 0, -5, 5, -1, 1, -3, 1, 1, -2, -2, 6, -1, -2, -2, -2, -2, 1, -3, -2, 5, -1, 1, -5, 0, 0, -5, -2, 0, -2, 5, -2, -3, -6, -4, -3, 2, -4, -2, 2, -3, 6, -1, -2, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2, 1, -1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6, 0, 1, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, -2, -1, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 1, 0, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, 0, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, -6, -5, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, 0, -1, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, -3, -3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, -2, 10, 0, 2, -5, 3, 3, -5, 0, 2, -2, 0, -3, -2, 1, 0, 3, 0, 0, -1, -2, -6, -1, -4, 3}; */ short 
pam350mt[]={ 2, 1, 3, -2, -5, 18, 1, 3, -6, 4, 1, 3, -6, 4, 4, -4, -5, -5, -6, -6, 13, 2, 1, -4, 1, 1, -6, 5, -1, 1, -4, 1, 1, -2, -2, 7, 0, -2, -3, -2, -2, 2, -2, -2, 5, -1, 1, -6, 1, 0, -6, -1, 1, -2, 5, -2, -4, -7, -4, -4, 3, -4, -2, 4, -3, 8, -1, -2, -6, -3, -2, 1, -3, -2, 3, 0, 5, 6, 0, 2, -4, 2, 2, -4, 1, 2, -2, 1, -3, -2, 2, 1, 0, -3, 0, 0, -5, 0, 0, -2, -1, -3, -2, 0, 6, 0, 2, -6, 2, 3, -5, -1, 3, -2, 1, -2, -1, 1, 1, 4, -1, 0, -4, -1, 0, -5, -2, 2, -2, 4, -3, 0, 1, 0, 2, 7, 1, 1, 0, 1, 0, -4, 1, -1, -1, 0, -3, -2, 1, 1, 0, 0, 1, 1, 0, -2, 0, 0, -3, 1, -1, 0, 0, -2, -1, 1, 1, 0, -1, 1, 2, 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 3, 2, -2, -1, -2, -3, -1, 0, 5, -7, -6,-10, -8, -8, 1, -8, -3, -6, -4, -2, -5, -5, -7, -5, 4, -3, -6, -7, 27, 0, 0, -3, -1, 0, -2, -1, 0, 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, 0, -5, -1, -4, -4, 1, -5, -5, 11, -6, 0, 0, -5, 0, -2, -3, -6, -5, -5, -3, -3, -2, 1, -2, 14, 0, 2, -6, 3, 3, -6, 0, 2, -2, 1, -3, -2, 2, 0, 3, 1, 0, 0, -2, -7, 0, -5, 3}; /* short md_40mt[]={ 9, 0, 0, -7, 0, 16, -6, 0,-13, 11, -5, 0,-15, 3, 11, -11, 0, -5,-15,-16, 13, -3, 0, -7, -4, -4,-15, 10, -9, 0, -6, -4, -8, -7,-10, 14, -6, 0,-11,-12,-12, -5,-13,-11, 11, -8, 0,-12, -8, -3,-16, -9, -6,-11, 11, -9, 0,-10,-14,-13, -1,-14, -7, -1,-12, 9, -6, 0, -9,-12,-11, -7,-12, -9, 1, -7, 1, 14, -6, 0, -8, 1, -5,-12, -5, 0, -8, -1,-12, -9, 12, -2, 0,-11,-11,-11,-11, -9, -4,-11,-10, -5,-10, -9, 12, -7, 0,-12, -6, 0,-14, -9, 2,-12, -1, -6, -8, -5, -3, 12, -7, 0, -5,-10, -8,-15, -4, 0,-10, 3, -9, -8, -6, -6, 0, 11, 0, 0, -2, -6, -8, -6, -2, -6, -8, -7, -7, -8, 1, -1, -7, -5, 9, 1, 0, -7, -8, -8,-11, -7, -7, -2, -5, -9, -2, -2, -4, -7, -6, 1, 10, -1, 0, -7, -9, -8, -6, -8,-12, 4,-12, -2, 0,-10, -9,-11,-11, -7, -4, 10, -14, 0, -4,-15,-15, -7, -7,-13,-13,-13, -8,-11,-14,-14,-11, -4, -9,-12,-10, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -13, 0, -2, -8,-14, 2,-13, 2, -9,-13, -9,-11, -6,-13, -9,-10, -7,-10,-11, -6, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0}; short md_120mt[]={ 6, 0, 0, -3, 0, 14, -2, 0, -7, 8, -2, 0, -8, 5, 8, -6, 0, -2, -9,-10, 11, 0, 0, -3, 0, -1, -9, 8, -4, 0, -2, -1, -3, -2, -4, 11, -1, 0, -5, -7, -7, -1, -6, -6, 7, -4, 0, -6, -2, 0, -9, -4, -1, -6, 8, -4, 0, -5, -8, -8, 2, -8, -4, 2, -6, 7, -2, 0, -5, -7, -6, -2, -6, -5, 3, -4, 3, 10, -1, 0, -3, 3, -1, -6, -1, 2, -4, 1, -6, -5, 8, 0, 0, -5, -5, -5, -5, -4, -1, -5, -4, -2, -5, -3, 9, -3, 0, -6, -1, 2, -7, -4, 4, -6, 2, -3, -4, -1, 0, 9, -3, 0, -2, -4, -3, -8, -1, 2, -6, 4, -5, -4, -2, -2, 2, 8, 2, 0, 0, -2, -3, -3, 0, -2, -3, -3, -3, -3, 2, 1, -3, -2, 5, 2, 0, -3, -3, -4, -6, -2, -3, 0, -2, -4, 0, 1, 0, -3, -3, 2, 6, 1, 0, -3, -5, -5, -2, -4, -6, 5, -6, 1, 2, -5, -4, -6, -6, -3, 0, 7, -8, 0, 0, -9, -9, -3, -3, -6, -7, -6, -4, -6, -8, -8, -6, -1, -5, -7, -6, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -7, 0, 2, -4, -7, 5, -8, 4, -5, -7, -4, -6, -2, -7, -4, -5, -3, -6, -6, -2, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short md_250mt[]={ 2, 0, 0, -1, 0, 11, -1, 0, -3, 5, -1, 0, -4, 4, 5, -3, 0, 0, -5, -5, 8, 1, 0, -1, 1, 1, -5, 5, -2, 0, 0, 0, 0, 0, -2, 6, 0, 0, -2, -3, -3, 0, -3, -3, 4, -1, 0, -3, 0, 1, -5, -1, 1, -3, 5, -1, 0, -2, -4, -4, 2, -4, -2, 2, -3, 5, 0, 0, -2, -3, -3, 0, -3, -2, 3, -2, 3, 6, 0, 0, -1, 2, 1, -3, 0, 1, -2, 1, -3, -2, 3, 1, 0, -2, -2, -2, -2, -1, 0, -2, -1, 0, -2, -1, 6, -1, 0, -3, 0, 2, -4, -1, 3, -3, 2, -2, -2, 0, 0, 5, -1, 0, -1, -1, 0, -4, 0, 2, -3, 4, -3, -2, 0, -1, 2, 5, 1, 0, 1, 0, -1, -2, 1, -1, -1, -1, -2, -1, 1, 1, -1, -1, 2, 2, 0, -1, -1, -1, -2, 0, -1, 1, -1, -1, 0, 1, 1, -1, -1, 1, 2, 1, 0, -2, -3, -2, 0, -2, -3, 4, -3, 2, 2, -2, -1, -3, -3, -1, 0, 4, -4, 0, 1, -5, -5, -1, -1, -3, -4, -3, -2, -3, -4, -4, -3, 0, -3, -4, -3, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 2, -2, -4, 5, -4, 4, -2, -3, -1, -3, -1, -3, -2, -2, -1, -3, -3, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0}; short md_350mt[]={ 1, 0, 0, 0, 0, 9, 0, 0, -2, 3, 0, 0, -2, 3, 3, -2, 0, 1, -3, -4, 6, 1, 0, 0, 1, 1, -3, 4, -1, 0, 0, 0, 0, 0, -1, 3, 0, 0, -1, -2, -2, 1, -2, -2, 3, -1, 0, -1, 0, 1, -3, 0, 1, -2, 3, -1, 0, -1, -3, -3, 2, -2, -1, 2, -2, 3, 0, 0, -1, -2, -2, 1, -2, -1, 2, -2, 2, 3, 0, 0, -1, 1, 1, -2, 0, 1, -1, 1, -2, -1, 2, 1, 0, -1, -1, -1, -2, -1, 0, -1, -1, 0, -1, 0, 4, -1, 0, -2, 1, 1, -2, 0, 2, -2, 2, -1, -1, 0, 0, 3, -1, 0, 0, 0, 0, -3, 0, 1, -2, 3, -2, -1, 0, 0, 2, 3, 1, 0, 0, 0, 0, -1, 1, 0, -1, 0, -1, -1, 1, 1, 0, 0, 1, 1, 0, 0, 0, -1, -1, 0, -1, 0, 0, -1, 0, 0, 1, -1, 0, 1, 1, 0, 0, -1, -2, -2, 0, -1, -2, 2, -2, 1, 2, -1, -1, -2, -2, 0, 0, 2, -3, 0, 1, -4, -3, 0, -1, -2, -3, -2, -1, -2, -3, -3, -2, 0, -2, -3, -2, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 2, -2, -2, 5, -3, 3, -1, -2, 0, -1, -1, -2, -1, -1, -1, -2, -2, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; */ short idmat[]={ 10, 0, 10, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10}; short gon40mt[]={ 92, 0, 0, -31, 0, 163, -56, 0,-135, 111, -37, 0,-140, 16, 105, -92, 0, -64,-152,-143, 126, -32, 0, -91, -51, -76,-152, 105, -65, 0, -67, -41, -40, -50, -81, 
145, -76, 0, -87,-150,-106, -39,-158, -94, 104, -54, 0,-132, -47, -13,-127, -79, -34, -86, 103, -68, 0, -85,-155,-108, -13,-141, -85, 5, -85, 89, -45, 0, -63,-130, -80, -16,-114, -60, 10, -57, 16, 140, -62, 0, -83, 6, -38,-104, -40, -7, -99, -20,-112, -91, 115, -37, 0,-137, -69, -60,-128, -87, -71,-108, -62, -83,-119, -78, 124, -43, 0,-113, -32, 10,-100, -71, 0, -91, 2, -60, -35, -25, -46, 118, -61, 0, -86, -77, -50,-130, -69, -31,-103, 19, -84, -81, -47, -73, -6, 112, 0, 0, -35, -36, -41,-111, -37, -48, -95, -43, -95, -64, -11, -35, -35, -51, 99, -25, 0, -59, -47, -52, -90, -85, -46, -51, -34, -78, -44, -27, -42, -39, -52, 13, 100, -22, 0, -43,-133, -74, -58,-122, -98, 28, -82, -18, -22,-103, -86, -79, -88, -74, -25, 97, -120, 0, -68,-171,-131, -6,-108, -70, -93,-127, -71, -72,-119,-149, -87, -63, -98,-120,-115, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -95, 0, -56, -98,-107, 31,-129, 5, -76, -88, -64, -66, -62,-106, -81, -75, -69, -87, -73, 1, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon80mt[]={ 75, 0, 0, -10, 0, 154, -31, 0, -93, 96, -17, 0, -94, 31, 88, -64, 0, -39,-111,-102, 114, -11, 0, -61, -26, -47,-115, 97, -39, 0, -43, -17, -17, -26, -53, 127, -43, 0, -54,-106, -73, -15,-114, -64, 86, -30, 0, -88, -21, 4, -89, -50, -12, -59, 85, -43, 0, -55,-109, -75, 7,-104, -57, 22, -58, 77, -26, 0, -39, -88, -53, 3, -83, -38, 25, -37, 31, 117, -34, 0, -55, 21, -13, -75, -18, 9, -71, -2, -79, -62, 97, -16, 0, -93, -42, -35, -93, -58, -45, -75, -37, -58, -78, -48, 114, -22, 0, -76, -9, 23, -70, -44, 14, -60, 17, -39, -19, -6, -24, 95, -36, 0, -60, -44, -23, -90, -43, -10, -71, 33, -58, -53, -22, -45, 11, 97, 14, 0, -15, -14, -19, -77, -16, -25, -62, -20, -64, -41, 5, -14, -15, -27, 78, -5, 0, -34, -24, -27, -62, -52, -24, -28, -15, -49, -25, -7, -20, -18, -27, 25, 81, -6, 0, -21, -89, -51, -31, -86, -65, 41, -54, 3, 1, -69, -57, -51, -60, -43, -9, 80, -87, 0, -43,-124, -98, 16, -81, -43, -63, -89, -44, 
-45, -86,-112, -62, -41, -72, -87, -80, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -65, 0, -32, -69, -74, 49, -94, 21, -47, -60, -35, -37, -39, -76, -53, -50, -46, -58, -47, 23, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon120mt[]={ 59, 0, 0, -1, 0, 144, -18, 0, -69, 82, -9, 0, -68, 35, 72, -48, 0, -26, -87, -78, 102, -3, 0, -45, -14, -31, -92, 90, -26, 0, -31, -7, -6, -14, -37, 110, -27, 0, -36, -80, -55, -3, -87, -48, 72, -19, 0, -64, -8, 11, -67, -34, -2, -44, 69, -30, 0, -39, -82, -57, 15, -82, -42, 28, -44, 66, -17, 0, -26, -64, -40, 11, -65, -28, 29, -27, 34, 95, -20, 0, -41, 26, -1, -58, -7, 14, -55, 5, -61, -46, 80, -6, 0, -68, -28, -22, -72, -41, -31, -56, -24, -44, -56, -32, 105, -12, 0, -56, 1, 25, -53, -30, 17, -43, 20, -30, -14, 1, -14, 74, -23, 0, -45, -27, -10, -68, -30, -1, -53, 36, -44, -38, -10, -30, 16, 83, 16, 0, -7, -5, -9, -58, -6, -14, -44, -10, -47, -29, 10, -5, -7, -15, 60, 2, 0, -21, -13, -15, -47, -35, -14, -17, -6, -34, -16, 0, -10, -9, -16, 26, 64, 0, 0, -11, -65, -38, -17, -65, -47, 42, -39, 13, 10, -50, -42, -36, -44, -28, -3, 65, -68, 0, -29, -96, -78, 27, -66, -28, -46, -68, -29, -31, -68, -89, -49, -30, -57, -67, -59, 166, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -48, 0, -20, -53, -56, 55, -74, 26, -31, -44, -20, -22, -28, -59, -38, -37, -35, -42, -33, 33, 0, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon160mt[]={ 46, 0, 0, 3, 0, 135, -11, 0, -53, 70, -4, 0, -52, 34, 59, -38, 0, -18, -70, -62, 91, 2, 0, -34, -7, -21, -76, 82, -18, 0, -23, -1, -1, -7, -27, 93, -18, 0, -25, -62, -43, 3, -70, -37, 59, -12, 0, -48, -1, 13, -53, -24, 2, -35, 55, -22, 0, -29, -65, -45, 19, -67, -32, 30, -34, 57, -12, 0, -19, -50, -31, 14, -52, -21, 29, -21, 34, 76, -12, 0, -31, 26, 5, -47, -2, 15, -44, 8, -48, -36, 65, -1, 0, -52, -19, -14, -58, -30, -22, -43, -16, -35, -42, -22, 96, -7, 0, -42, 6, 23, -41, -21, 17, -32, 20, -24, 
-12, 5, -8, 56, -16, 0, -35, -16, -3, -53, -21, 3, -41, 35, -35, -29, -4, -21, 17, 71, 16, 0, -2, 0, -3, -45, -1, -8, -33, -4, -36, -23, 11, 0, -2, -9, 44, 5, 0, -14, -6, -8, -36, -24, -8, -12, -2, -24, -11, 3, -4, -4, -9, 23, 50, 1, 0, -6, -49, -30, -8, -52, -35, 40, -30, 17, 14, -38, -32, -27, -34, -20, 0, 53, -55, 0, -21, -78, -64, 32, -55, -19, -34, -54, -20, -22, -55, -74, -40, -24, -47, -54, -45, 158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -37, 0, -13, -42, -44, 56, -60, 27, -20, -35, -11, -13, -22, -48, -29, -29, -28, -32, -24, 38, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon250mt[]={ 24, 0, 0, 5, 0, 115, -3, 0, -32, 47, 0, 0, -30, 27, 36, -23, 0, -8, -45, -39, 70, 5, 0, -20, 1, -8, -52, 66, -8, 0, -13, 4, 4, -1, -14, 60, -8, 0, -11, -38, -27, 10, -45, -22, 40, -4, 0, -28, 5, 12, -33, -11, 6, -21, 32, -12, 0, -15, -40, -28, 20, -44, -19, 28, -21, 40, -7, 0, -9, -30, -20, 16, -35, -13, 25, -14, 28, 43, -3, 0, -18, 22, 9, -31, 4, 12, -28, 8, -30, -22, 38, 3, 0, -31, -7, -5, -38, -16, -11, -26, -6, -23, -24, -9, 76, -2, 0, -24, 9, 17, -26, -10, 12, -19, 15, -16, -10, 7, -2, 27, -6, 0, -22, -3, 4, -32, -10, 6, -24, 27, -22, -17, 3, -9, 15, 47, 11, 0, 1, 5, 2, -28, 4, -2, -18, 1, -21, -14, 9, 4, 2, -2, 22, 6, 0, -5, 0, -1, -22, -11, -3, -6, 1, -13, -6, 5, 1, 0, -2, 15, 25, 1, 0, 0, -29, -19, 1, -33, -20, 31, -17, 18, 16, -22, -18, -15, -20, -10, 0, 34, -36, 0, -10, -52, -43, 36, -40, -8, -18, -35, -7, -10, -36, -50, -27, -16, -33, -35, -26, 142, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -22, 0, -5, -28, -27, 51, -40, 22, -7, -21, 0, -2, -14, -31, -17, -18, -19, -19, -11, 41, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon300mt[]={ 16, 0, 0, 5, 0, 104, -1, 0, -24, 37, 1, 0, -23, 23, 27, -18, 0, -5, -37, -31, 60, 5, 0, -15, 3, -4, -42, 58, -6, 0, -10, 5, 4, 0, -10, 45, -6, 0, -7, -30, -21, 11, -36, -16, 33, -2, 0, -21, 6, 11, -26, -7, 5, 
-17, 24, -9, 0, -10, -32, -22, 19, -36, -14, 25, -17, 33, -5, 0, -6, -24, -16, 15, -28, -10, 22, -11, 24, 31, -1, 0, -14, 18, 9, -25, 5, 10, -22, 8, -24, -17, 27, 3, 0, -23, -4, -2, -30, -11, -8, -20, -3, -18, -19, -6, 66, -1, 0, -18, 9, 14, -20, -6, 9, -15, 13, -13, -8, 7, -1, 18, -4, 0, -17, 0, 5, -25, -6, 6, -19, 22, -18, -13, 4, -6, 13, 37, 8, 0, 1, 5, 3, -22, 4, -1, -14, 2, -17, -11, 7, 4, 2, 0, 15, 5, 0, -3, 1, 1, -17, -7, -1, -4, 2, -9, -5, 4, 2, 1, -1, 11, 17, 0, 0, 1, -23, -15, 4, -26, -15, 26, -13, 17, 15, -17, -14, -12, -15, -8, 0, 26, -29, 0, -7, -42, -36, 36, -34, -5, -13, -28, -4, -6, -30, -41, -23, -14, -27, -28, -19, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17, 0, -3, -22, -22, 46, -33, 18, -3, -17, 3, 1, -12, -25, -14, -14, -15, -15, -7, 40, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short gon350mt[]={ 10, 0, 0, 4, 0, 93, 0, 0, -19, 29, 1, 0, -17, 19, 20, -14, 0, -3, -30, -25, 51, 5, 0, -12, 4, -2, -35, 51, -4, 0, -8, 5, 4, 1, -7, 33, -4, 0, -5, -24, -17, 11, -29, -13, 27, -1, 0, -16, 6, 9, -21, -4, 5, -13, 18, -7, 0, -7, -25, -18, 18, -30, -11, 22, -14, 28, -4, 0, -4, -19, -13, 14, -23, -8, 19, -9, 21, 23, 0, 0, -11, 15, 9, -20, 5, 8, -18, 7, -19, -14, 20, 3, 0, -18, -2, 0, -25, -7, -5, -16, -2, -15, -14, -3, 56, 0, 0, -14, 8, 11, -16, -4, 7, -11, 10, -11, -7, 6, 0, 12, -2, 0, -13, 2, 6, -20, -4, 6, -15, 18, -14, -11, 4, -4, 10, 28, 6, 0, 1, 5, 3, -18, 5, 0, -11, 2, -13, -9, 6, 4, 2, 1, 10, 4, 0, -2, 2, 1, -13, -5, -1, -3, 2, -7, -4, 4, 2, 1, 0, 8, 11, 0, 0, 2, -18, -12, 5, -21, -11, 22, -10, 16, 14, -13, -11, -9, -12, -6, 0, 21, -24, 0, -4, -35, -29, 35, -30, -3, -9, -23, -1, -3, -24, -34, -19, -12, -22, -23, -14, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -14, 0, -1, -18, -17, 42, -27, 15, -1, -14, 5, 2, -10, -20, -11, -12, -12, -12, -4, 39, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short clustalvdnamt[]={ 10, 0, 0, 0, 0, 10, 
0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; short swgapdnamt[]={ 10, -9, 10, -9, 10, 10, 10, 10, -9, 10, -9, 10, -9, 10, 10, 10, 10, 10, 10, -9, 10, -9, 10, -9, 10, 10, 10, 10, 10, 10, 10, 10, -9, 10, -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10, -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, -9, 10, 10, 10, -9, 10, -9, 10, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, 10, 10, 10, -9, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10}; } #endif clustalx-2.1/clustalW/substitutionMatrix/globalmatrix.h0000644000175000017500000000065411470725216023420 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef GLOBALMATRIX_H #define GLOBALMATRIX_H #include "SubMatrix.h" namespace clustalw { // Mark Dec 13 2005 /* * The reason for having a global sub matrix object is that we have * user defined matrices that can be read in at any time and then used later in the * MSA stage. */ extern SubMatrix *subMatrix; } #endif clustalx-2.1/clustalW/substitutionMatrix/SubMatrix.h0000644000175000017500000001470711470725216022655 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This is the interface class to all the substitution matrices. * It provides the matrices in a form that the rest of the program can use. 
* It is also used to store the user defined matrix. This will be used mainly as an interface
 * to the matrices defined in matrices.h.
 * The way this class will work is the user can read in matrix series or a single matrix,
 * or they can select one of the matrix series (e.g Blosum). This will then be used in the
 * alignment stages. There are separate matrices for amino acid pairwise and progressive,
 * and for DNA alignments both pairwise and progressive.
 * It is possible to have a series of matrices that are user defined for amino acid
 * progressive ONLY!!
 * A single matrix is chosen for pairwise and for DNA alignments.
 * This class does 3 jobs. It reads in matrices from files/arrays, it provides
 * matrices in the formats that are used in the alignment stage, it allows users to select
 * which matrices they would like to use.
 */
#ifndef SUBMATRIX_H
#define SUBMATRIX_H

// NOTE(review): the two system header names below are missing in this copy
// (the text between angle brackets appears to have been stripped) - restore
// them from the upstream ClustalW sources.
#include
#include
#include "../general/clustalw.h"
#include "../general/userparams.h"
#include "../general/utils.h"
#include "../general/Array2D.h"

namespace clustalw
{
using namespace std;

// NOTE(review): the element type of both typedefs is missing in this copy.
// The constructor fills Matrix objects from the `short` arrays declared in
// matrices.h, so a short element type is presumably intended - TODO confirm.
typedef vector Xref;
typedef vector Matrix;

/**
 * Owns the built-in and user supplied substitution matrices and hands them out
 * as NUMRES x NUMRES int tables for the pairwise and profile alignment stages.
 */
class SubMatrix
{
    public:
        /* Functions */
        SubMatrix();
        ~SubMatrix();
        // Readers for user supplied matrix files. getUserMatFromFile and
        // getUserMatSeriesFromFile copy the file name actually used back into str.
        bool getUserMatFromFile(char *str, int alignResidueType, int alignType);
        bool getAAScoreMatFromFile(char *str);
        bool getDNAScoreMatFromFile(char *str);
        bool getQTLowScoreMatFromFile(char *fileName, bool dna);
        bool getUserMatSeriesFromFile(char *str);
        void setCurrentNameAndNum(string _matrixName, int _matrixNum, int alignResidueType,
                                  int alignType);
        int getMatrixNumForMenu(int alignResidueType, int alignType);
        // Each of the getters below fills matrix[][] and reports the average
        // mismatch score through matAvg; see the definitions in SubMatrix.cpp.
        int getPairwiseMatrix(int matrix[NUMRES][NUMRES], PairScaleValues& scale,
                              int& matAvg);
        int getProfileAlignMatrix(int matrix[NUMRES][NUMRES], double pcid, int minLen,
                                  PrfScaleValues& scaleParam, int& matAvg);
        int getAlnScoreMatrix(int matrix[NUMRES][NUMRES]);
        // Access functions for the interactive menu.
        int getMatrixNum();
        int getDNAMatrixNum();
        int getPWMatrixNum();
        int getPWDNAMatrixNum();
        void getQTMatrixForHistogram(int matrix[NUMRES][NUMRES]);
        // NOTE Qt
        // Plain accessors for the matrix selections used by the Qt front end.
        int getQTAAHistMatNum(){return QTAAHistMatNum;};
        int getQTDNAHistMatNum(){return QTDNAHistMatNum;};
        void setQTAAHistMatNum(int num){QTAAHistMatNum = num;};
        void setQTDNAHistMatNum(int num){QTDNAHistMatNum = num;};
        void getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES]);
        int getQTsegmentDNAMatNum(){return QTsegmentDNAMatNum;}
        void setQTsegmentDNAMatNum(int dnaMat){QTsegmentDNAMatNum = dnaMat;}
        int getQTsegmentAAMatNum(){return QTsegmentAAMatNum;}
        void setQTsegmentAAMatNum(int aaMat){QTsegmentAAMatNum = aaMat;}
        void tempInterface(int alignResidueType, int alignType);
        // Resets the matrix numbers and flags (not the matrix names) to their defaults.
        void setValuesToDefault();
        /* Attributes */

    private:
        /* Functions */
        // Expands a packed lower-triangular matrix into matrix[][] via xref.
        int getMatrix(Matrix* matPtr, Xref* xref, int matrix[NUMRES][NUMRES],
                      bool negFlag, int scale, bool minimise = false);
        int readMatrixSeries(const char *fileName, Matrix& userMat, Xref& xref);
        int readUserMatrix(const char *fileName, Matrix& userMat, Xref& xref);
        int getArgs(char *inline1, char *args[], int max);
        void setUpCrossReferences();
        bool commentline(char* line);
        // The functions below are purely for testing purposes.
        void printGetMatrixResults(int mat[NUMRES][NUMRES]);
        void compareMatrices(int mat1[NUMRES][NUMRES], int mat2[NUMRES][NUMRES]);
        // NOTE(review): the vector element type is missing in the next two
        // declarations, and the defaults bind a string literal to a non-const char*.
        void printInFormat(vector& temp, char* name = "tempfile.out");
        void printVectorToFile(vector& temp, char* name = "tempfile.out");
        Matrix* getUserMatAddress(int alignResidueType, int alignType);
        Xref* getUserXrefAddress(int alignResidueType, int alignType);
        void checkResidueAndAlignType(int alignResidueType, int alignType);

        /* Attributes */
        bool userSeries; // set by readMatrixSeries when the file is a CLUSTAL_SERIES
        // Currently selected matrix numbers; the constructor sets them to 3/1/3/1.
        int matrixNum;
        int DNAMatrixNum;
        int pwMatrixNum;
        int pwDNAMatrixNum;
        // Currently selected matrix names; allocated with new in the constructor
        // and released in the destructor.
        string* matrixName;
        string* DNAMatrixName;
        string* pwMatrixName;
        string* pwDNAMatrixName;
        // Matrix cross references.
        Xref defaultDNAXref;
        Xref defaultAAXref;
        Xref DNAXref; // User defined dna xref
        Xref AAXref;
        Xref pwAAXref; // pairwise
        Xref pwDNAXref;
        Xref QTscoreXref;
        Xref QTscoreDNAXref;
        Xref QTsegmentDNAXref;
        Xref QTsegmentAAXref;
        // NOTE(review): element types stripped here as well; the constructor
        // resizes each element of both containers, so these are vectors of
        // Xref / Matrix respectively - TODO confirm.
        vector AAXrefseries;
        vector userMatSeries;
        // Storage for user defined matrices; sized NUMRES * NUMRES in the constructor.
        Matrix userMat;
        Matrix pwUserMat;
        Matrix userDNAMat;
        Matrix pwUserDNAMat;
        Matrix QTscoreUserMatrix;
        Matrix QTscoreUserDNAMatrix;
        Matrix QTsegmentDNAMatrix;
        Matrix QTsegmentAAMatrix;
        /* These are vectors to store the matrices defined in matrices.h */
        const int sizenAAMatrix; // initialised to 276 by the constructor
        const int sizeDNAMatrix; // initialised to 153 by the constructor
        Matrix* blosum30mtVec;
        Matrix* blosum40mtVec;
        Matrix* blosum45mtVec;
        Matrix* blosum62mt2Vec;
        Matrix* blosum80mtVec;
        Matrix* pam20mtVec;
        Matrix* pam60mtVec;
        Matrix* pam120mtVec;
        Matrix* pam350mtVec;
        Matrix* idmatVec;
        Matrix* gon40mtVec;
        Matrix* gon80mtVec;
        Matrix* gon120mtVec;
        Matrix* gon160mtVec;
        Matrix* gon250mtVec;
        Matrix* gon350mtVec;
        Matrix* clustalvdnamtVec;
        Matrix* swgapdnamtVec;
        int matrixAvgScore; // NOTE Needed by other classes.
        UserMatrixSeries matSeries;
        string line2; // file name most recently entered/used by the file readers
        int QTDNAHistMatNum;
        int QTAAHistMatNum;
        int QTsegmentDNAMatNum;
        int QTsegmentAAMatNum;
        // Temp, to hold current selection
        Matrix* mat;
        Xref* xref;
        Matrix* _matPtr;
        Xref* _matXref;
};
}
#endif
clustalx-2.1/clustalW/substitutionMatrix/SubMatrix.cpp0000644000175000017500000014521311470725216023205 0ustar ddineenddineen/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include #include "SubMatrix.h" #include "matrices.h" #include "../general/InvalidCombination.cpp" #include "../general/debuglogObject.h" namespace clustalw { using namespace std; /** * * @param log */ SubMatrix::SubMatrix() : sizenAAMatrix(276), sizeDNAMatrix(153), matrixAvgScore(0), QTDNAHistMatNum(DNAIUB), QTAAHistMatNum(AAHISTGONNETPAM250), QTsegmentDNAMatNum(DNAIUB), QTsegmentAAMatNum(QTAASEGGONNETPAM250) { userSeries = false; setUpCrossReferences(); #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Creating the SubMatrix object\n"); } #endif try { /* Set up the vectors with the matrices defined in matrices.h * The matrices are intially defined as a short array, as there is no way * to intiailize a vector with a {....} list. */ blosum30mtVec = new Matrix(blosum30mt, blosum30mt + sizenAAMatrix); blosum40mtVec = new Matrix(blosum40mt, blosum40mt + sizenAAMatrix); blosum45mtVec = new Matrix(blosum45mt, blosum45mt + sizenAAMatrix); blosum62mt2Vec = new Matrix(blosum62mt2, blosum62mt2 + sizenAAMatrix); blosum80mtVec = new Matrix(blosum80mt, blosum80mt + sizenAAMatrix); pam20mtVec = new Matrix(pam20mt, pam20mt + sizenAAMatrix); pam60mtVec = new Matrix(pam60mt, pam60mt + sizenAAMatrix); pam120mtVec = new Matrix(pam120mt, pam120mt + sizenAAMatrix); pam350mtVec = new Matrix(pam350mt, pam350mt + sizenAAMatrix); idmatVec = new Matrix(idmat, idmat + sizenAAMatrix); gon40mtVec = new Matrix(gon40mt, gon40mt + sizenAAMatrix); gon80mtVec = new Matrix(gon80mt, gon80mt + sizenAAMatrix); gon120mtVec = new Matrix(gon120mt, gon120mt + sizenAAMatrix); gon160mtVec = new Matrix(gon160mt, gon160mt + sizenAAMatrix); gon250mtVec = new Matrix(gon250mt, gon250mt + sizenAAMatrix); gon350mtVec = new Matrix(gon350mt, gon350mt + sizenAAMatrix); clustalvdnamtVec = new Matrix(clustalvdnamt, clustalvdnamt + sizeDNAMatrix); swgapdnamtVec = new Matrix(swgapdnamt, swgapdnamt + sizeDNAMatrix); 
/* * Set up the vectors for user defined types. * Probably dont need to do this, as they may not be used. It would be better * to initialise their size if thye are used. */ userMat.resize(NUMRES * NUMRES); pwUserMat.resize(NUMRES * NUMRES); userDNAMat.resize(NUMRES * NUMRES); pwUserDNAMat.resize(NUMRES * NUMRES); QTscoreUserMatrix.resize(NUMRES * NUMRES); QTscoreUserDNAMatrix.resize(NUMRES * NUMRES); QTsegmentDNAMatrix.resize(NUMRES * NUMRES); QTsegmentAAMatrix.resize(NUMRES * NUMRES); userMatSeries.resize(MAXMAT); // Maximum number of matrices vector::iterator firstM = userMatSeries.begin(); vector::iterator lastM = userMatSeries.end(); while(firstM != lastM) { firstM->resize(NUMRES * NUMRES); firstM++; } // Maybe I should put this in with the other xref intialisations! AAXrefseries.resize(MAXMAT); vector::iterator firstX = AAXrefseries.begin(); vector::iterator lastX = AAXrefseries.end(); while(firstX != lastX) { firstX->resize(NUMRES + 1); firstX++; } // Set the defaults. matrixNum = 3; matrixName = new string("gonnet"); DNAMatrixNum = 1; DNAMatrixName = new string("iub"); pwMatrixNum = 3; pwMatrixName = new string("gonnet"); pwDNAMatrixNum = 1; pwDNAMatrixName = new string("iub"); } catch(const exception &ex) { cerr << ex.what() << endl; cerr << "Terminating program. Cannot continue\n"; exit(1); } } void SubMatrix::setValuesToDefault() { matrixAvgScore = 0; QTDNAHistMatNum = DNAIUB; QTAAHistMatNum = AAHISTGONNETPAM250; QTsegmentDNAMatNum = DNAIUB; QTsegmentAAMatNum = QTAASEGGONNETPAM250; userSeries = false; matrixNum = 3; DNAMatrixNum = 1; pwMatrixNum = 3; pwDNAMatrixNum = 1; } /** * The destructor frees up any dynamically allocated memory. 
*/ SubMatrix::~SubMatrix() { delete blosum30mtVec; delete blosum40mtVec; delete blosum45mtVec; delete blosum62mt2Vec; delete blosum80mtVec; delete pam20mtVec; delete pam60mtVec; delete pam120mtVec; delete pam350mtVec; delete idmatVec; delete gon40mtVec; delete gon80mtVec; delete gon120mtVec; delete gon160mtVec; delete gon250mtVec; delete gon350mtVec; delete clustalvdnamtVec; delete swgapdnamtVec; delete matrixName; delete DNAMatrixName; delete pwMatrixName; delete pwDNAMatrixName; } /** * This function sets up the initial cross references. */ void SubMatrix::setUpCrossReferences() { char c1, c2; short i, j, maxRes; defaultAAXref.resize(NUMRES + 1); defaultDNAXref.resize(NUMRES + 1); string aminoAcidOrder = "ABCDEFGHIKLMNPQRSTVWXYZ"; string nucleicAcidOrder = "ABCDGHKMNRSTUVWXY"; /* * I also need to resize the user defined xrefs. */ DNAXref.resize(NUMRES + 1); AAXref.resize(NUMRES + 1); pwAAXref.resize(NUMRES + 1); pwDNAXref.resize(NUMRES + 1); QTscoreXref.resize(NUMRES + 1); QTscoreDNAXref.resize(NUMRES + 1); QTsegmentDNAXref.resize(NUMRES + 1); QTsegmentAAXref.resize(NUMRES + 1); /* * set up cross-reference for default matrices hard-coded in matrices.h */ for (i = 0; i < NUMRES; i++) { defaultAAXref[i] = -1; } for (i = 0; i < NUMRES; i++) { defaultDNAXref[i] = -1; } maxRes = 0; for (i = 0; (c1 = aminoAcidOrder[i]); i++) { for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++) { if (c1 == c2) { defaultAAXref[i] = j; maxRes++; break; } } if ((defaultAAXref[i] == - 1) && (aminoAcidOrder[i] != '*')) { utilityObject->error("residue %c in matrices.h is not recognised", aminoAcidOrder[i]); } } maxRes = 0; for (i = 0; (c1 = nucleicAcidOrder[i]); i++) { for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++) { if (c1 == c2) { defaultDNAXref[i] = j; maxRes++; break; } } if ((defaultDNAXref[i] == - 1) && (nucleicAcidOrder[i] != '*')) { utilityObject->error("nucleic acid %c in matrices.h is not recognised", nucleicAcidOrder[i]); } } } /** * The function 
getPairwiseMatrix is called by the user to get the correct sub matrix for the * pairwise alignment stage. It calls getMatrix to actually calculate the matrix. * This function provides an interface for the user. It allows the user to get the matrix * they wish to use for the pairwise alignment part. * @param matrix[][] * @param scale * @param matAvg * @return */ int SubMatrix::getPairwiseMatrix(int matrix[NUMRES][NUMRES], PairScaleValues& scale, int& matAvg) { int _maxRes; // Local copy. /* Pointers to Matrix and xref to use in calculation */ Matrix* _matPtr; Xref* _matXref; #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("In the function getPairwiseMatrix: \n"); } #endif string matrixPointer; string xrefPointer; #ifdef OS_MAC scale.intScale = 10; #else scale.intScale = 100; #endif scale.gapOpenScale = scale.gapExtendScale = 1.0; if (userParameters->getDNAFlag()) { #if DEBUGFULL if(logObject && DEBUGLOG) { string msg = " (DNA AND Pairwise) " + *pwDNAMatrixName + "\n"; logObject->logMsg(msg); } #endif if (pwDNAMatrixName->compare("iub") == 0) { matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref"; _matPtr = swgapdnamtVec; _matXref = &defaultDNAXref; } else if (pwDNAMatrixName->compare("clustalw") == 0) { matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref"; _matPtr = clustalvdnamtVec; _matXref = &defaultDNAXref; scale.gapOpenScale = 0.6667; scale.gapExtendScale = 0.751; } else { matrixPointer = "pwUserDNAMat"; xrefPointer = "pwDNAXref"; _matPtr = &pwUserDNAMat; _matXref = &pwDNAXref; } _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale); if (_maxRes == 0) { return ((int) -1); } float _transitionWeight = userParameters->getTransitionWeight(); // Mark change 17-5-07 matrix[0][4] = static_cast(_transitionWeight * matrix[0][0]); matrix[4][0] = static_cast(_transitionWeight * matrix[0][0]); matrix[2][11] = static_cast(_transitionWeight * matrix[0][0]); matrix[11][2] = static_cast(_transitionWeight * matrix[0][0]); 
matrix[2][12] = static_cast(_transitionWeight * matrix[0][0]); matrix[12][2] = static_cast(_transitionWeight * matrix[0][0]); } else { #if DEBUGFULL if(logObject && DEBUGLOG) { string msg = " (Protein AND Pairwise) " + *pwMatrixName + "\n"; logObject->logMsg(msg); } #endif if (pwMatrixName->compare("blosum") == 0) { matrixPointer = "blosum30mtVec"; xrefPointer = "defaultAAXref"; _matPtr = blosum30mtVec; _matXref = &defaultAAXref; } else if (pwMatrixName->compare("pam") == 0) { matrixPointer = "pam350mtVec"; xrefPointer = "defaultAAXref"; _matPtr = pam350mtVec; _matXref = &defaultAAXref; } else if (pwMatrixName->compare("gonnet") == 0) { matrixPointer = "gon250mtVec"; xrefPointer = "defaultAAXref"; _matPtr = gon250mtVec; scale.intScale /= 10; _matXref = &defaultAAXref; } else if (pwMatrixName->compare("id") == 0) { matrixPointer = "idmatVec"; xrefPointer = "defaultAAXref"; _matPtr = idmatVec; _matXref = &defaultAAXref; } else { matrixPointer = "pwUserMat"; xrefPointer = "pwAAXref"; _matPtr = &pwUserMat; _matXref = &pwAAXref; } _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale); if (_maxRes == 0) { return ((int) -1); } } #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " Called getMatrix with " << matrixPointer << " and " << xrefPointer << ".\n" << " intScale = " << scale.intScale << ", gapOpenScale = " << scale.gapOpenScale << ", gapExtendScale = " << scale.gapExtendScale << "\n\n"; logObject->logMsg(outs.str()); } #endif matAvg = matrixAvgScore; return _maxRes; } /** * The function getProfileAlignMatrix provides an interface for the user to get * the matrix to be used in the profile alignment. This depends on the matrix series * that was chosen, and also on the percent identity. 
* @param matrix[][] * @param pcid * @param minLen * @param scaleParam * @param matAvg * @return */ int SubMatrix::getProfileAlignMatrix(int matrix[NUMRES][NUMRES], double pcid, int minLen, PrfScaleValues& scaleParam, int& matAvg) { bool found = false; bool errorGiven = false; bool _negMatrix = userParameters->getUseNegMatrix(); int i = 0, j = 0; int _maxRes = 0; scaleParam.intScale = 100; string matrixPointer; string xrefPointer; #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("In the function getProfileAlignMatrix: \n"); } #endif if (userParameters->getDNAFlag()) { #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " (DNA AND Multiple align) "<< DNAMatrixName->c_str() << "\n"; logObject->logMsg(outs.str()); } #endif scaleParam.scale = 1.0; if (DNAMatrixName->compare("iub") == 0) { _matPtr = swgapdnamtVec; _matXref = &defaultDNAXref; matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref"; } else if (DNAMatrixName->compare("clustalw") == 0) { _matPtr = clustalvdnamtVec; _matXref = &defaultDNAXref; scaleParam.scale = 0.66; matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref"; } else { _matPtr = &userDNAMat; _matXref = &DNAXref; matrixPointer = "userDNAMat"; xrefPointer = "DNAXref"; } _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix, static_cast(scaleParam.intScale)); // Mark change 17-5-07 if (_maxRes == 0) { return ((int) - 1); } float _transitionWeight = userParameters->getTransitionWeight(); // fix suggested by Chanan Rubin at Compugen matrix[(*_matXref)[0]][(*_matXref)[4]] = static_cast(_transitionWeight * matrix[0][0]); matrix[(*_matXref)[4]][(*_matXref)[0]] = static_cast(_transitionWeight * matrix[0][0]); matrix[(*_matXref)[2]][(*_matXref)[11]] = static_cast(_transitionWeight * matrix[0][0]); matrix[(*_matXref)[11]][(*_matXref)[2]] = static_cast(_transitionWeight * matrix[0][0]); matrix[(*_matXref)[2]][(*_matXref)[12]] = static_cast(_transitionWeight * matrix[0][0]); 
matrix[(*_matXref)[12]][(*_matXref)[2]] = static_cast(_transitionWeight * matrix[0][0]); } else // Amino acid alignment!!!! { #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " (Protein AND Multiple align) "<< matrixName->c_str() << "\n"; logObject->logMsg(outs.str()); } #endif scaleParam.scale = 0.75; if (matrixName->compare("blosum") == 0) { scaleParam.scale = 0.75; if (_negMatrix || userParameters->getDistanceTree() == false) { _matPtr = blosum40mtVec; matrixPointer = "blosum40mtVec"; } else if (pcid > 80.0) { _matPtr = blosum80mtVec; matrixPointer = "blosum80mtVec"; } else if (pcid > 60.0) { _matPtr = blosum62mt2Vec; matrixPointer = "blosum62mt2Vec"; } else if (pcid > 40.0) { _matPtr = blosum45mtVec; matrixPointer = "blosum45mtVec"; } else if (pcid > 30.0) { scaleParam.scale = 0.5; _matPtr = blosum45mtVec; matrixPointer = "blosum45mtVec"; } else if (pcid > 20.0) { scaleParam.scale = 0.6; _matPtr = blosum45mtVec; matrixPointer = "blosum45mtVec"; } else { scaleParam.scale = 0.6; _matPtr = blosum30mtVec; matrixPointer = "blosum30mtVec"; } _matXref = &defaultAAXref; xrefPointer = "defaultAAXref"; } else if (matrixName->compare("pam") == 0) { scaleParam.scale = 0.75; if (_negMatrix || userParameters->getDistanceTree() == false) { _matPtr = pam120mtVec; matrixPointer = "pam120mtVec"; } else if (pcid > 80.0) { _matPtr = pam20mtVec; matrixPointer = "pam20mtVec"; } else if (pcid > 60.0) { _matPtr = pam60mtVec; matrixPointer = "pam60mtVec"; } else if (pcid > 40.0) { _matPtr = pam120mtVec; matrixPointer = "pam120mtVec"; } else { _matPtr = pam350mtVec; matrixPointer = "pam350mtVec"; } _matXref = &defaultAAXref; xrefPointer = "defaultAAXref"; } else if (matrixName->compare("gonnet") == 0) { scaleParam.scale /= 2.0; if (_negMatrix || userParameters->getDistanceTree() == false) { _matPtr = gon250mtVec; matrixPointer = "gon250mtVec"; } else if (pcid > 35.0) { _matPtr = gon80mtVec; scaleParam.scale /= 2.0; matrixPointer = "gon80mtVec"; } else if (pcid > 
25.0) { if (minLen < 100) { _matPtr = gon250mtVec; matrixPointer = "gon250mtVec"; } else { _matPtr = gon120mtVec; matrixPointer = "gon120mtVec"; } } else { if (minLen < 100) { _matPtr = gon350mtVec; matrixPointer = "gon350mtVec"; } else { _matPtr = gon160mtVec; matrixPointer = "gon160mtVec"; } } _matXref = &defaultAAXref; xrefPointer = "defaultAAXref"; scaleParam.intScale /= 10; } else if (matrixName->compare("id") == 0) { _matPtr = idmatVec; _matXref = &defaultAAXref; xrefPointer = "defaultAAXref"; matrixPointer = "idmatVec"; } else if (userSeries) { _matPtr = NULL; found = false; for (i = 0; i < matSeries.nmat; i++) { if (pcid >= matSeries.mat[i].llimit && pcid <= matSeries.mat[i].ulimit) { j = i; found = true; break; } } if (found == false) { if (!errorGiven) { utilityObject->warning( "\nSeries matrix not found for sequence percent identity = %d.\n""(Using first matrix in series as a default.)\n""This alignment may not be optimal!\n""SUGGESTION: Check your matrix series input file and try again.", (int)pcid); } errorGiven = true; j = 0; } _matPtr = matSeries.mat[j].matptr; _matXref = matSeries.mat[j].AAXref; // this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit scaleParam.scale = 0.5 + (pcid - matSeries.mat[j].llimit) / ( (matSeries.mat[j].ulimit - matSeries.mat[j].llimit) *2.0); xrefPointer = "matSeries.mat[j].AAXref"; matrixPointer = "matSeries.mat[j].matptr"; } else { _matPtr = &userMat; _matXref = &AAXref; xrefPointer = "AAXref"; matrixPointer = "userMat"; } _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix, static_cast(scaleParam.intScale)); if (_maxRes == 0) { cerr << "Error: matrix " << matrixName << " not found\n"; return ( - 1); } } #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " Called getMatrix with " << matrixPointer << " and " << xrefPointer << ".\n" << " intScale = " << scaleParam.intScale << ", scale = " << scaleParam.scale << ", pcid = " << pcid << "\n\n"; logObject->logMsg(outs.str()); } #endif 
matAvg = matrixAvgScore; return _maxRes; } /** * The function getMatrix is what the other parts of the code call to get a useable * substitution matrix. This is stored in matrix[NUMRES][NUMRES]. * @param matptr * @param xref * @param matrix[][] * @param negFlag * @param scale * @return */ int SubMatrix::getMatrix(Matrix* matptr, Xref* xref, int matrix[NUMRES][NUMRES], bool negFlag, int scale, bool minimise) { int ggScore = 0; int grScore = 0; int i, j, k, ix = 0; int ti, tj; int maxRes; int av1, av2, av3, min, max; for (i = 0; i < NUMRES; i++) { for (j = 0; j < NUMRES; j++) { matrix[i][j] = 0; } } ix = 0; maxRes = 0; for (i = 0; i <= userParameters->getMaxAA(); i++) { ti = (*xref)[i]; for (j = 0; j <= i; j++) { tj = (*xref)[j]; if ((ti != - 1) && (tj != - 1)) { k = (*matptr)[ix]; if (ti == tj) { matrix[ti][ti] = k * scale; maxRes++; } else { matrix[ti][tj] = k * scale; matrix[tj][ti] = k * scale; } ix++; } } } --maxRes; av1 = av2 = av3 = 0; for (i = 0; i <= userParameters->getMaxAA(); i++) { for (j = 0; j <= i; j++) { av1 += matrix[i][j]; if (i == j) { av2 += matrix[i][j]; } else { av3 += matrix[i][j]; } } } av1 /= (maxRes * maxRes) / 2; av2 /= maxRes; av3 = static_cast(av3 / (((float)(maxRes * maxRes - maxRes)) / 2)); matrixAvgScore = - av3; min = max = matrix[0][0]; for (i = 0; i <= userParameters->getMaxAA(); i++) for (j = 1; j <= i; j++) { if (matrix[i][j] < min) { min = matrix[i][j]; } if (matrix[i][j] > max) { max = matrix[i][j]; } } //cout << "MAX = " << max << "\n"; if(!minimise) { /* if requested, make a positive matrix - add -(lowest score) to every entry */ if (negFlag == false) { if (min < 0) { for (i = 0; i <= userParameters->getMaxAA(); i++) { ti = (*xref)[i]; if (ti != - 1) { for (j = 0; j <= userParameters->getMaxAA(); j++) { tj = (*xref)[j]; if (tj != - 1) { matrix[ti][tj] -= min; } } } } } } // local copies of the gap positions int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); for (i = 0; i < 
userParameters->getGapPos1(); i++) { matrix[i][_gapPos1] = grScore; matrix[_gapPos1][i] = grScore; matrix[i][_gapPos2] = grScore; matrix[_gapPos2][i] = grScore; } matrix[_gapPos1][_gapPos1] = ggScore; matrix[_gapPos2][_gapPos2] = ggScore; matrix[_gapPos2][_gapPos1] = ggScore; matrix[_gapPos1][_gapPos2] = ggScore; } else { // DO THE SAGA MATRIX for (i = 0; i <= userParameters->getMaxAA(); i++) { for (j = 0; j <= userParameters->getMaxAA(); j++) { matrix[i][j] = max - matrix[i][j]; } } } maxRes += 2; return (maxRes); } /** * The function getUserMatFromFile is used to read in a user defined matrix. * @param str * @param alignResidueType * @param alignType * @return */ bool SubMatrix::getUserMatFromFile(char *str, int alignResidueType, int alignType) { int maxRes; FILE *infile; // Need to check if the values are a valid combination! checkResidueAndAlignType(alignResidueType, alignType); if(userParameters->getMenuFlag()) { utilityObject->getStr(string("Enter name of the matrix file"), line2); } else { line2 = string(str); } if(line2.size() == 0) { return false; } if((infile = fopen(line2.c_str(), "r")) == NULL) { utilityObject->error("Cannot find matrix file [%s]", line2.c_str()); return false; } strcpy(str, line2.c_str()); // Find out which part of the code we are reading the matrix in for. mat = getUserMatAddress(alignResidueType, alignType); xref = getUserXrefAddress(alignResidueType, alignType); if ((alignResidueType == Protein) && (alignType == MultipleAlign)) { // Try read in a matrix series. maxRes = readMatrixSeries(str, userMat, AAXref); } else // Read in a single matrix!!! { maxRes = readUserMatrix(str, *mat, *xref); } if (maxRes <= 0) return false; return true; } /** * The function compareMatrices is used to compare 2 matrices that have been read in. * It will compare them in a given region. It will not compare all of them, as some of it * will be random memory. 
* @param mat1[][] * @param mat2[][] */ void SubMatrix::compareMatrices(int mat1[NUMRES][NUMRES], int mat2[NUMRES][NUMRES]) { int same = 1; for(int row = 0; row < NUMRES; row++) { for(int col = 0; col < NUMRES; col++) { if(mat1[row][col] != mat2[row][col]) { same = 0; cout << "The row is " << row << ". The column is " << col << endl; break; // It is not the same. End the loop. } } } if(same == 0) { cout << "It was not the same\n"; } else { cout << "It is the same\n"; } } /** * This function is simply to display the results of the getMatrix function. * This is so that I can compare it to the original clustal version of it. * @param mat[][] */ void SubMatrix::printGetMatrixResults(int mat[NUMRES][NUMRES]) { ofstream outfile("getmatrix.out"); if(!outfile) cerr<<"oops failed to open !!!\n"; for(int row = 0; row < NUMRES; row++) { for(int col = 0; col < NUMRES; col++) { if((mat[row][col] > 9) || (mat[row][col] < 0)) { outfile <<" "<< mat[row][col]<<","; } else { outfile <<" "<< mat[row][col]<<","; } } outfile<<"\n"; } } /** * This function is from the old interface. It simply calls the readMatrixSeries * function. It seems to only be called from the commandline part. * @param str * @return */ bool SubMatrix::getUserMatSeriesFromFile(char *str) { int maxRes; FILE *infile; if(userParameters->getMenuFlag()) // Check if we are using menu! { utilityObject->getStr(string("Enter name of the matrix file"), line2); } else { //strcpy(lin2,str); line2 = string(str); } if(line2.size() == 0) return false; if((infile = fopen(line2.c_str(), "r"))==NULL) { utilityObject->error("Cannot find matrix file [%s]",line2.c_str()); return false; } strcpy(str, line2.c_str()); maxRes = readMatrixSeries(str, userMat, AAXref); if (maxRes <= 0) return false; return true; } /* * The function readMatrixSeries is used to read in a series of matrices from a file. * It calls readUserMatrix to read in the individual matrices. The matrices are stored * in userMatSeries. 
*/ int SubMatrix::readMatrixSeries(const char *fileName, Matrix& userMat, Xref& xref) { FILE *fd = NULL; char mat_fileName[FILENAMELEN]; char inline1[1024]; int maxRes = 0; int nmat; int n, llimit, ulimit; if (fileName[0] == '\0') { utilityObject->error("comparison matrix not specified"); return ((int)0); } if ((fd = fopen(fileName, "r")) == NULL) { utilityObject->error("cannot open %s", fileName); return ((int)0); } /* check the first line to see if it's a series or a single matrix */ while (fgets(inline1, 1024, fd) != NULL) { if (commentline(inline1)) { continue; } if (utilityObject->lineType(inline1, "CLUSTAL_SERIES")) { userSeries = true; } else { userSeries = false; } break; } /* it's a single matrix */ if (userSeries == false) { fclose(fd); maxRes = readUserMatrix(fileName, userMat, xref); return (maxRes); } /* it's a series of matrices, find the next MATRIX line */ nmat = 0; matSeries.nmat = 0; while (fgets(inline1, 1024, fd) != NULL) { if (commentline(inline1)) { continue; } if (utilityObject->lineType(inline1, "MATRIX")) // Have found a matrix { if (sscanf(inline1 + 6, "%d %d %s", &llimit, &ulimit, mat_fileName) != 3) { utilityObject->error("Bad format in file %s\n", fileName); fclose(fd); return ((int)0); } if (llimit < 0 || llimit > 100 || ulimit < 0 || ulimit > 100) { utilityObject->error("Bad format in file %s\n", fileName); fclose(fd); return ((int)0); } if (ulimit <= llimit) { utilityObject->error("in file %s: lower limit is greater than upper (%d-%d)\n", fileName, llimit, ulimit); fclose(fd); return ((int)0); } n = readUserMatrix(mat_fileName, userMatSeries[nmat], AAXrefseries[nmat]); //cout << "Read in matrix number " << nmat << "\n"; // NOTE Testing!!!!! 
char nameOfFile[] = "matrix"; printInFormat(userMatSeries[nmat], nameOfFile); if (n <= 0) { utilityObject->error("Bad format in matrix file %s\n", mat_fileName); fclose(fd); return ((int)0); } matSeries.mat[nmat].llimit = llimit; matSeries.mat[nmat].ulimit = ulimit; matSeries.mat[nmat].matptr = &userMatSeries[nmat]; matSeries.mat[nmat].AAXref = &AAXrefseries[nmat]; nmat++; if(nmat >= MAXMAT) { // We have read in all the matrices that we can read into this vector // Write a message to the screen, and break out of loop. cerr << "The matrix series file has more entries than allowed in \n" << "a user defined series. The most that are allowed is " << MAXMAT << ".\n" << "The first " << MAXMAT << " have been read in and will be used.\n"; break; // Get out of the loop! } } } fclose(fd); matSeries.nmat = nmat; maxRes = n; return (maxRes); } /* * This function is used to read a single user matrix from a file. * It can be called repeatedly if there are multiple matrices in the file. */ int SubMatrix::readUserMatrix(const char *fileName, Matrix& userMat, Xref& xref) { double f; FILE *fd; int numargs, farg; int i, j, k = 0; char codes[NUMRES]; char inline1[1024]; char *args[NUMRES + 4]; char c1, c2; int ix1, ix = 0; int maxRes = 0; float scale; if (fileName[0] == '\0') { utilityObject->error("comparison matrix not specified"); return ((int)0); } if ((fd = fopen(fileName, "r")) == NULL) { utilityObject->error("cannot open %s", fileName); return ((int)0); } maxRes = 0; while (fgets(inline1, 1024, fd) != NULL) { if (commentline(inline1)) { continue; } if (utilityObject->lineType(inline1, "CLUSTAL_SERIES")) { utilityObject->error("in %s - single matrix expected.", fileName); fclose(fd); return ((int)0); } /* read residue characters. 
*/ k = 0; for (j = 0; j < (int)strlen(inline1); j++) { if (isalpha((int)inline1[j])) { codes[k++] = inline1[j]; } if (k > NUMRES) { utilityObject->error("too many entries in matrix %s", fileName); fclose(fd); return ((int)0); } } codes[k] = '\0'; break; } if (k == 0) { utilityObject->error("wrong format in matrix %s", fileName); fclose(fd); return ((int)0); } /* cross-reference the residues */ for (i = 0; i < NUMRES; i++) { xref[i] = - 1; } maxRes = 0; for (i = 0; (c1 = codes[i]); i++) { for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++) if (c1 == c2) { xref[i] = j; maxRes++; break; } if ((xref[i] == - 1) && (codes[i] != '*')) { utilityObject->warning("residue %c in matrix %s not recognised", codes[i], fileName); } } /* get the weights */ ix = ix1 = 0; while (fgets(inline1, 1024, fd) != NULL) { if (inline1[0] == '\n') { continue; } if (inline1[0] == '#' || inline1[0] == '!') { break; } numargs = getArgs(inline1, args, (int)(k + 1)); if (numargs < maxRes) { utilityObject->error("wrong format in matrix %s", fileName); fclose(fd); return ((int)0); } if (isalpha(args[0][0])) { farg = 1; } else { farg = 0; } /* decide whether the matrix values are float or decimal */ scale = 1.0; for (i = 0; i < (int)strlen(args[farg]); i++) if (args[farg][i] == '.') { /* we've found a float value */ scale = 10.0; break; } for (i = 0; i <= ix; i++) { if (xref[i] != - 1) { f = atof(args[i + farg]); userMat[ix1++] = (short)(f *scale); } } ix++; } if (ix != k + 1) { utilityObject->error("wrong format in matrix %s", fileName); fclose(fd); return ((int)0); } userMat.resize(ix1 + 1); maxRes += 2; fclose(fd); return (maxRes); } /** * * @param inline1 * @param args[] * @param max * @return */ int SubMatrix::getArgs(char *inline1,char *args[],int max) { char *inptr; int i; inptr = inline1; for (i = 0; i <= max; i++) { if ((args[i] = strtok(inptr, " \t\n")) == NULL) { break; } inptr = NULL; } return (i); } /** * * @return */ int SubMatrix::getMatrixNum() { return matrixNum; } /** * * 
@return */ int SubMatrix::getDNAMatrixNum() { return DNAMatrixNum; } /** * * @return */ int SubMatrix::getPWMatrixNum() { return pwMatrixNum; } /** * * @return */ int SubMatrix::getPWDNAMatrixNum() { return pwDNAMatrixNum; } /** * The function setCurrentNameAndNum is used to select a matrix series. * This will then be used for the alignment. The matrices will change, but the * series remains the same. We can set the series for pairwise/full for both * protein and DNA. NOTE: The default can be set to user defined matrices. * @param _matrixName * @param _matrixNum * @param alignResidueType * @param alignType */ void SubMatrix::setCurrentNameAndNum(string _matrixName, int _matrixNum, int alignResidueType,int alignType) { // Check if the values are valid. checkResidueAndAlignType(alignResidueType, alignType); string residue; string align; if((alignResidueType == Protein) && (alignType == Pairwise)) { residue = "Protein"; align = "Pairwise"; pwMatrixNum = _matrixNum; pwMatrixName = new string(_matrixName); } else if((alignResidueType == Protein) && (alignType == MultipleAlign)) { residue = "Protein"; align = "MultipleAlign"; matrixNum = _matrixNum; matrixName = new string(_matrixName); } else if((alignResidueType == DNA) && (alignType == Pairwise)) { residue = "DNA"; align = "Pairwise"; pwDNAMatrixNum = _matrixNum; pwDNAMatrixName = new string(_matrixName); } else if((alignResidueType == DNA) && (alignType == MultipleAlign)) { residue = "DNA"; align = "MultipleAlign"; DNAMatrixNum = _matrixNum; DNAMatrixName = new string(_matrixName); } #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << "The matrix/matrix series has been changed for " << "(" << residue << " AND " << align << ")." 
<< " New value: " << _matrixName << "\n\n"; logObject->logMsg(outs.str()); } #endif } /** * * @param alignResidueType * @param alignType * @return */ int SubMatrix::getMatrixNumForMenu(int alignResidueType, int alignType) { checkResidueAndAlignType(alignResidueType, alignType); if((alignResidueType == Protein) && (alignType == Pairwise)) { return pwMatrixNum; } else if((alignResidueType == Protein) && (alignType == MultipleAlign)) { return matrixNum; } else if((alignResidueType == DNA) && (alignType == Pairwise)) { return pwDNAMatrixNum; } else if((alignResidueType == DNA) && (alignType == MultipleAlign)) { return DNAMatrixNum; } else return -100; // NOTE NONE of these. I need to put in better error checking } /** * * @param line * @return */ bool SubMatrix::commentline(char* line) { int i; if (line[0] == '#') { return true; } for (i = 0; line[i] != '\n' && line[i] != EOS; i++) { if (!isspace(line[i])) { return false; } } return true; } /** * This function prints out the vector to the file specified by name. * The vector is printed out in a triangular format, the same as the way * the arrays are displayed in matrices.h. This function is for testing purposes. * @param temp * @param name */ void SubMatrix::printInFormat(vector& temp, char* name) { char nameOfFile[30]; strcpy(nameOfFile, name); strcat(nameOfFile, ".out"); ofstream outfile(nameOfFile); if(!outfile) cerr<<"oops failed to open !!!\n"; outfile<<"short "< 9) || (temp[i] < 0)) { outfile <<" "<< temp[i]<<","; } else { outfile <<" "<< temp[i]<<","; } // Now increment so far soFar++; // Check to see if the next element is the last element if((i + 1) == (int)temp.size() - 1) { // Print out the last element. Then a curly brace, and break. 
if((temp[i+1] > 9) || (temp[i+1] < 0)) { outfile <<" "<< temp[i + 1]<<"};\n"; } else { outfile <<" "<< temp[i + 1]<<"};\n"; } break; } } ofstream outfile2("temp.out"); for(int i = 0; i < (int)temp.size(); i++) { outfile2 << temp[i] << " "; } } /** * * @param temp * @param name */ void SubMatrix::printVectorToFile(vector& temp, char* name) { char nameOfFile[30]; strcpy(nameOfFile, name); strcat(nameOfFile, ".out"); ofstream outfile(nameOfFile); if(!outfile) cerr<<"oops failed to open !!!\n"; for(int i = 0; i < (int)temp.size(); i++) { if((temp[i] > 9) || (temp[i] < 0)) { outfile <<" "<< temp[i]<<","; } else { outfile <<" "<< temp[i]<<","; } } outfile.close(); } /** * * @param alignResidueType * @param alignType * @return */ Matrix* SubMatrix::getUserMatAddress(int alignResidueType, int alignType) { if((alignResidueType == Protein) && (alignType == Pairwise)) { return &pwUserMat; } else if((alignResidueType == Protein) && (alignType == MultipleAlign)) { return &userMat; } else if((alignResidueType == DNA) && (alignType == Pairwise)) { return &pwUserDNAMat; } else if((alignResidueType == DNA) && (alignType == MultipleAlign)) { return &userDNAMat; } return NULL; } /** * * @param alignResidueType * @param alignType * @return */ Xref* SubMatrix::getUserXrefAddress(int alignResidueType, int alignType) { if((alignResidueType == Protein) && (alignType == Pairwise)) { return &pwAAXref; } else if((alignResidueType == Protein) && (alignType == MultipleAlign)) { return &AAXref; } else if((alignResidueType == DNA) && (alignType == Pairwise)) { return &pwDNAXref; } else if((alignResidueType == DNA) && (alignType == MultipleAlign)) { return &DNAXref; } return NULL; } /** * This is an error handling routine. If an incorrect combination of values * is given, it will terminate the program. 
* @param alignResidueType * @param alignType */ void SubMatrix::checkResidueAndAlignType(int alignResidueType, int alignType) { if(((alignResidueType != 0) && (alignResidueType != 1)) || ((alignType != 0) && (alignType != 1))) { InvalidCombination ex(alignResidueType, alignType); ex.whatHappened(); exit(1); } } /** * The function tempInterface is used to call the SubMatrix in the way it is * supposed to be used. It is for testing purposes. * @param alignResidueType * @param alignType */ void SubMatrix::tempInterface(int alignResidueType, int alignType) { char userFile[FILENAMELEN + 1]; userParameters->setDNAFlag(true); strcpy(userFile, "mat1"); userParameters->setMenuFlag(false); getUserMatFromFile(userFile, DNA, Pairwise); setCurrentNameAndNum(userFile, 4, 3, Pairwise); setCurrentNameAndNum("gonnet", 4, Protein, Pairwise); } /** * A single matrix is used in scoring the alignment. This is Blosum45. This is the * function to get it. * @param matrix[][] * @return */ int SubMatrix::getAlnScoreMatrix(int matrix[NUMRES][NUMRES]) { int _maxNumRes; /* //_maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100); _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, false, 1, true); //_maxNumRes = getMatrix(blosum62mt2Vec, &defaultAAXref, matrix, true, 100); */ // 1.83 style _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100); return _maxNumRes; } /** * This function is used to get the matrix that will be used for the calculation of * the histogram. The histogram values are used in ClustalQt. 
* @param matrix[][] * @param matNum * @param dnaMatNum */ void SubMatrix::getQTMatrixForHistogram(int matrix[NUMRES][NUMRES]) { Matrix* _matPtrLocal; Xref* _matXrefLocal; int maxRes; if(userParameters->getDNAFlag()) { if (QTDNAHistMatNum == DNAUSERDEFINED) { _matPtrLocal = &QTscoreUserDNAMatrix; _matXrefLocal = &QTscoreDNAXref; } else if (QTDNAHistMatNum == DNACLUSTALW) { _matPtrLocal = clustalvdnamtVec; _matXrefLocal = &defaultDNAXref; } else { _matPtrLocal = swgapdnamtVec; _matXrefLocal = &defaultDNAXref; } } else { if (QTAAHistMatNum == AAHISTIDENTITY) { _matPtrLocal = idmatVec; _matXrefLocal = &defaultAAXref; } else if (QTAAHistMatNum == AAHISTGONNETPAM80) { _matPtrLocal = gon80mtVec; _matXrefLocal = &defaultAAXref; } else if (QTAAHistMatNum == AAHISTGONNETPAM120) { _matPtrLocal = gon120mtVec; _matXrefLocal = &defaultAAXref; } else if (QTAAHistMatNum == AAHISTUSER) { _matPtrLocal = &QTscoreUserMatrix; _matXrefLocal = &QTscoreXref; } else if (QTAAHistMatNum == AAHISTGONNETPAM350) { _matPtrLocal = gon350mtVec; _matXrefLocal = &defaultAAXref; } else // Default { _matPtrLocal = gon250mtVec; _matXrefLocal = &defaultAAXref; } } maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100); } void SubMatrix::getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES]) { Matrix* _matPtrLocal; Xref* _matXrefLocal; int maxRes; int _maxAA = userParameters->getMaxAA(); int max = 0; int offset; if(userParameters->getDNAFlag()) { if (QTsegmentDNAMatNum == DNAUSERDEFINED) { _matPtrLocal = &QTsegmentDNAMatrix; _matXrefLocal = &QTsegmentDNAXref; } else if (QTsegmentDNAMatNum == DNACLUSTALW) { _matPtrLocal = clustalvdnamtVec; _matXrefLocal = &defaultDNAXref; } else { _matPtrLocal = swgapdnamtVec; _matXrefLocal = &defaultDNAXref; } /* get a positive matrix - then adjust it according to scale */ maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100); /* find the maximum value */ for(int i = 0; i <= _maxAA; i++) { for(int j = 0; j <= _maxAA; j++) { if(matrix[i][j] > 
max) { max = matrix[i][j]; } } } /* subtract max * scale / 2 from each matrix value */ offset = static_cast(static_cast(max * userParameters->getQTlowScoreDNAMarkingScale()) / 20.0); for(int i = 0; i <= _maxAA; i++) { for(int j = 0; j <= _maxAA; j++) { matrix[i][j] -= offset; } } } else { if (QTsegmentAAMatNum == QTAASEGGONNETPAM80) { _matPtrLocal = gon80mtVec; _matXrefLocal = &defaultAAXref; } else if (QTsegmentAAMatNum == QTAASEGGONNETPAM120) { _matPtrLocal = gon120mtVec; _matXrefLocal = &defaultAAXref; } else if (QTsegmentAAMatNum == QTAASEGUSER) { _matPtrLocal = &QTsegmentAAMatrix; _matXrefLocal = &QTsegmentAAXref; } else if (QTsegmentAAMatNum == QTAASEGGONNETPAM350) { _matPtrLocal = gon350mtVec; _matXrefLocal = &defaultAAXref; } else { _matPtrLocal = gon250mtVec; _matXrefLocal = &defaultAAXref; } /* get a negative matrix */ maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, true, 100); } } bool SubMatrix::getQTLowScoreMatFromFile(char* fileName, bool dna) { int maxRes; FILE *infile; line2 = string(fileName); if(line2.size() == 0) { return false; } if((infile = fopen(line2.c_str(), "r")) == NULL) { utilityObject->error("Cannot find matrix file [%s]", line2.c_str()); return false; } strcpy(fileName, line2.c_str()); if(dna) { maxRes = readUserMatrix(fileName, QTsegmentDNAMatrix, QTsegmentDNAXref); } else { maxRes = readUserMatrix(fileName, QTsegmentAAMatrix, QTsegmentAAXref); } if (maxRes <= 0) { return false; } return true; } bool SubMatrix::getAAScoreMatFromFile(char *str) { int maxRes; FILE *infile; line2 = string(str); if(line2.size() == 0) { return false; } if((infile = fopen(line2.c_str(), "r")) == NULL) { utilityObject->error("Cannot find matrix file [%s]", line2.c_str()); return false; } strcpy(str, line2.c_str()); maxRes = readUserMatrix(str, QTscoreUserMatrix, QTscoreXref); if (maxRes <= 0) { return false; } return true; } bool SubMatrix::getDNAScoreMatFromFile(char *str) { int maxRes; FILE *infile; line2 = string(str); if(line2.size() == 0) { 
return false; } if((infile = fopen(line2.c_str(), "r")) == NULL) { utilityObject->error("Cannot find matrix file [%s]", line2.c_str()); return false; } strcpy(str, line2.c_str()); maxRes = readUserMatrix(str, QTscoreUserDNAMatrix, QTscoreDNAXref); if (maxRes <= 0) { return false; } return true; } } clustalx-2.1/clustalW/substitutionMatrix/0000755000175000017500000000000011470725216020555 5ustar ddineenddineenclustalx-2.1/clustalW/pairwise/PairwiseAlignBase.h0000644000175000017500000000112711470725216022142 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef PAIRWISEALIGNBASE_H #define PAIRWISEALIGNBASE_H #include #include "../alignment/Alignment.h" namespace clustalw { class PairwiseAlignBase { public: virtual ~PairwiseAlignBase(){}; /* Functions */ virtual void pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd) = 0; /* Attributes */ private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/pairwise/FullPairwiseAlign.h0000644000175000017500000000341111470725216022170 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef FULLPAIRWISEALIGN_H #define FULLPAIRWISEALIGN_H #include "PairwiseAlignBase.h" namespace clustalw { class FullPairwiseAlign : public PairwiseAlignBase { public: /* Functions */ FullPairwiseAlign(); virtual ~FullPairwiseAlign(){}; virtual void pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd); /* Attributes */ private: /* Functions */ void add(int v); int calcScore(int iat, int jat, int v1, int v2); float tracePath(int tsb1, int tsb2); void forwardPass(const vector* seq1, const vector* seq2, int n, int m); void reversePass(const vector* ia, const vector* ib); int diff(int A, int B, int M, int N, int tb, int te); void del(int k); int gap(int k); int tbgap(int k, int tb); int tegap(int k, int te); /* Attributes */ // I have constant pointers to the data. This allows for the fastest access. const vector* _ptrToSeq1; const vector* _ptrToSeq2; int _maxAlnLength; int intScale; float mmScore; int printPtr; int lastPrint; vector displ; vector HH; vector DD; vector RR; vector SS; int _gapOpen; // scaled to be an integer, this is not a mistake int _gapExtend; // scaled to be an integer, not a mistake int seq1; int seq2; int matrix[NUMRES][NUMRES]; int maxScore; int sb1; int sb2; int se1; int se2; }; } #endif clustalx-2.1/clustalW/pairwise/FullPairwiseAlign.cpp0000644000175000017500000003461511470725216022535 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "FullPairwiseAlign.h" #include namespace clustalw { FullPairwiseAlign::FullPairwiseAlign() : _maxAlnLength(0), intScale(0), mmScore(0), printPtr(0), lastPrint(0), _gapOpen(0), _gapExtend(0), seq1(0), seq2(0), maxScore(0), sb1(0), sb2(0), se1(0), se2(0) { } void FullPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd) { int si, sj, i; int n, m, len1, len2; int maxRes; int _matAvgScore; int res; double _score; float gapOpenScale, gapExtendScale; try { if(distMat->getSize() != alignPtr->getNumSeqs() + 1) { cerr << "The distance matrix is not the right size!\n" << "Need to terminate program.\n"; exit(1); } if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart)) { cerr << "The range for pairwise Alignment is incorrect.\n" << "Need to terminate program.\n"; exit(1); } _maxAlnLength = alignPtr->getMaxAlnLength(); int _numSeqs = alignPtr->getNumSeqs(); if(_numSeqs == 0) { return; } int num = (2 * _maxAlnLength) + 1; bool _DNAFlag = userParameters->getDNAFlag(); float _pwGapOpen, _pwGapExtend; _pwGapOpen = userParameters->getPWGapOpen(); _pwGapExtend = userParameters->getPWGapExtend(); displ.resize(num); HH.resize(_maxAlnLength); DD.resize(_maxAlnLength); RR.resize(_maxAlnLength); SS.resize(_maxAlnLength); // Note these 2 lines replace the stuff above because it is all done in the SubMatrix PairScaleValues scaleValues; maxRes = subMatrix->getPairwiseMatrix(matrix, scaleValues, _matAvgScore); if (maxRes == 0) { cerr << "Could not get the substitution matrix\n"; return; } intScale = scaleValues.intScale; gapOpenScale = scaleValues.gapOpenScale; gapExtendScale = scaleValues.gapExtendScale; int _gapPos1, _gapPos2; _gapPos1 = userParameters->getGapPos1(); _gapPos2 = userParameters->getGapPos2(); const SeqArray* _ptrToSeqArray = alignPtr->getSeqArray(); //This is faster! 
for (si = utilityObject->MAX(0, iStart); si < _numSeqs && si < iEnd; si++) { n = alignPtr->getSeqLength(si + 1); len1 = 0; for (i = 1; i <= n; i++) { res = (*_ptrToSeqArray)[si + 1][i]; if ((res != _gapPos1) && (res != _gapPos2)) { len1++; } } for (sj = utilityObject->MAX(si+1, jStart+1); sj < _numSeqs && sj < jEnd; sj++) { m = alignPtr->getSeqLength(sj + 1); if (n == 0 || m == 0) { distMat->SetAt(si + 1, sj + 1, 1.0); distMat->SetAt(sj + 1, si + 1, 1.0); continue; } len2 = 0; for (i = 1; i <= m; i++) { res = (*_ptrToSeqArray)[sj + 1][i]; if ((res != _gapPos1) && (res != _gapPos2)) { len2++; } } if (_DNAFlag) { _gapOpen = static_cast(2 * _pwGapOpen * intScale * gapOpenScale); _gapExtend = static_cast(_pwGapExtend * intScale * gapExtendScale); } else { if (_matAvgScore <= 0) { _gapOpen = 2 * static_cast((_pwGapOpen + log(static_cast(utilityObject->MIN(n, m)))) * intScale); } else { _gapOpen = static_cast(2 * _matAvgScore * (_pwGapOpen + log(static_cast(utilityObject->MIN(n, m)))) * gapOpenScale); } _gapExtend = static_cast(_pwGapExtend * intScale); } // align the sequences seq1 = si + 1; seq2 = sj + 1; _ptrToSeq1 = alignPtr->getSequence(seq1); _ptrToSeq2 = alignPtr->getSequence(seq2); forwardPass(_ptrToSeq1, _ptrToSeq2, n, m); reversePass(_ptrToSeq1, _ptrToSeq2); lastPrint = 0; printPtr = 1; // use Myers and Miller to align two sequences maxScore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1, (int)0, (int)0); // calculate percentage residue identity mmScore = tracePath(sb1, sb2); if (len1 == 0 || len2 == 0) { mmScore = 0; } else { mmScore /= (float)utilityObject->MIN(len1, len2); } _score = ((float)100.0 - mmScore) / (float)100.0; distMat->SetAt(si + 1, sj + 1, _score); distMat->SetAt(sj + 1, si + 1, _score); if(userParameters->getDisplayInfo()) { utilityObject->info("Sequences (%d:%d) Aligned. 
Score: %d", si+1, sj+1, (int)mmScore); } } } displ.clear(); HH.clear(); DD.clear(); RR.clear(); SS.clear(); } catch(const exception& e) { cerr << "An exception has occured in the FullPairwiseAlign class.\n" << e.what() << "\n"; exit(1); } } void FullPairwiseAlign::add(int v) { if (lastPrint < 0) { displ[printPtr - 1] = v; displ[printPtr++] = lastPrint; } else { lastPrint = displ[printPtr++] = v; } } inline int FullPairwiseAlign::calcScore(int iat, int jat, int v1, int v2) { return matrix[(*_ptrToSeq1)[v1 + iat]][(*_ptrToSeq2)[v2 + jat]]; } float FullPairwiseAlign::tracePath(int tsb1, int tsb2) { int res1, res2; int i1, i2; int i, k, pos, toDo; int count; float score; toDo = printPtr - 1; i1 = tsb1; i2 = tsb2; pos = 0; count = 0; for (i = 1; i <= toDo; ++i) { if (displ[i] == 0) { res1 = (*_ptrToSeq1)[i1]; res2 = (*_ptrToSeq2)[i2]; if ((res1 != userParameters->getGapPos1()) && (res2 != userParameters->getGapPos2()) && (res1 == res2)) { count++; } ++i1; ++i2; ++pos; } else { if ((k = displ[i]) > 0) { i2 += k; pos += k; } else { i1 -= k; pos -= k; } } } score = 100.0 *(float)count; return (score); } void FullPairwiseAlign::forwardPass(const vector* seq1, const vector* seq2, int n, int m) { int i, j; int f, hh, p, t; maxScore = 0; se1 = se2 = 0; for (i = 0; i <= m; i++) { HH[i] = 0; DD[i] = -_gapOpen; } for (i = 1; i <= n; i++) { hh = p = 0; f = -_gapOpen; for (j = 1; j <= m; j++) { f -= _gapExtend; t = hh - _gapOpen - _gapExtend; if (f < t) { f = t; } DD[j] -= _gapExtend; t = HH[j] - _gapOpen - _gapExtend; if (DD[j] < t) { DD[j] = t; } hh = p + matrix[(*seq1)[i]][(*seq2)[j]]; if (hh < f) { hh = f; } if (hh < DD[j]) { hh = DD[j]; } if (hh < 0) { hh = 0; } p = HH[j]; HH[j] = hh; if (hh > maxScore) { maxScore = hh; se1 = i; se2 = j; } } } } void FullPairwiseAlign::reversePass(const vector* seq1, const vector* seq2) { int i, j; int f, hh, p, t; int cost; cost = 0; sb1 = sb2 = 1; for (i = se2; i > 0; i--) { HH[i] = - 1; DD[i] = - 1; } for (i = se1; i > 0; i--) { hh = f = - 
1; if (i == se1) { p = 0; } else { p = - 1; } for (j = se2; j > 0; j--) { f -= _gapExtend; t = hh - _gapOpen - _gapExtend; if (f < t) { f = t; } DD[j] -= _gapExtend; t = HH[j] - _gapOpen - _gapExtend; if (DD[j] < t) { DD[j] = t; } hh = p + matrix[(*seq1)[i]][(*seq2)[j]]; if (hh < f) { hh = f; } if (hh < DD[j]) { hh = DD[j]; } p = HH[j]; HH[j] = hh; if (hh > cost) { cost = hh; sb1 = i; sb2 = j; if (cost >= maxScore) { break; } } } if (cost >= maxScore) { break; } } } int FullPairwiseAlign::diff(int A, int B, int M, int N, int tb, int te) { int type; int midi, midj, i, j; int midh; static int f, hh, e, s, t; if (N <= 0) { if (M > 0) { del(M); } return ( -(int)tbgap(M, tb)); } if (M <= 1) { if (M <= 0) { add(N); return ( -(int)tbgap(N, tb)); } midh = - (tb + _gapExtend) - tegap(N, te); hh = - (te + _gapExtend) - tbgap(N, tb); if (hh > midh) { midh = hh; } midj = 0; for (j = 1; j <= N; j++) { hh = calcScore(1, j, A, B) - tegap(N - j, te) - tbgap(j - 1, tb); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { del(1); add(N); } else { if (midj > 1) { add(midj - 1); } displ[printPtr++] = lastPrint = 0; if (midj < N) { add(N - midj); } } return midh; } // Divide: Find optimum midpoint (midi,midj) of cost midh midi = M / 2; HH[0] = 0; t = - tb; for (j = 1; j <= N; j++) { HH[j] = t = t - _gapExtend; DD[j] = t - _gapOpen; } t = - tb; for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t - _gapExtend; f = t - _gapOpen; for (j = 1; j <= N; j++) { if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend)) { f = hh; } if ((hh = HH[j] - _gapOpen - _gapExtend) > (e = DD[j] - _gapExtend)) { e = hh; } hh = s + calcScore(i, j, A, B); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; RR[N] = 0; t = - te; for (j = N - 1; j >= 0; j--) { RR[j] = t = t - _gapExtend; SS[j] = t - _gapOpen; } t = - te; for (i = M - 1; i >= midi; i--) { s = RR[N]; RR[N] = hh = t = t - _gapExtend; f = t - _gapOpen; for (j = N - 1; j >= 0; j--) { 
if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend)) { f = hh; } if ((hh = RR[j] - _gapOpen - _gapExtend) > (e = SS[j] - _gapExtend)) { e = hh; } hh = s + calcScore(i + 1, j + 1, A, B); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N] = RR[N]; midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + _gapOpen; if (hh > midh) { midh = hh; midj = j; type = 2; } } // Conquer recursively around midpoint if (type == 1) { // Type 1 gaps diff(A, B, midi, midj, tb, _gapOpen); diff(A + midi, B + midj, M - midi, N - midj, _gapOpen, te); } else { diff(A, B, midi - 1, midj, tb, 0); del(2); diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, te); } return midh; // Return the score of the best alignment } void FullPairwiseAlign::del(int k) { if (lastPrint < 0) { lastPrint = displ[printPtr - 1] -= k; } else { lastPrint = displ[printPtr++] = - (k); } } int FullPairwiseAlign::gap(int k) { if(k <= 0) { return 0; } else { return _gapOpen + _gapExtend * k; } } int FullPairwiseAlign::tbgap(int k, int tb) { if(k <= 0) { return 0; } else { return tb + _gapExtend * k; } } int FullPairwiseAlign::tegap(int k, int te) { if(k <= 0) { return 0; } else { return te + _gapExtend * k; } } } clustalx-2.1/clustalW/pairwise/FastPairwiseAlign.h0000644000175000017500000000271511470725216022171 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef FASTPAIRWISEALIGN_H #define FASTPAIRWISEALIGN_H #include "PairwiseAlignBase.h" namespace clustalw { class FastPairwiseAlign : public PairwiseAlignBase { public: /* Functions */ FastPairwiseAlign(); virtual ~FastPairwiseAlign(){}; virtual void pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd); /* Attributes */ private: /* Functions */ void pairAlign(const vector* seq, int l1, int l2); void makePPtrs(vector& tptr, vector& pl, const vector* seq, int length); void makeNPtrs(vector& tptr, vector& pl, const vector* seq, int length); void putFrag(int fs, int v1, int v2, int flen); int fragRelPos(int a1, int b1, int a2, int b2); void desQuickSort(vector& array1, vector& array2, int arraySize); /* Attributes */ vector displ; vector zza; vector zzb; vector zzc; vector zzd; int next; int currFrag; int maxSoFar; int vatend; Array2D accum; vector diagIndex; vector slopes; int _maxAlnLength; }; } #endif clustalx-2.1/clustalW/pairwise/FastPairwiseAlign.cpp0000644000175000017500000004137311470725216022527 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "FastPairwiseAlign.h" namespace clustalw { FastPairwiseAlign::FastPairwiseAlign() { _maxAlnLength = 0; } void FastPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd) { try { if(distMat->getSize() != alignPtr->getNumSeqs() + 1) { cerr << "The distance matrix is not the right size!\n" << "Need to terminate program.\n"; exit(1); } if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart)) { cout << "The range for pairwise Alignment is incorrect.\n" << "Need to terminate program.\n"; exit(1); } int i, j, dsr; double calcScore; bool _DNAFlag = userParameters->getDNAFlag(); _maxAlnLength = alignPtr->getMaxAlnLength(); int num = (2 * _maxAlnLength) + 1; accum.ResizeRect(5, num); displ.resize(num); slopes.resize(num); diagIndex.resize(num); zza.resize(_maxAlnLength + 1); zzb.resize(_maxAlnLength + 1); zzc.resize(_maxAlnLength + 1); zzd.resize(_maxAlnLength + 1); if (_DNAFlag) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } cout << "\n\n"; for (i = iStart + 1; i <= iEnd; ++i) { const vector* _seqIPtr = alignPtr->getSequence(i); int _seqILength = alignPtr->getSeqLength(i); if (_DNAFlag) { makeNPtrs(zza, zzc, _seqIPtr, _seqILength); } else { makePPtrs(zza, zzc, _seqIPtr, _seqILength); } double _score; for (j = jStart + 2 > i+1 ? jStart + 2 : i+1; j <= jEnd; ++j) { const vector* _seqJPtr = alignPtr->getSequence(j); int _seqJLength = alignPtr->getSeqLength(j); if (_DNAFlag) { makeNPtrs(zzb, zzd, _seqJPtr, _seqJLength); } else { makePPtrs(zzb, zzd, _seqJPtr, _seqJLength); } pairAlign(_seqIPtr, _seqILength, _seqJLength); if (!maxSoFar) { calcScore = 0.0; } else { calcScore = (double)accum[0][maxSoFar]; if (userParameters->getPercent()) { dsr = (_seqILength < _seqJLength) ? 
_seqILength : _seqJLength; calcScore = (calcScore / (double)dsr) *100.0; } } _score = (100.0 - calcScore) / 100.0; distMat->SetAt(i, j, _score); //distMat->SetAt(j, i, _score); /* distMat symmetric, FS, 2009-04-06 */ if(userParameters->getDisplayInfo()) { if (calcScore > 0.1) { utilityObject->info("Sequences (%d:%d) Aligned. Score: %lg", i, j, calcScore); } else { utilityObject->info("Sequences (%d:%d) Not Aligned", i, j); } } } } accum.clearArray(); displ.clear(); slopes.clear(); diagIndex.clear(); zza.clear(); zzb.clear(); zzc.clear(); zzd.clear(); } catch(const exception& e) { cerr << "An exception has occured in the FastPairwiseAlign class.\n" << e.what() << "\n"; exit(1); } } /* * Note: There is a problem with the treatment of DNA/RNA. * During file reading all residues are encoded as AminoAcids, * even before it has been established if they are AA or not. * This is bad and will have to be changed (later). * 'A' is assigned code 0, C is assigned 2, G = 6, T=18, U=19. * However, the fast alignment routines require that * A=0, C=1, G=2, T=U=3. In the best case the results of the * fast alignment (of DNA) will simply be meaningless, the * worst case is a core dump. * As a quick fix I implemented the following mask, that * (for DNA/RNA) translates 0->0, 2->1, 6->2, 18->3, 19->3. * This is awfull (it is not OO compliant) but it was quick, * uses (much) less memory than a second DNA array, and is * (much) faster than calling a translation function. * Ideally this will be removed, but this requires changes * to (i) the sequence encoding during file-reading AND * (ii) all the DNA substitution matrices. 
(Fabian, 2009-02-25) * * A B C D E F G H I K L M 0 N P Q R S * T U W X Y Z (goodmeasuregoodmeasuregood) */ int ziAA2DNA[] = {0,-1, 1,-1,-1,-1, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; const int ciMaxResID = 3; void FastPairwiseAlign::pairAlign(const vector* seq, int l1, int l2) { int pot[8], i, j, l, m, limit, pos, tl1, vn1, vn2, flen, osptr, fs; int tv1, tv2, encrypt, subt1, subt2, rmndr; bool flag; int residue; bool _DNAFlag = userParameters->getDNAFlag(); int _ktup = userParameters->getKtup(); int _maxAA = userParameters->getMaxAA(); int _windowGap = userParameters->getWindowGap(); int _window = userParameters->getWindow(); int _signif = userParameters->getSignif(); if (_DNAFlag) { for (i = 1; i <= _ktup; ++i) { pot[i] = (int)pow((double)4, (double)(i - 1)); } limit = (int)pow((double)4, (double)_ktup); } else { for (i = 1; i <= _ktup; i++) { pot[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1)); } limit = (int)pow((double)(_maxAA + 1), (double)_ktup); } tl1 = (l1 + l2) - 1; for (i = 1; i <= tl1; ++i) { slopes[i] = displ[i] = 0; diagIndex[i] = i; } // increment diagonal score for each k_tuple match for (i = 1; i <= limit; ++i) { vn1 = zzc[i]; while (true) { if (!vn1) { break; } vn2 = zzd[i]; while (vn2 != 0) { osptr = vn1 - vn2 + l2; ++displ[osptr]; vn2 = zzb[vn2]; } vn1 = zza[vn1]; } } // choose the top SIGNIF diagonals desQuickSort(displ, diagIndex, tl1); j = tl1 - _signif + 1; if (j < 1) { j = 1; } // flag all diagonals within WINDOW of a top diagonal for (i = tl1; i >= j; i--) if (displ[i] > 0) { pos = diagIndex[i]; l = (1 > pos - _window) ? 1 : pos - _window; m = (tl1 < pos + _window) ? 
tl1 : pos + _window; for (; l <= m; l++) { slopes[l] = 1; } } for (i = 1; i <= tl1; i++) { displ[i] = 0; } currFrag = maxSoFar = 0; for (i = 1; i <= (l1 - _ktup + 1); ++i) { encrypt = flag = 0; if (_DNAFlag){ for (j = 1; j <= _ktup; ++j) { residue = ziAA2DNA[(*seq)[i + j - 1]]; if ((residue < 0) || (residue > ciMaxResID)) { flag = true; break; } encrypt += ((residue) * pot[j]); } } else { for (j = 1; j <= _ktup; ++j) { residue = (*seq)[i + j - 1]; if ((residue < 0) || (residue > _maxAA)) { flag = true; break; } encrypt += ((residue) * pot[j]); } } if (flag) { continue; } ++encrypt; vn2 = zzd[encrypt]; flag = false; while (true) { if (!vn2) { flag = true; break; } osptr = i - vn2 + l2; if (slopes[osptr] != 1) { vn2 = zzb[vn2]; continue; } flen = 0; fs = _ktup; next = maxSoFar; // A-loop while (true) { if (!next) { ++currFrag; if (currFrag >= 2 * _maxAlnLength) { utilityObject->info("(Partial alignment)"); vatend = 1; return ; } displ[osptr] = currFrag; putFrag(fs, i, vn2, flen); } else { tv1 = accum[1][next]; tv2 = accum[2][next]; if (fragRelPos(i, vn2, tv1, tv2)) { if (i - vn2 == accum[1][next] - accum[2][next]) { if (i > accum[1][next] + (_ktup - 1)) { fs = accum[0][next] + _ktup; } else { rmndr = i - accum[1][next]; fs = accum[0][next] + rmndr; } flen = next; next = 0; continue; } else { if (displ[osptr] == 0) { subt1 = _ktup; } else { if (i > accum[1][displ[osptr]] + (_ktup - 1)) { subt1 = accum[0][displ[osptr]] + _ktup; } else { rmndr = i - accum[1][displ[osptr]]; subt1 = accum[0][displ[osptr]] + rmndr; } } subt2 = accum[0][next] - _windowGap + _ktup; if (subt2 > subt1) { flen = next; fs = subt2; } else { flen = displ[osptr]; fs = subt1; } next = 0; continue; } } else { next = accum[4][next]; continue; } } break; } // End of Aloop vn2 = zzb[vn2]; } } vatend = 0; } void FastPairwiseAlign::makePPtrs(vector& tptr, vector& pl, const vector* seq, int length) { int a[10]; int i, j, limit, code; bool flag; int residue; int _ktup = userParameters->getKtup(); int _maxAA 
= userParameters->getMaxAA(); for (i = 1; i <= _ktup; i++) { a[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1)); } limit = (int)pow((double)(_maxAA + 1), (double)_ktup); if(limit >= (int)pl.size()) { pl.resize(limit + 1); } if(length >= (int)tptr.size()) { tptr.resize(length + 1); } for (i = 1; i <= limit; ++i) { pl[i] = 0; } for (i = 1; i <= length; ++i) // NOTE changed this { tptr[i] = 0; } for (i = 1; i <= (length - _ktup + 1); ++i) { code = 0; flag = false; for (j = 1; j <= _ktup; ++j) { residue = (*seq)[i + j - 1]; if ((residue < 0) || (residue > _maxAA)) { flag = true; break; } code += ((residue) * a[j]); } if (flag) { continue; } ++code; if (pl[code] != 0) { tptr[i] = pl[code]; } pl[code] = i; //Ktuple code is at position i in sequence } } void FastPairwiseAlign::makeNPtrs(vector& tptr,vector& pl, const vector* seq, int length) { int pot[] = { 0, 1, 4, 16, 64, 256, 1024, 4096 }; int i, j, limit, code; bool flag; int residue; int _ktup = userParameters->getKtup(); limit = (int)pow((double)4, (double)_ktup); if(limit >= (int)pl.size()) { pl.resize(limit + 1); } if(length >= (int)tptr.size()) { tptr.resize(length + 1); } for (i = 1; i <= limit; ++i) { pl[i] = 0; } for (i = 1; i <= length; ++i) { tptr[i] = 0; } for (i = 1; i <= length - _ktup + 1; ++i) { code = 0; flag = false; for (j = 1; j <= _ktup; ++j) { residue = ziAA2DNA[(*seq)[i + j - 1]]; if ((residue < 0) || (residue > ciMaxResID)) { flag = true; break; } code += ((residue) * pot[j]); } if (flag) { continue; } ++code; if (pl[code] != 0) { tptr[i] = pl[code]; } pl[code] = i; } } void FastPairwiseAlign::putFrag(int fs, int v1, int v2, int flen) { int end; accum[0][currFrag] = fs; accum[1][currFrag] = v1; accum[2][currFrag] = v2; accum[3][currFrag] = flen; if (!maxSoFar) { maxSoFar = 1; accum[4][currFrag] = 0; return ; } if (fs >= accum[0][maxSoFar]) { accum[4][currFrag] = maxSoFar; maxSoFar = currFrag; return ; } else { next = maxSoFar; while (true) { end = next; next = accum[4][next]; if (fs >= 
accum[0][next]) { break; } } accum[4][currFrag] = next; accum[4][end] = currFrag; } } inline int FastPairwiseAlign::fragRelPos(int a1, int b1, int a2, int b2) { int ret; int _ktup = userParameters->getKtup(); ret = false; if (a1 - b1 == a2 - b2) { if (a2 < a1) { ret = true; } } else { if (a2 + _ktup - 1 < a1 && b2 + _ktup - 1 < b1) { ret = true; } } return ret; } void FastPairwiseAlign::desQuickSort(vector& array1, vector& array2, int arraySize) { // Quicksort routine, adapted from chapter 4, page 115 of software tools // by Kernighan and Plauger, (1986) // Sort the elements of array1 and sort the // elements of array2 accordingly int temp1, temp2; int p, pivlin; int i, j; int lst[50], ust[50]; // the maximum no. of elements must be // < log(base2) of 50 lst[1] = 1; ust[1] = arraySize - 1; p = 1; while (p > 0) { if (lst[p] >= ust[p]) { p--; } else { i = lst[p] - 1; j = ust[p]; pivlin = array1[j]; while (i < j) { for (i = i + 1; array1[i] < pivlin; i++) ; for (j = j - 1; j > i; j--) if (array1[j] <= pivlin) { break; } if (i < j) { temp1 = array1[i]; array1[i] = array1[j]; array1[j] = temp1; temp2 = array2[i]; array2[i] = array2[j]; array2[j] = temp2; } } j = ust[p]; temp1 = array1[i]; array1[i] = array1[j]; array1[j] = temp1; temp2 = array2[i]; array2[i] = array2[j]; array2[j] = temp2; if (i - lst[p] < ust[p] - i) { lst[p + 1] = lst[p]; ust[p + 1] = i - 1; lst[p] = i + 1; } else { lst[p + 1] = i + 1; ust[p + 1] = ust[p]; ust[p] = i - 1; } p = p + 1; } } return ; } } clustalx-2.1/clustalW/pairwise/0000755000175000017500000000000011470725216016437 5ustar ddineenddineenclustalx-2.1/clustalW/multipleAlign/ProfileWithSub.h0000644000175000017500000000123211470725216022477 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef PROFILEWITHSUB_H #define PROFILEWITHSUB_H #include "../alignment/Alignment.h" #include "ProfileBase.h" namespace clustalw { class ProfileWithSub : public ProfileBase { public: /* Functions */ ProfileWithSub(int prfLen, int firstS, int lastS); void resetPrf1(); void calcProfileWithSub(SeqArray* seqArray, vector* gaps, int matrix[NUMRES][NUMRES], vector* seqWeight); /* Attributes */ private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/multipleAlign/ProfileWithSub.cpp0000644000175000017500000000742111470725216023040 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ProfileWithSub.h" namespace clustalw { /** * * @param prfLen * @param firstS * @param lastS * @return */ ProfileWithSub::ProfileWithSub(int prfLen, int firstS, int lastS) : ProfileBase(prfLen, firstS, lastS) { } /** * */ void ProfileWithSub::resetPrf1() { profile.clear(); } /** * * @param seqArray * @param gaps * @param matrix[][] * @param seqWeight */ void ProfileWithSub::calcProfileWithSub(SeqArray* seqArray, vector* gaps, int matrix[NUMRES][NUMRES], vector* seqWeight) { vector > weighting; int sum2, aa, seq, res; int _numSeq; int col, pos; int f; float scale; int _maxAA = userParameters->getMaxAA(); int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); weighting.resize(NUMRES + 2, vector(prfLength + 2)); _numSeq = lastSeq - firstSeq; sum2 = 0; for (seq = firstSeq; seq < lastSeq; seq++) { sum2 += (*seqWeight)[seq]; } for (col = 0; col < prfLength; col++) { for (aa = 0; aa <= _maxAA; aa++) { weighting[aa][col] = 0; for (seq = firstSeq; seq < lastSeq; seq++) if (aa == (*seqArray)[seq][col]) { weighting[aa][col] += (*seqWeight)[seq]; } } weighting[_gapPos1][col] = 0; for (seq = firstSeq; seq < lastSeq; seq++) { if (_gapPos1 == (*seqArray)[seq][col]) { weighting[_gapPos1][col] += (*seqWeight)[seq]; } } 
weighting[_gapPos2][col] = 0; for (seq = firstSeq; seq < lastSeq; seq++) { if (_gapPos2 == (*seqArray)[seq][col]) { weighting[_gapPos2][col] += (*seqWeight)[seq]; } } } for (pos = 0; pos < prfLength; pos++) { if ((*gaps)[pos] == _numSeq) // If all gaps { for (res = 0; res <= _maxAA; res++) { profile[pos + 1][res] = matrix[res][_gapPos1]; } profile[pos + 1][_gapPos1] = matrix[_gapPos1][_gapPos1]; profile[pos + 1][_gapPos2] = matrix[_gapPos2][_gapPos1]; } else { scale = (float)(_numSeq - (*gaps)[pos]) / (float)_numSeq; for (res = 0; res <= _maxAA; res++) { f = 0; for (aa = 0; aa <= _maxAA; aa++) { f += (weighting[aa][pos] * matrix[aa][res]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][res]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][res]); profile[pos + 1][res] = (int)(((float)f / (float)sum2) * scale); } f = 0; for (aa = 0; aa <= _maxAA; aa++) { f += (weighting[aa][pos] * matrix[aa][_gapPos1]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][_gapPos1]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][_gapPos1]); profile[pos + 1][_gapPos1] = (int)(((float)f / (float)sum2) * scale); f = 0; for (aa = 0; aa <= _maxAA; aa++) { f += (weighting[aa][pos] * matrix[aa][_gapPos2]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][_gapPos2]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][_gapPos2]); profile[pos + 1][_gapPos2] = (int)(((float)f / (float)sum2) * scale); } } } } clustalx-2.1/clustalW/multipleAlign/ProfileStandard.h0000644000175000017500000000111711470725216022654 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef PROFILESTANDARD_H #define PROFILESTANDARD_H #include "../alignment/Alignment.h" #include "ProfileBase.h" namespace clustalw { class ProfileStandard : public ProfileBase { public: /* Functions */ ProfileStandard(int prfLen, int firstS, int lastS); void resetPrf2(); void calcStandardProfile(SeqArray* alignment, vector* seqWeight); /* Attributes */ private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/multipleAlign/ProfileStandard.cpp0000644000175000017500000000510311470725216023206 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ProfileStandard.h" namespace clustalw { /** * * @param prfLen * @param firstS * @param lastS * @return */ ProfileStandard::ProfileStandard(int prfLen, int firstS, int lastS) : ProfileBase(prfLen, firstS, lastS) { } /** * */ void ProfileStandard::resetPrf2() { profile.clear(); } /** * * @param seqArray * @param seqWeight */ void ProfileStandard::calcStandardProfile(SeqArray* seqArray, vector* seqWeight) { /** DONT FORGET TO CHECK THE SIZES ARE CORRECT */ int sum1, sum2; int i, d; int r; int _maxAA = userParameters->getMaxAA(); profile.resize(prfLength + 2, vector(LENCOL + 2)); int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); for (r = 0; r < prfLength; r++) { /* * calculate sum2 = number of residues found in this column */ sum2 = 0; for (i = firstSeq; i < lastSeq; i++) { sum2 += (*seqWeight)[i]; } /* * only include matrix comparison scores for those residue types found in this * column */ if (sum2 == 0) { for (d = 0; d <= _maxAA; d++) { profile[r + 1][d] = 0; } profile[r + 1][_gapPos1] = 0; profile[r + 1][_gapPos2] = 0; } else { for (d = 0; d <= _maxAA; d++) { sum1 = 0; for (i = firstSeq; i < lastSeq; i++) { if (d == (*seqArray)[i][r]) { sum1 += (*seqWeight)[i]; } } profile[r + 1][d] = (int)(10 *(float)sum1 / (float)sum2); } sum1 = 0; for (i = 
firstSeq; i < lastSeq; i++) { if (_gapPos1 == (*seqArray)[i][r]) { sum1 += (*seqWeight)[i]; } } profile[r + 1][_gapPos1] = (int)(10 *(float)sum1 / (float)sum2); sum1 = 0; for (i = firstSeq; i < lastSeq; i++) { if (_gapPos2 == (*seqArray)[i][r]) { sum1 += (*seqWeight)[i]; } } profile[r + 1][_gapPos2] = (int)(10 *(float)sum1 / (float)sum2); } } } } clustalx-2.1/clustalW/multipleAlign/ProfileBase.h0000644000175000017500000000453711470725216021777 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The reason why we have 2 different types of profiles is because one of them * (ProfileWithSub) has the substitution matrix information already in it. This * increases the performance when aligning 2 profile columns. */ #ifndef PROFILEBASE_H #define PROFILEBASE_H #include "../alignment/Alignment.h" namespace clustalw { class ProfileBase { public: /* Functions */ ProfileBase(int prfLen, int firstS, int lastS); void calcGapCoeff(SeqArray* seqArray, vector* gaps, bool useStructPenalties, vector* gapPenaltyMask, int gapCoef, int lenCoef); const SeqArray* getProfilePtr(){return &profile;}; void resetProfile(){for(int i = 0; i < (int)profile.size();i++) { profile[i].clear(); } profile.clear(); }; /* Attributes */ protected: /* Functions */ void calcVPenalties(SeqArray* aln, vector* weight); void calcResidueSpecificPen(SeqArray* aln, vector* weight); void calcHydrophilicPen(SeqArray* aln, vector* weight); int localPenalty(int penalty, int n, vector* resWeight, vector* hydWeight, vector* vWeight); float percentId(vector* s1, vector* s2); /* Attributes */ vector > profile; /* number of residues used for a window for the variable zone penalties */ int vwindow; /* vll is the lower limit for the variable zone penalties (vll < pen < 1.0) */ int vll; /* "Pascarella and Argos" residue specific gap modification factors. 
See Table 1 in the ClustalW 1994 NAR paper http://www.ncbi.nlm.nih.gov/pubmed/7984417 */ string pascarellaRes; vector pascarellaProb; vector > vlut; static const int numLetters = 26; float reducedGap; bool nVarPen; bool nHydPen; bool nPrefPen; int gdist; int prfLength; int firstSeq, lastSeq; private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/multipleAlign/ProfileBase.cpp0000644000175000017500000003226111470725216022325 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ProfileBase.h" namespace clustalw { /** * * @param prfLen * @param firstS * @param lastS */ ProfileBase::ProfileBase(int prfLen, int firstS, int lastS) : vwindow(5), vll(50), reducedGap(1.0), prfLength(prfLen), firstSeq(firstS), lastSeq(lastS) { vlut.resize(numLetters, vector(numLetters)); for(int i = 0; i < numLetters; i++) { vlut[i][i] = 1; } pascarellaRes = "ACDEFGHKILMNPQRSTVYW"; int pasprob[] = { 87, 87, 104, 69, 80, 139, 100, 104, 68, 79, 71, 137, 126, 93, 128, 124, 111, 75, 100, 77}; pascarellaProb = vector(pasprob, pasprob + 20); profile.resize(prfLength + 2, vector(LENCOL + 2)); } /** * * @param seqArray * @param gaps * @param useStructPenalties * @param gapPenaltyMask * @param gapCoef * @param lenCoef */ void ProfileBase::calcGapCoeff(SeqArray* seqArray, vector* gaps, bool useStructPenalties, vector* gapPenaltyMask, int gapCoef, int lenCoef) { int c; int i, j; int is, ie; int _numSeq, val, pcid; vector gapPos; vector vWeight, resWeight, hydWeight; float scale; int _maxAA = userParameters->getMaxAA(); _numSeq = lastSeq - firstSeq; if(_numSeq == 2) { pcid = static_cast(percentId(&(*seqArray)[firstSeq], &(*seqArray)[firstSeq + 1])); } else pcid = 0; for (j = 0; j < prfLength; j++) { (*gaps)[j] = 0; } // Check for a gap penalty mask if (useStructPenalties != false) { nVarPen = nHydPen = nPrefPen = true; gdist = 0; } else if 
(userParameters->getNoVarPenalties() == false && pcid > 60) { nHydPen = nPrefPen = true; nVarPen = false; } else { nVarPen = true; nHydPen = userParameters->getNoHydPenalties(); nPrefPen = userParameters->getNoPrefPenalties(); gdist = userParameters->getGapDist(); } for (i = firstSeq; i < lastSeq; i++) { // Include end gaps as gaps ? is = 0; ie = prfLength; if (userParameters->getUseEndGaps() == false && userParameters->getEndGapPenalties() == false) { for (j = 0; j < prfLength; j++) { c = (*seqArray)[i][j]; if ((c < 0) || (c > _maxAA)) is++; else break; } for (j = prfLength - 1; j >= 0; j--) { c = (*seqArray)[i][j]; if ((c < 0) || (c > _maxAA)) ie--; else break; } } for (j = is; j < ie; j++) { if (((*seqArray)[i][j] < 0) || ((*seqArray)[i][j] > _maxAA)) { (*gaps)[j]++; } } } int _DNAFlag = userParameters->getDNAFlag(); if ((!_DNAFlag) && (nVarPen == false)) { vWeight.resize(prfLength + 2); calcVPenalties(seqArray, &vWeight); } if ((!_DNAFlag) && (nPrefPen == false)) { resWeight.resize(prfLength + 2); calcResidueSpecificPen(seqArray, &resWeight); } if ((!_DNAFlag) && (nHydPen == false)) { hydWeight.resize(prfLength + 2); calcHydrophilicPen(seqArray, &hydWeight); } gapPos.resize(prfLength + 2); // mark the residues close to an existing gap (set gaps[i] = -ve) if (_DNAFlag || (gdist <= 0)) { for (i = 0; i < prfLength; i++) { gapPos[i] = (*gaps)[i]; } } else { i = 0; while (i < prfLength) { if ((*gaps)[i] <= 0) { gapPos[i] = (*gaps)[i]; i++; } else { for (j = -gdist + 1; j < 0; j++) { if ((i + j >= 0) && (i + j < prfLength) && (((*gaps)[i + j] == 0) || ((*gaps)[i + j] < j))) { gapPos[i + j] = j; } } while ((*gaps)[i] > 0) { if (i >= prfLength) { break; } gapPos[i] = (*gaps)[i]; i++; } for (j = 0; j < gdist; j++) { if ((*gaps)[i + j] > 0) { break; } if ((i + j >= 0) && (i + j < prfLength) && (((*gaps)[i + j] == 0) || ((*gaps)[i + j] < -j))) { gapPos[i + j] = -j-1; } } i += j; } } } for (j = 0; j < prfLength; j++) { if (gapPos[j] <= 0) { // apply residue-specific and 
hydrophilic gap penalties. if (!_DNAFlag) { profile[j + 1][GAPCOL] = localPenalty(gapCoef, j, &resWeight, &hydWeight, &vWeight); profile[j+1][LENCOL] = lenCoef; } else { profile[j + 1][GAPCOL] = gapCoef; profile[j + 1][LENCOL] = lenCoef; } // increase gap penalty near to existing gaps. if (gapPos[j] < 0) { profile[j + 1][GAPCOL] = static_cast((profile[j + 1][GAPCOL] * (2.0 + 2.0 * (gdist + gapPos[j]) / gdist))); } } else { scale = ((float)(_numSeq - (*gaps)[j]) / (float)_numSeq) * reducedGap; profile[j + 1][GAPCOL] = static_cast(scale * gapCoef); profile[j + 1][LENCOL] = static_cast(0.5 * lenCoef); } // apply the gap penalty mask if (useStructPenalties != NONE) { val = (*gapPenaltyMask)[j] - '0'; if (val > 0 && val < 10) { profile[j + 1][GAPCOL] *= val; profile[j + 1][LENCOL] *= val; } } // make sure no penalty is zero - even for all-gap positions if (profile[j + 1][GAPCOL] <= 0) { profile[j + 1][GAPCOL] = 1; } if (profile[j + 1][LENCOL] <= 0) { profile[j + 1][LENCOL] = 1; } } // set the penalties at the beginning and end of the profile if(userParameters->getEndGapPenalties() == true) { profile[0][GAPCOL] = gapCoef; profile[0][LENCOL] = lenCoef; } else { profile[0][GAPCOL] = 0; profile[0][LENCOL] = 0; profile[prfLength][GAPCOL] = 0; profile[prfLength][LENCOL] = 0; } if (userParameters->getDebug() > 0) { cout << "Opening penalties:\n"; for(i = 0; i <= prfLength; i++) { cout <<" " << i << ":" << profile[i][GAPCOL]<< " "; } cout << "\n"; } if (userParameters->getDebug() > 0) { cout << "Extension penalties:\n"; for(i = 0; i <= prfLength; i++) { cout << i << ":" << profile[i][LENCOL] << " "; } cout << "\n"; } } /** ************************************************************************************** * Protected functions * *****************************************************************************************/ /** * * @param aln * @param weight */ void ProfileBase::calcVPenalties(SeqArray* aln, vector* weight) { int ix1, ix2; int i, j, t; int _maxAA = 
userParameters->getMaxAA(); int aminoCodeix1, aminoCodeix2; for (i = 0; i < prfLength; i++) { (*weight)[i] = 0; t = 0; for(j = i - vwindow;j < i + vwindow; j++) { if(j >= 0 && j < prfLength) { ix1 = (*aln)[firstSeq][j]; ix2 = (*aln)[firstSeq + 1][j]; if ((ix1 < 0) || (ix1 > _maxAA) || (ix2 < 0) || (ix2 > _maxAA)) { continue; } aminoCodeix1 = userParameters->getAminoAcidCode(ix1); aminoCodeix2 = userParameters->getAminoAcidCode(ix2); (*weight)[i] += vlut[aminoCodeix1 - 'A'][aminoCodeix2 - 'A']; t++; } } /* now we have a weight -t < w < t */ (*weight)[i] +=t; if(t > 0) (*weight)[i] = ((*weight)[i] * 100)/(2 * t); else (*weight)[i] = 100; /* now we have a weight vll < w < 100 */ if ((*weight)[i] < vll) (*weight)[i] = vll; } } /** * * @param aln * @param weight */ void ProfileBase::calcResidueSpecificPen(SeqArray* aln, vector* weight) { char ix; int j, k, _numSeq; int i; int _maxAA = userParameters->getMaxAA(); int _pascarellaNumRes = pascarellaRes.size(); _numSeq = lastSeq - firstSeq; for (i = 0; i < prfLength; i++) { (*weight)[i] = 0; for (k = firstSeq; k < lastSeq; k++) { for (j = 0; j < _pascarellaNumRes; j++) { ix = (*aln)[k][i]; if ((ix < 0) || (ix > _maxAA)) continue; if (userParameters->getAminoAcidCode(ix) == pascarellaRes[j]) { (*weight)[i] += (180 - pascarellaProb[j]); break; } } } (*weight)[i] /= _numSeq; } } /** * * @param aln * @param weight */ void ProfileBase::calcHydrophilicPen(SeqArray* aln, vector* weight) { int res; int numHydResidues, j, k; int i, e, s; vector hyd; float scale; int _maxAA = userParameters->getMaxAA(); hyd.resize(prfLength + 2); string _hydResidues(userParameters->getHydResidues()); numHydResidues = _hydResidues.size(); for (i = 0; i < prfLength; i++) { (*weight)[i] = 0; } for (k = firstSeq; k < lastSeq; k++) { for (i = 0; i < prfLength; i++) { hyd[i] = 0; for (j = 0; j < numHydResidues; j++) { res = (*aln)[k][i]; if ((res < 0) || (res > _maxAA)) continue; if (userParameters->getAminoAcidCode(res) == _hydResidues[j]) { hyd[i] = 1; 
break; } } } i = 0; while (i < prfLength) { if (hyd[i] == 0) i++; else { s = i; while ((hyd[i] != 0) && (i < prfLength)) { i++; } e = i; if (e - s > 3) { for (j = s; j < e; j++) { (*weight)[j] += 100; } } } } } scale = lastSeq - firstSeq; for (i = 0; i < prfLength; i++) { (*weight)[i] = static_cast(((*weight)[i] / scale)); // Mark change 17-5-07 } } /** * * @param penalty * @param n * @param resWeight * @param hydWeight * @param vWeight * @return */ int ProfileBase::localPenalty(int penalty, int n, vector* resWeight, vector* hydWeight, vector* vWeight) { bool h = false; float gw; if (userParameters->getDNAFlag()) { return(1); } gw = 1.0; if (nVarPen == false) { gw *= (*vWeight)[n] / 100.0; } if (nHydPen == false) { if ((*hydWeight)[n] > 0) { gw *= 0.5; h = true; } } if ((nPrefPen == false) && (h == false)) { gw *= ((*resWeight)[n] / 100.0); } gw *= penalty; return (int)gw; } /** ********************************************************************* * Note dont think this will work. Dont have -3. Need to use lengths! * ************************************************************************/ /** * * @param s1 * @param s2 * @return */ float ProfileBase::percentId(vector* s1, vector* s2) { int i; int count, total; float score; count = total = 0; for (i = 0; i < prfLength; i++) { if (((*s1)[i] >= 0) && ((*s1)[i] < userParameters->getMaxAA())) { total++; if ((*s1)[i] == (*s2)[i]) { count++; } } if ((*s1)[i]==(-3) || (*s2)[i]==(-3)) { break; // I dont have -3 at the end! } } if(total == 0) { score = 0; } else { score = 100.0 * (float)count / (float)total; } return (score); } } clustalx-2.1/clustalW/multipleAlign/ProfileAlignAlgorithm.h0000644000175000017500000000140511470725216024015 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
 */
#ifndef PROFILEALIGNALGORITHM_H
#define PROFILEALIGNALGORITHM_H

#include /* NOTE(review): the include target is missing in this copy of the file - restore it from upstream */
#include "../alignment/Alignment.h"

namespace clustalw
{

/**
 * Abstract interface of a profile-profile alignment algorithm, together with
 * the working data every implementation shares.
 *
 * NOTE(review): the vector element types are missing throughout this copy of
 * the file (e.g. "vector* group") - restore them from upstream.
 */
class ProfileAlignAlgorithm
{
    public:
        virtual ~ProfileAlignAlgorithm(){};
        /* Functions */
        /* Align the two profiles selected through group; implemented by subclasses. */
        virtual int profileAlign(Alignment* alnPtr, DistMatrix* distMat,
                                 vector* group, int* aligned) = 0;
        /* Attributes */

    protected:
        /* Attributes */
        int prfLength1;     /* length of the first profile */
        int prfLength2;     /* length of the second profile */
        SeqArray seqArray;  /* working copy of the sequences, first profile first */
        vector alnWeight;   /* sequence weights, same order as seqArray */
        int nseqs1;         /* number of sequences in the first profile */
        int nseqs2;         /* number of sequences in the second profile */

    private:
        /* Functions */

        /* Attributes */
};

}
#endif
clustalx-2.1/clustalW/multipleAlign/MyersMillerProfileAlign.h0000644000175000017500000000374011470725216024337 0ustar ddineenddineen/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 */
#ifndef MYERSMILLERPROFILEALIGN_H
#define MYERSMILLERPROFILEALIGN_H

#include /* NOTE(review): the include target is missing in this copy of the file - restore it from upstream */
#include "ProfileAlignAlgorithm.h"
#include "ProfileStandard.h"
#include "ProfileWithSub.h"

namespace clustalw
{

/**
 * Profile-profile aligner; judging by the class and method names it follows
 * the Myers-Miller scheme (presumably linear-space - TODO confirm against the
 * method bodies, which are not part of this excerpt).
 */
class MyersMillerProfileAlign : public ProfileAlignAlgorithm
{
    public:
        virtual ~MyersMillerProfileAlign(){};

        /* Functions */
        MyersMillerProfileAlign();
        virtual int profileAlign(Alignment* alnPtr, DistMatrix* distMat,
                                 vector* group, int* aligned);
        /* Attributes */

    private:
        /* Functions */
        void addGGaps(Alignment* alnPtr, SeqArray* seqArray);
        void addGGapsMask(vector* mask,int len, vector* path1, vector* path2);
        int prfScore(int n, int m);
        int progTracepath();
        void progDel(int k);
        void progAdd(int k);
        void progAlign();
        int progDiff(int A, int B, int M, int N, int go1, int go2);
        int openPenalty1(int i, int j);
        int extPenalty1(int i, int j);
        int gapPenalty1(int i, int j, int k);
        int openPenalty2(int i, int j);
        int extPenalty2(int i, int j);
        int gapPenalty2(int i, int j, int k);

        /* Attributes */
        /* NOTE(review): the two raw pointers below are not initialised by the
           constructor; profileAlign() assigns them with new. Whether they are
           deleted is not visible in this excerpt - confirm. */
        ProfileWithSub* profileWithSub;     /* first profile, with substitution scores */
        ProfileStandard* profileStandard;   /* second profile, plain frequencies */
        int gapcoef1;   /* gap opening coefficient, profile 1 */
        int gapcoef2;   /* gap opening coefficient, profile 2 */
        int lencoef1;   /* gap extension coefficient, profile 1 */
        int lencoef2;   /* gap extension coefficient, profile 2 */
        vector displ;
        vector gS;
        vector HH;
        vector DD;
        vector RR;
        vector SS;
        vector alnPath1;
        vector alnPath2;
        int printPtr;
        int lastPrint;
        /* NOTE(review): profileAlign() declares a local matrix[NUMRES][NUMRES]
           that hides this member inside that function - confirm this is intended. */
        int matrix[32][32];
        vector gaps;            /* gap count per column, filled by calcGapCoeff() */
        bool switchProfiles;    /* true when the two groups were swapped so that profile 1 is the larger one */
        const SeqArray* profile1;
        const SeqArray* profile2;
        int _gapPos1, _gapPos2; /* the two gap residue codes, read from userParameters */
        int alignmentLength;
};

}
#endif
clustalx-2.1/clustalW/multipleAlign/MyersMillerProfileAlign.cpp0000644000175000017500000007363011470725216024677 0ustar ddineenddineen/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 * Changes: Mark 20-6-07, I added a call to calculateMaxlengths
 */
#ifdef HAVE_CONFIG_H
    #include "config.h"
#endif
#include "MyersMillerProfileAlign.h"
#include "Iteration.h"
#include /* NOTE(review): the include target is missing in this copy of the file - restore it from upstream */

namespace clustalw
{

/**
 * Caches the two gap residue codes from the global user parameters. No other
 * member is initialised here.
 */
MyersMillerProfileAlign::MyersMillerProfileAlign()
 : _gapPos1(userParameters->getGapPos1()),
   _gapPos2(userParameters->getGapPos2())
{

}

/**
 * Align the two groups of sequences marked in group.
 *
 * @param alnPtr  alignment holding the sequences (1-based sequence numbers)
 * @param distMat pairwise distances, used for the mean identity between the groups
 * @param group   group[i + 1] == 1 or 2 assigns sequence i + 1 to profile 1 or 2
 * @param aligned sequences with aligned[i + 1] == 0 are removed from both groups
 * @return 0 if either group is empty, -1 if no substitution matrix could be
 *         obtained; other return values are produced by code beyond this excerpt
 */
int MyersMillerProfileAlign::profileAlign(Alignment* alnPtr, DistMatrix* distMat,
                                          vector* group, int* aligned)
{
    bool negative = false;
    int i = 0, j = 0, count = 0;
    int numSeq = 0;
    int seqNum = 0;
    int len = 0, len1 = 0, len2 = 0, is = 0, minLen = 0;
    int se1 = 0, se2 = 0, sb1 = 0, sb2 = 0;
    int maxRes = 0;
    int c = 0;
    int score = 0;
    double logmin = 0.0, logdiff = 0.0;
    double pcid = 0.0;
    int matrix[NUMRES][NUMRES];
    int numSeqsProf1 = 0, numSeqsProf2 = 0;

    // Initialise the matrix. To get rid of a valgrind error.
for(int i = 0; i < NUMRES; i++) { for(int j = 0; j < NUMRES; j++) { matrix[i][j] = 0; } } numSeq = alnPtr->getNumSeqs(); seqArray.resize(numSeq); alnWeight.resize(numSeq); for (i = 0; i < numSeq; i++) { if (aligned[i + 1] == 0) { (*group)[i + 1] = 0; } } nseqs1 = nseqs2 = 0; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 1) { nseqs1++; } else if ((*group)[i + 1] == 2) { nseqs2++; } } if ((nseqs1 == 0) || (nseqs2 == 0)) { return (0); } numSeqsProf1 = nseqs1; numSeqsProf2 = nseqs2; if (nseqs2 > nseqs1) { switchProfiles = true; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 1) { (*group)[i + 1] = 2; } else if ((*group)[i + 1] == 2) { (*group)[i + 1] = 1; } } } else { switchProfiles = false; } // calculate the mean of the sequence pc identities between the two groups count = 0; pcid = 0.0; negative = userParameters->getUseNegMatrix(); for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 1) { for (j = 0; j < numSeq; j++) { if ((*group)[j + 1] == 2) { count++; pcid += (*distMat)(i + 1, j + 1); } } } } pcid = pcid / (float)count; // Make the first profile. prfLength1 = 0; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 1) { if (alnPtr->getSeqLength(i + 1) > prfLength1) { prfLength1 = alnPtr->getSeqLength(i + 1); } } } nseqs1 = 0; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 1) { len = alnPtr->getSeqLength(i + 1); // initialise with the other vector! seqArray[nseqs1] = vector(alnPtr->getSequence(i + 1)->begin() + 1, alnPtr->getSequence(i + 1)->end()); seqArray[nseqs1].resize(prfLength1 + extraEndElemNum); for (j = len; j < prfLength1; j++) { seqArray[nseqs1][j] = userParameters->getGapPos1(); } alnWeight[nseqs1] = alnPtr->getSeqWeight(i); nseqs1++; } } // Make the second profile. 
prfLength2 = 0; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 2) { if (alnPtr->getSeqLength(i + 1) > prfLength2) { prfLength2 = alnPtr->getSeqLength(i + 1); } } } nseqs2 = 0; for (i = 0; i < numSeq; i++) { if ((*group)[i + 1] == 2) { len = alnPtr->getSeqLength(i + 1); seqArray[nseqs1 + nseqs2] = vector(alnPtr->getSequence(i + 1)->begin() + 1, alnPtr->getSequence(i + 1)->end()); seqArray[nseqs1 + nseqs2].resize(prfLength2 + extraEndElemNum); for (j = len; j < prfLength2; j++) { seqArray[nseqs1 + nseqs2][j] = userParameters->getGapPos1(); } seqArray[nseqs1 + nseqs2][j] = ENDALN; alnWeight[nseqs1 + nseqs2] = alnPtr->getSeqWeight(i); //cout << "weight " << nseqs1 + nseqs2 << alnWeight[nseqs1 + nseqs2] << "\n"; nseqs2++; } } // Change the Max alignment length in the Alignment Object! alnPtr->setMaxAlnLength(prfLength1 + prfLength2 + 2); // calculate real length of profiles - removing gaps! len1 = 0; for (i = 0; i < nseqs1; i++) { is = 0; for (j = 0; j < utilityObject->MIN(static_cast(seqArray[i].size()), prfLength1); j++) { c = seqArray[i][j]; if ((c != _gapPos1) && (c != _gapPos2)) { is++; } } len1 += is; } len1 = static_cast(len1 / (float)nseqs1); len2 = 0; for (i = nseqs1; i < nseqs2 + nseqs1; i++) { is = 0; for (j = 0; j < utilityObject->MIN(static_cast(seqArray[i].size()), prfLength2); j++) { c = seqArray[i][j]; if ((c != _gapPos1) && (c != _gapPos2)) { is++; } } len2 += is; } len2 = static_cast(len2 / (float)nseqs2); PrfScaleValues scaleVals; scaleVals.scale = 1.0; scaleVals.intScale = 100.0; int matAvgScore = 0; minLen = utilityObject->MIN(len1, len2); maxRes = 0; // Get the substitution matrix that will be stored in 'matrix' maxRes = subMatrix->getProfileAlignMatrix(matrix, pcid, minLen, scaleVals, matAvgScore); if (maxRes == 0 || maxRes == -1) { return -1; } if (userParameters->getDNAFlag()) { gapcoef1 = gapcoef2 = static_cast(100.0 * userParameters->getGapOpen() * scaleVals.scale); lencoef1 = lencoef2 = static_cast(100.0 * 
userParameters->getGapExtend() * scaleVals.scale); } else { if (len1 == 0 || len2 == 0) { logmin = 1.0; logdiff = 1.0; } else { logmin = 1.0 / log10((double)minLen); if (len2 < len1) { logdiff = 1.0 + 0.5 * log10((double)((float)len2 / (float)len1)); } else if (len1 < len2) { logdiff = 1.0 + 0.5 * log10((double)((float)len1 / (float)len2)); } else { logdiff = 1.0; } if (logdiff < 0.9) { logdiff = 0.9; } } if (negative) { gapcoef1 = gapcoef2 = static_cast(100.0 *(float)(userParameters->getGapOpen())); lencoef1 = lencoef2 = static_cast(100.0 * userParameters->getGapExtend()); } else { if (matAvgScore <= 0) { gapcoef1 = gapcoef2 = static_cast(100.0 *(float)(userParameters->getGapOpen() + logmin)); } else { gapcoef1 = gapcoef2 = static_cast(scaleVals.scale * matAvgScore * (float)(userParameters->getGapOpen() / (logdiff * logmin))); } lencoef1 = lencoef2 = static_cast(100.0 * userParameters->getGapExtend()); } } // We need one profile with substitution matrix information and one without! // But this will change when we have the LE scoring function. profileWithSub = new ProfileWithSub(prfLength1, 0, nseqs1); profileStandard = new ProfileStandard(prfLength2, nseqs1, nseqs1 + nseqs2); // Calculate the profile array with Substitution matrix info // calculate the Gap Coefficients. gaps.resize(alnPtr->getMaxAlnLength() + 1); bool profile1Pen; if (switchProfiles == false) { profile1Pen = userParameters->getStructPenalties1() && userParameters->getUseSS1(); profileWithSub->calcGapCoeff(&seqArray, &gaps, profile1Pen, alnPtr->getGapPenaltyMask1(), gapcoef1, lencoef1); } else { profile1Pen = userParameters->getStructPenalties2() && userParameters->getUseSS2(); profileWithSub->calcGapCoeff(&seqArray, &gaps, profile1Pen, alnPtr->getGapPenaltyMask2(), gapcoef1, lencoef1); } // calculate the profile matrix. 
profileWithSub->calcProfileWithSub(&seqArray, &gaps, matrix, &alnWeight); profile1 = profileWithSub->getProfilePtr(); if (userParameters->getDebug() > 4) { string aminoAcidCodes = userParameters->getAminoAcidCodes(); for (j = 0; j <= userParameters->getMaxAA(); j++) { cout << aminoAcidCodes[j] << " "; } cout << "\n"; for (i = 0; i < prfLength1; i++) { for (j = 0; j <= userParameters->getMaxAA(); j++) { cout << (*profile1)[i + 1][j] << " "; } cout << (*profile1)[i + 1][_gapPos1]<< " "; cout << (*profile1)[i + 1][_gapPos2]<< " "; cout << (*profile1)[i + 1][GAPCOL] << " " << (*profile1)[i + 1][LENCOL] << "\n"; } } // Calculate the standard profile array // calculate the Gap Coefficients. bool profile2Pen; if (switchProfiles == false) { profile2Pen = userParameters->getStructPenalties2() && userParameters->getUseSS2(); profileStandard->calcGapCoeff(&seqArray, &gaps, profile2Pen, alnPtr->getGapPenaltyMask2(), gapcoef2, lencoef2); } else { profile2Pen = userParameters->getStructPenalties1() && userParameters->getUseSS1(); profileStandard->calcGapCoeff(&seqArray, &gaps, profile2Pen, alnPtr->getGapPenaltyMask1(), gapcoef2, lencoef2); } // calculate the profile matrix. 
profileStandard->calcStandardProfile(&seqArray, &alnWeight); profile2 = profileStandard->getProfilePtr(); if (userParameters->getDebug() > 4) { string aminoAcidCodes = userParameters->getAminoAcidCodes(); for (j = 0; j <= userParameters->getMaxAA(); j++) { cout << aminoAcidCodes[j] << " "; } cout << "\n"; for (i = 0; i < prfLength2; i++) { for (j = 0; j <= userParameters->getMaxAA(); j++) { cout << (*profile2)[i + 1][j] << " "; } cout << (*profile2)[i + 1][_gapPos1]<< " "; cout << (*profile2)[i + 1][_gapPos2]<< " "; cout << (*profile2)[i + 1][GAPCOL] << " " << (*profile2)[i + 1][LENCOL] << "\n"; } } alnWeight.clear(); int _maxAlnLength = alnPtr->getMaxAlnLength(); alnPath1.resize(_maxAlnLength + 1); alnPath2.resize(_maxAlnLength + 1); //align the profiles // use Myers and Miller to align two sequences lastPrint = 0; printPtr = 1; sb1 = sb2 = 0; se1 = prfLength1; se2 = prfLength2; HH.resize(_maxAlnLength + 1); DD.resize(_maxAlnLength + 1); RR.resize(_maxAlnLength + 1); SS.resize(_maxAlnLength + 1); gS.resize(_maxAlnLength + 1); displ.resize(_maxAlnLength + 1); score = progDiff(sb1, sb2, se1 - sb1, se2 - sb2, (*profile1)[0][GAPCOL], (*profile1)[prfLength1][GAPCOL]); // May not need if we recreate the Profile every time! 
HH.clear(); DD.clear(); RR.clear(); SS.clear(); gS.clear(); alignmentLength = progTracepath(); displ.clear(); addGGaps(alnPtr, &seqArray); profileStandard->resetProfile(); profileWithSub->resetProfile(); prfLength1 = alignmentLength; alnPath1.clear(); alnPath2.clear(); if(userParameters->getDoRemoveFirstIteration() == TREE) { Iteration iterateObj; iterateObj.iterationOnTreeNode(numSeqsProf1, numSeqsProf2, prfLength1, prfLength2, &seqArray); } // Now we resize the SeqArray that holds the sequences in the alignment class // and also update it with the new aligned sequences SeqArray* newSequences = alnPtr->getSeqArrayForRealloc(); seqNum = 0; for (j = 0; j < numSeq; j++) { if ((*group)[j + 1] == 1) { (*newSequences)[j + 1].clear(); (*newSequences)[j + 1].resize(prfLength1 + 1); for (i = 0; i < prfLength1; i++) { (*newSequences)[j + 1][i + 1] = seqArray[seqNum][i]; } seqNum++; } } for (j = 0; j < numSeq; j++) { if ((*group)[j + 1] == 2) { (*newSequences)[j + 1].clear(); (*newSequences)[j + 1].resize(prfLength1 + 1); for (i = 0; i < prfLength1; i++) { (*newSequences)[j + 1][i + 1] = seqArray[seqNum][i]; } seqNum++; } } alnPtr->calculateMaxLengths(); // Mark change 20-6-07 gaps.clear(); seqArray.clear(); delete profileWithSub; delete profileStandard; int retScore = (score / 100); return retScore; } /** **************************************************************************************** * Private functions * *******************************************************************************************/ /** * * @param A * @param B * @param M * @param N * @param go1 * @param go2 * @return */ int MyersMillerProfileAlign::progDiff(int A, int B, int M, int N, int go1, int go2) { int midi, midj, type; int midh; static int t, tl, g, h; { static int i, j; static int hh, f, e, s; /* Boundary cases: M <= 1 or N == 0 */ if (userParameters->getDebug() > 2) { cout << "A " << A << " B " << B << " M " << M << " N " << N << " midi " << M / 2 << " go1 " << go1 << " go2 " << go2<< "\n"; } 
/* if sequence B is empty.... */ if (N <= 0) { /* if sequence A is not empty.... */ if (M > 0) { /* delete residues A[1] to A[M] */ progDel(M); } return ( - gapPenalty1(A, B, M)); } /* if sequence A is empty.... */ if (M <= 1) { if (M <= 0) { /* insert residues B[1] to B[N] */ progAdd(N); return ( - gapPenalty2(A, B, N)); } /* if sequence A has just one residue.... */ if (go1 == 0) { midh = - gapPenalty1(A + 1, B + 1, N); } else { midh = - gapPenalty2(A + 1, B, 1) - gapPenalty1(A + 1, B + 1, N); } midj = 0; for (j = 1; j <= N; j++) { hh = - gapPenalty1(A, B + 1, j - 1) + prfScore(A + 1, B + j) - gapPenalty1(A + 1, B + j + 1, N - j); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { progAdd(N); progDel(1); } else { if (midj > 1) { progAdd(midj - 1); } progAlign(); if (midj < N) { progAdd(N - midj); } } return midh; } /* Divide sequence A in half: midi */ midi = M / 2; /* In a forward phase, calculate all HH[j] and HH[j] */ HH[0] = 0; t = - openPenalty1(A, B + 1); tl = - extPenalty1(A, B + 1); for (j = 1; j <= N; j++) { HH[j] = t = t + tl; DD[j] = t - openPenalty2(A + 1, B + j); } if (go1 == 0) { t = 0; } else { t = - openPenalty2(A + 1, B); } tl = - extPenalty2(A + 1, B); for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t + tl; f = t - openPenalty1(A + i, B + 1); for (j = 1; j <= N; j++) { g = openPenalty1(A + i, B + j); h = extPenalty1(A + i, B + j); if ((hh = hh - g - h) > (f = f - h)) { f = hh; } g = openPenalty2(A + i, B + j); h = extPenalty2(A + i, B + j); if ((hh = HH[j] - g - h) > (e = DD[j] - h)) { e = hh; } hh = s + prfScore(A + i, B + j); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; /* In a reverse phase, calculate all RR[j] and SS[j] */ RR[N] = 0; tl = 0; for (j = N - 1; j >= 0; j--) { g = - openPenalty1(A + M, B + j + 1); tl -= extPenalty1(A + M, B + j + 1); RR[j] = g + tl; SS[j] = RR[j] - openPenalty2(A + M, B + j); gS[j] = openPenalty2(A + M, B + j); } tl = 0; for (i = M - 1; i 
>= midi; i--) { s = RR[N]; if (go2 == 0) { g = 0; } else { g = - openPenalty2(A + i + 1, B + N); } tl -= extPenalty2(A + i + 1, B + N); RR[N] = hh = g + tl; t = openPenalty1(A + i, B + N); f = RR[N] - t; for (j = N - 1; j >= 0; j--) { g = openPenalty1(A + i, B + j + 1); h = extPenalty1(A + i, B + j + 1); if ((hh = hh - g - h) > (f = f - h - g + t)) { f = hh; } t = g; g = openPenalty2(A + i + 1, B + j); h = extPenalty2(A + i + 1, B + j); hh = RR[j] - g - h; if (i == (M - 1)) { e = SS[j] - h; } else { e = SS[j] - h - g + openPenalty2(A + i + 2, B + j); gS[j] = g; } if (hh > e) { e = hh; } hh = s + prfScore(A + i + 1, B + j + 1); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N] = RR[N]; gS[N] = openPenalty2(A + midi + 1, B + N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + gS[j]; if (hh > midh) { midh = hh; midj = j; type = 2; } } } /* Conquer recursively around midpoint */ if (type == 1) { /* Type 1 gaps */ if (userParameters->getDebug() > 2) { cout << "Type 1,1: midj " << midj << "\n"; } progDiff(A, B, midi, midj, go1, 1); if (userParameters->getDebug() > 2) { cout << "Type 1,2: midj " << midj << "\n"; } progDiff(A + midi, B + midj, M - midi, N - midj, 1, go2); } else { if (userParameters->getDebug() > 2) { cout << "Type 2,1: midj " << midj << "\n"; } progDiff(A, B, midi - 1, midj, go1, 0); progDel(2); if (userParameters->getDebug() > 2) { cout << "Type 2,2: midj " << midj << "\n"; } progDiff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, go2); } return midh; /* Return the score of the best alignment */ } /** * * @param alnPtr * @param seqArray */ void MyersMillerProfileAlign::addGGaps(Alignment* alnPtr, SeqArray* seqArray) { int j; int i, ix; int len; vector ta; 
ta.resize(alignmentLength + 1); for (j = 0; j < nseqs1; j++) { ix = 0; for (i = 0; i < alignmentLength; i++) { if (alnPath1[i] == 2) { if (ix < ((int)(*seqArray)[j].size() - extraEndElemNum)) { ta[i] = (*seqArray)[j][ix]; } else { ta[i] = ENDALN; } ix++; } else if (alnPath1[i] == 1) { /* insertion in first alignment... */ ta[i] = _gapPos1; } else { cerr << "Error in aln_path\n"; } } ta[i] = ENDALN; len = alignmentLength; (*seqArray)[j].resize(len + 2); for (i = 0; i < len; i++) { (*seqArray)[j][i] = ta[i]; } (*seqArray)[j][len] = ENDALN; } for (j = nseqs1; j < nseqs1 + nseqs2; j++) { ix = 0; for (i = 0; i < alignmentLength; i++) { if (alnPath2[i] == 2) { if (ix < ((int)(*seqArray)[j].size() - extraEndElemNum)) { ta[i] = (*seqArray)[j][ix]; } else { ta[i] = ENDALN; } ix++; } else if (alnPath2[i] == 1) { /* insertion in second alignment... */ ta[i] = _gapPos1; } else { cerr << "Error in alnPath\n"; } } ta[i] = ENDALN; len = alignmentLength; (*seqArray)[j].resize(len + 2); for (i = 0; i < len; i++) { (*seqArray)[j][i] = ta[i]; } (*seqArray)[j][len] = ENDALN; } if (userParameters->getStructPenalties1() != NONE) { addGGapsMask(alnPtr->getGapPenaltyMask1(), alignmentLength, &alnPath1, &alnPath2); } if (userParameters->getStructPenalties1() == SECST) { addGGapsMask(alnPtr->getSecStructMask1(), alignmentLength, &alnPath1, &alnPath2); } if (userParameters->getStructPenalties2() != NONE) { addGGapsMask(alnPtr->getGapPenaltyMask2(), alignmentLength, &alnPath2, &alnPath1); } if (userParameters->getStructPenalties2() == SECST) { addGGapsMask(alnPtr->getSecStructMask2(), alignmentLength, &alnPath2, &alnPath1); } } /** * * @param mask * @param len * @param path1 * @param path2 */ void MyersMillerProfileAlign::addGGapsMask(vector* mask, int len, vector* path1, vector* path2) { int i, ix; char *ta; ta = new char[len + 1]; ix = 0; if (switchProfiles == false) { for (i = 0; i < len; i++) { if ((*path1)[i] == 2) { ta[i] = (*mask)[ix]; ix++; } else if ((*path1)[i] == 1) { ta[i] = 
_gapPos1; } } } else { for (i = 0; i < len; i++) { if ((*path2)[i] == 2) { ta[i] = (*mask)[ix]; ix++; } else if ((*path2)[i] == 1) { ta[i] = _gapPos1; } } } mask->resize(len + 2); for (i = 0; i < len; i++) { (*mask)[i] = ta[i]; } delete [] ta; ta = NULL; } /** * * @param n * @param m * @return */ inline int MyersMillerProfileAlign::prfScore(int n, int m) { int ix; int score; score = 0; int _maxAA = userParameters->getMaxAA(); // NOTE Change here! for (ix = 0; ix <= _maxAA; ix++) { score += ((*profile1)[n][ix] * (*profile2)[m][ix]); } score += ((*profile1)[n][_gapPos1] * (*profile2)[m][_gapPos1]); score += ((*profile1)[n][_gapPos2] * (*profile2)[m][_gapPos2]); return (score / 10); } /** * * @return */ int MyersMillerProfileAlign::progTracepath() { int i, j, k, pos, toDo; int alignLen; pos = 0; toDo = printPtr - 1; for (i = 1; i <= toDo; ++i) { if (userParameters->getDebug() > 1) { cout << displ[i] << " "; } if (displ[i] == 0) { alnPath1[pos] = 2; alnPath2[pos] = 2; ++pos; } else { if ((k = displ[i]) > 0) { for (j = 0; j <= k - 1; ++j) { alnPath2[pos + j] = 2; alnPath1[pos + j] = 1; } pos += k; } else { k = (displ[i] < 0) ? displ[i] * - 1: displ[i]; for (j = 0; j <= k - 1; ++j) { alnPath1[pos + j] = 2; alnPath2[pos + j] = 1; } pos += k; } } } if (userParameters->getDebug() > 1) { cout << "\n"; } alignLen = pos; return alignLen; } /** * * @param k */ void MyersMillerProfileAlign::progDel(int k) { if (lastPrint < 0) { lastPrint = displ[printPtr - 1] -= k; } else { lastPrint = displ[printPtr++] = - (k); } } /** * * @param k */ void MyersMillerProfileAlign::progAdd(int k) { if (lastPrint < 0) { displ[printPtr - 1] = k; displ[printPtr++] = lastPrint; } else { lastPrint = displ[printPtr++] = k; } } /** * */ void MyersMillerProfileAlign::progAlign() { displ[printPtr++] = lastPrint = 0; } /** * * @param i * @param j * @return */ inline int MyersMillerProfileAlign::openPenalty1(int i, int j) // NOTE Change here! 
{ int g; if (!userParameters->getEndGapPenalties() && (i == 0 || i == prfLength1)) { return (0); } g = (*profile2)[j][GAPCOL] + (*profile1)[i][GAPCOL]; return (g); } /** * * @param i * @param j * @return */ inline int MyersMillerProfileAlign::extPenalty1(int i, int j) // NOTE Change here! { int h; if (!userParameters->getEndGapPenalties() && (i == 0 || i == prfLength1)) { return (0); } h = (*profile2)[j][LENCOL]; return (h); } /** * * @param i * @param j * @param k * @return */ int MyersMillerProfileAlign::gapPenalty1(int i, int j, int k) { int ix; int gp; int g, h = 0; if (k <= 0) { return (0); } if (!userParameters->getEndGapPenalties() && (i == 0 || i == prfLength1)) { return (0); } g = (*profile2)[j][GAPCOL] + (*profile1)[i][GAPCOL]; for (ix = 0; ix < k && ix + j < prfLength2; ix++) { h += (*profile2)[ix + j][LENCOL]; } gp = g + h; return (gp); } /** * * @param i * @param j * @return */ inline int MyersMillerProfileAlign::openPenalty2(int i, int j) // NOTE Change here! { int g; if (!userParameters->getEndGapPenalties() && (j == 0 || j == prfLength2)) { return (0); } g = (*profile1)[i][GAPCOL] + (*profile2)[j][GAPCOL]; return (g); } /** * * @param i * @param j * @return */ inline int MyersMillerProfileAlign::extPenalty2(int i, int j) // NOTE Change here! 
{ int h; if (!userParameters->getEndGapPenalties() && (j == 0 || j == prfLength2)) { return (0); } h = (*profile1)[i][LENCOL]; return (h); } /** * * @param i * @param j * @param k * @return */ int MyersMillerProfileAlign::gapPenalty2(int i, int j, int k) { int ix; int gp; int g, h = 0; if (k <= 0) { return (0); } if (!userParameters->getEndGapPenalties() && (j == 0 || j == prfLength2)) { return (0); } g = (*profile1)[i][GAPCOL] + (*profile2)[j][GAPCOL]; for (ix = 0; ix < k && ix + i < prfLength1; ix++) { h += (*profile1)[ix + i][LENCOL]; } gp = g + h; return (gp); } } clustalx-2.1/clustalW/multipleAlign/MSA.h0000644000175000017500000000204711470725216020216 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef MSA_H #define MSA_H #include #include "ProfileBase.h" #include "../alignment/Alignment.h" //#include "../calcAlignSteps/Tree.h" #include "../tree/AlignmentSteps.h" #include "ProfileAlignAlgorithm.h" namespace clustalw { //using tree::AlignmentSteps; class MSA { public: /* Functions */ int multiSeqAlign(Alignment* alnPtr, DistMatrix* distMat, vector* seqWeight, AlignmentSteps* progSteps, int iStart); int seqsAlignToProfile(Alignment* alnPtr, DistMatrix* distMat, vector* seqWeight, int iStart, string phylipName); int calcPairwiseForProfileAlign(Alignment* alnPtr, DistMatrix* distMat); int doProfileAlign(Alignment* alnPtr, DistMatrix* distMat, vector* prof1Weight, vector* prof2Weight); /* Attributes */ private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/multipleAlign/MSA.cpp0000644000175000017500000004474511470725216020564 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * Changes: * * Mark: 23-01-2007: There was a problem with running an alignment with only * one sequence in it. 
I needed to make a change to the multiSeqAlign function. ****************************************************************************/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "MSA.h" #include "MyersMillerProfileAlign.h" //#include "../phylogeneticTree/ClusterTree.h" #include "../general/debuglogObject.h" namespace clustalw { /** * * @param alnPtr * @param distMat * @param iStart * @param phylipName * @return */ int MSA::multiSeqAlign(Alignment* alnPtr, DistMatrix* distMat, vector* seqWeight, AlignmentSteps* progSteps, int iStart) { if(!progSteps) { return 0; } int* aligned; vector group; int ix; int* maxid; int max = 0, sum = 0; vector treeWeight; int i = 0, j = 0, set = 0, iseq = 0; int entries = 0; int score = 0; int _numSteps = 0; utilityObject->info("Start of Multiple Alignment\n"); int _numSeqs = alnPtr->getNumSeqs(); vector newOutputIndex(_numSeqs); alnPtr->addSeqWeight(seqWeight); ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign; _numSteps = progSteps->getNumSteps(); // for each sequence, find the most closely related sequence maxid = new int[_numSeqs + 1]; for (i = 1; i <= _numSeqs; i++) { maxid[i] = -1; for (j = 1; j <= _numSeqs; j++) if (j != i && maxid[i] < (*distMat)(i, j)) { maxid[i] = static_cast((*distMat)(i, j)); } } // group the sequences according to their relative divergence if (iStart == 0) { // start the multiple alignments......... utilityObject->info("Aligning..."); // first pass, align closely related sequences first.... 
ix = 0; aligned = new int[_numSeqs + 1]; for (i = 0; i <= _numSeqs; i++) { aligned[i] = 0; } const vector >* ptrToSets = progSteps->getSteps(); for (set = 1; set <= _numSteps; ++set) { entries = 0; for (i = 1; i <= _numSeqs; i++) { if (((*ptrToSets)[set][i] != 0) && (maxid[i] > userParameters->getDivergenceCutoff())) { entries++; if (aligned[i] == 0) { if (userParameters->getOutputOrder() == INPUT) { ++ix; newOutputIndex[i - 1] = i; } else { if(ix >= (int)newOutputIndex.size()) { cerr << "ERROR: size = " << newOutputIndex.size() << "ix = " << ix << "\n"; exit(1); } else { newOutputIndex[ix] = i; ++ix; } } aligned[i] = 1; } } } if (entries > 0) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Doing profile align"); } #endif score = alignAlgorithm->profileAlign(alnPtr, distMat, progSteps->getStep(set), aligned); } else { score = 0; } // negative score means fatal error... exit now! if (score < 0) { return (-1); } if(userParameters->getDisplayInfo()) { if ((entries > 0) && (score > 0)) { utilityObject->info("Group %d: Sequences:%4d Score:%d", set, entries, score); } else { utilityObject->info("Group %d: Delayed", set); } } } } else { aligned = new int[_numSeqs + 1]; ix = 0; for (i = 1; i <= iStart + 1; i++) { aligned[i] = 1; ++ix; newOutputIndex[i - 1] = i; } for (i = iStart + 2; i <= _numSeqs; i++) { aligned[i] = 0; } } // second pass - align remaining, more divergent sequences..... // if not all sequences were aligned, for each unaligned sequence, // find it's closest pair amongst the aligned sequences. 
group.resize(_numSeqs + 1); treeWeight.resize(_numSeqs); for (i = 0; i < _numSeqs; i++) { treeWeight[i] = (*seqWeight)[i]; } // if we haven't aligned any sequences, in the first pass - align the // two most closely related sequences now if (ix == 0) { max = -1; iseq = 0; for (i = 1; i <= _numSeqs; i++) { for (j = i + 1; j <= _numSeqs; j++) { if (max < (*distMat)(i, j)) { max = static_cast((*distMat)(i, j)); // Mark change 17-5-07 iseq = i; } } } aligned[iseq] = 1; if (userParameters->getOutputOrder() == INPUT) { ++ix; newOutputIndex[iseq - 1] = iseq; } else { newOutputIndex[ix] = iseq; ++ix; } } while (ix < _numSeqs) { for (i = 1; i <= _numSeqs; i++) { if (aligned[i] == 0) { maxid[i] = - 1; for (j = 1; j <= _numSeqs; j++) if ((maxid[i] < (*distMat)(i, j)) && (aligned[j] != 0)) { maxid[i] = static_cast((*distMat)(i, j));// Mark change 17-5-07 } } } // find the most closely related sequence to those already aligned max = - 1; iseq = 0; for (i = 1; i <= _numSeqs; i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } } // align this sequence to the existing alignment // weight sequences with percent identity with profile // OR...., multiply sequence weights from tree by percent identity with new sequence if (userParameters->getNoWeights() == false) { for (j = 0; j < _numSeqs; j++) if (aligned[j + 1] != 0) { (*seqWeight)[j] = static_cast(treeWeight[j] * (*distMat)(j + 1, iseq)); } // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR sum = 0; for (j = 0; j < _numSeqs; j++) { if (aligned[j + 1] != 0) { sum += (*seqWeight)[j]; } } if (sum == 0) { for (j = 0; j < _numSeqs; j++) { (*seqWeight)[j] = 1; } sum = j; } for (j = 0; j < _numSeqs; j++) { if (aligned[j + 1] != 0) { (*seqWeight)[j] = ((*seqWeight)[j] * INT_SCALE_FACTOR) / sum; if ((*seqWeight)[j] < 1) { (*seqWeight)[j] = 1; } } } } entries = 0; for (j = 1; j <= _numSeqs; j++) { if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq == j) { group[j] = 2; 
entries++; } } alnPtr->addSeqWeight(seqWeight); aligned[iseq] = 1; score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned); if (userParameters->getOutputOrder() == INPUT) { ++ix; newOutputIndex[iseq - 1] = iseq; } else { newOutputIndex[ix] = iseq; ++ix; } } alnPtr->addOutputIndex(&newOutputIndex); if(userParameters->getDisplayInfo()) { int alignmentScore = alnPtr->alignScore(); // ?? check, FS, 2009-05-18 } delete alignAlgorithm; delete [] aligned; delete [] maxid; return (_numSeqs); } /** * * @param alnPtr * @param distMat * @param iStart * @param phylipName * @return */ int MSA::seqsAlignToProfile(Alignment* alnPtr, DistMatrix* distMat, vector* seqWeight, int iStart, string phylipName) { int *aligned; vector treeWeight; vector group; int ix; int *maxid; int max = 0; int i = 0, j = 0, iseq = 0; int sum = 0, entries = 0; int score = 0; int _numSeqs = alnPtr->getNumSeqs(); utilityObject->info("Start of Multiple Alignment\n"); ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign; // calculate sequence weights according to branch lengths of the tree - // weights in global variable seq_weight normalised to sum to 100 vector newOutputIndex(_numSeqs); //groupTree.calcSeqWeights(0, _numSeqs, &seqWeight); treeWeight.resize(_numSeqs); for (i = 0; i < _numSeqs; i++) { treeWeight[i] = (*seqWeight)[i]; } // for each sequence, find the most closely related sequence maxid = new int[_numSeqs + 1]; for (i = 1; i <= _numSeqs; i++) { maxid[i] = - 1; for (j = 1; j <= _numSeqs; j++) { if (maxid[i] < (*distMat)(i, j)) { maxid[i] = static_cast((*distMat)(i, j)); // Mark change 17-5-07 } } } aligned = new int[_numSeqs + 1]; ix = 0; for (i = 1; i <= iStart + 1; i++) { aligned[i] = 1; ++ix; newOutputIndex[i - 1] = i; } for (i = iStart + 2; i <= _numSeqs; i++) { aligned[i] = 0; } // for each unaligned sequence, find it's closest pair amongst the // aligned sequences. 
group.resize(_numSeqs + 1); while (ix < _numSeqs) { if (ix > 0) { for (i = 1; i <= _numSeqs; i++) { if (aligned[i] == 0) { maxid[i] = - 1; for (j = 1; j <= _numSeqs; j++) { if ((maxid[i] < (*distMat)(i, j)) && (aligned[j] != 0)) { maxid[i] = static_cast((*distMat)(i, j)); } } } } } // find the most closely related sequence to those already aligned max = -1; for (i = 1; i <= _numSeqs; i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } } // align this sequence to the existing alignment entries = 0; for (j = 1; j <= _numSeqs; j++) { if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq == j) { group[j] = 2; entries++; } } aligned[iseq] = 1; // multiply sequence weights from tree by percent // identity with new sequence for (j = 0; j < _numSeqs; j++) { (*seqWeight)[j] = static_cast(treeWeight[j] * (*distMat)(j + 1, iseq)); } // // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR // sum = 0; for (j = 0; j < _numSeqs; j++) { if (group[j + 1] == 1) { sum += (*seqWeight)[j]; } } if (sum == 0) { for (j = 0; j < _numSeqs; j++) { (*seqWeight)[j] = 1; } sum = j; } for (j = 0; j < _numSeqs; j++) { (*seqWeight)[j] = ((*seqWeight)[j] * INT_SCALE_FACTOR) / sum; if ((*seqWeight)[j] < 1) { (*seqWeight)[j] = 1; } } // Add the seqWeights to the Alignment object!!!! 
alnPtr->addSeqWeight(seqWeight); score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned); utilityObject->info("Sequence:%d Score:%d", iseq, score); if (userParameters->getOutputOrder() == INPUT) { ++ix; newOutputIndex[iseq - 1] = iseq; } else { newOutputIndex[ix] = iseq; ++ix; } } delete [] aligned; delete [] maxid; delete alignAlgorithm; alnPtr->addOutputIndex(&newOutputIndex); if(userParameters->getDisplayInfo()) { alnPtr->alignScore(); } return (_numSeqs); } /** * * @param alnPtr * @param distMat * @return */ int MSA::calcPairwiseForProfileAlign(Alignment* alnPtr, DistMatrix* distMat) { //Tree groupTree; int i, j, temp; int entries; int* aligned; vector group; vector seqWeight; float dscore; int score; int _numSeqs = alnPtr->getNumSeqs(); seqWeight.resize(_numSeqs); ProfileAlignAlgorithm* alignAlg = new MyersMillerProfileAlign; utilityObject->info("Start of Initial Alignment"); /* calculate sequence weights according to branch lengths of the tree - * weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */ temp = INT_SCALE_FACTOR / _numSeqs; for (i = 0; i < _numSeqs; i++) { seqWeight[i] = temp; } userParameters->setDistanceTree(false); /* do the initial alignment......... 
*/ group.resize(_numSeqs + 1); for (i = 1; i <= alnPtr->getProfile1NumSeqs(); ++i) { group[i] = 1; } for (i = alnPtr->getProfile1NumSeqs() + 1; i <= _numSeqs; ++i) { group[i] = 2; } entries = _numSeqs; aligned = new int[_numSeqs + 1]; for (i = 1; i <= _numSeqs; i++) { aligned[i] = 1; } alnPtr->addSeqWeight(&seqWeight); score = alignAlg->profileAlign(alnPtr, distMat, &group, aligned); utilityObject->info("Sequences:%d Score:%d", entries, score); delete [] aligned; for (i = 1; i <= _numSeqs; i++) { for (j = i + 1; j <= _numSeqs; j++) { dscore = alnPtr->countid(i, j); (*distMat)(i, j) = ((double)100.0 - (double)dscore) / (double)100.0; (*distMat)(j, i) = (*distMat)(i, j); } } delete alignAlg; return (_numSeqs); } /** * * @param alnPtr * @param distMat * @param p1TreeName * @param p2TreeName * @return */ int MSA::doProfileAlign(Alignment* alnPtr, DistMatrix* distMat, vector* prof1Weight, vector* prof2Weight) { //Tree groupTree1, groupTree2; int i, j, sum, entries; int score; int *aligned; vector group; int *maxid; //vector prof1Weight, prof2Weight; int _profile1NumSeqs = alnPtr->getProfile1NumSeqs(); int _numSeqs = alnPtr->getNumSeqs(); vector _seqWeight, _outputIndex; utilityObject->info("Start of Multiple Alignment\n"); _seqWeight.resize(_numSeqs + 1); _outputIndex.resize(_numSeqs); ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign; // weight sequences with max percent identity with other profile maxid = new int[_numSeqs + 1]; for (i = 0; i < _profile1NumSeqs; i++) { maxid[i] = 0; for (j = _profile1NumSeqs + 1; j <= _numSeqs; j++) { if (maxid[i] < (*distMat)(i + 1, j)) { maxid[i] = static_cast((*distMat)(i + 1, j)); // Mark change 17-5-07 } } _seqWeight[i] = maxid[i] * (*prof1Weight)[i]; } for (i = _profile1NumSeqs; i < _numSeqs; i++) { maxid[i] = - 1; for (j = 1; j <= _profile1NumSeqs; j++) { if (maxid[i] < (*distMat)(i + 1, j)) { maxid[i] = static_cast((*distMat)(i + 1, j));// Mark change 17-5-07 } } _seqWeight[i] = maxid[i] * (*prof2Weight)[i]; } 
// // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR // sum = 0; for (j = 0; j < _numSeqs; j++) { sum += _seqWeight[j]; } if (sum == 0) { for (j = 0; j < _numSeqs; j++) { _seqWeight[j] = 1; } sum = j; } for (j = 0; j < _numSeqs; j++) { _seqWeight[j] = (_seqWeight[j] * INT_SCALE_FACTOR) / sum; if (_seqWeight[j] < 1) { _seqWeight[j] = 1; } } // do the alignment......... / utilityObject->info("Aligning..."); group.resize(_numSeqs + 1); for (i = 1; i <= _profile1NumSeqs; ++i) { group[i] = 1; } for (i = _profile1NumSeqs + 1; i <= _numSeqs; ++i) { group[i] = 2; } entries = _numSeqs; aligned = new int[_numSeqs + 1]; for (i = 1; i <= _numSeqs; i++) { aligned[i] = 1; } alnPtr->addSeqWeight(&_seqWeight); score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned); utilityObject->info("Sequences:%d Score:%d", entries, score); for (i = 1; i <= _numSeqs; i++) { _outputIndex[i - 1] = i; } alnPtr->addOutputIndex(&_outputIndex); delete alignAlgorithm; delete [] aligned; delete [] maxid; return (_numSeqs); return 1; } } clustalx-2.1/clustalW/multipleAlign/LowScoreSegProfile.h0000644000175000017500000000175011470725216023313 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * NOTE: This profile is not used in the multiple alignment part. It is used for the * the clustal Qt part. It is used in the calculation of low scoring segments. 
*/ #ifndef LOWSCORESEGPROFILE_H #define LOWSCORESEGPROFILE_H #include "../alignment/Alignment.h" namespace clustalw { class LowScoreSegProfile { public: /* Functions */ LowScoreSegProfile(int prfLen, int firstS, int lastS); void calcLowScoreSegProfile(const SeqArray* seqArray, int matrix[NUMRES][NUMRES], vector* seqWeight); const SeqArray* getProfilePtr(){return &profile;}; /* Attributes */ protected: /* Functions */ /* Attributes */ SeqArray profile; int prfLength; int firstSeq, lastSeq; private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/multipleAlign/LowScoreSegProfile.cpp0000644000175000017500000000626611470725216023655 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "LowScoreSegProfile.h" namespace clustalw { LowScoreSegProfile::LowScoreSegProfile(int prfLen, int firstS, int lastS) : prfLength(prfLen), firstSeq(firstS), lastSeq(lastS) { profile.resize(prfLength + 2, vector(LENCOL + 2)); } void LowScoreSegProfile::calcLowScoreSegProfile(const SeqArray* seqArray, int matrix[NUMRES][NUMRES], vector* seqWeight) { vector > weighting; int d, i, res; int r, pos; int f; int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); int _maxAA = userParameters->getMaxAA(); weighting.resize(NUMRES + 2, vector(prfLength + 2)); for (r = 0; r < prfLength; r++) { for (d = 0; d <= _maxAA; d++) { weighting[d][r] = 0; for (i = firstSeq; i < lastSeq; i++) { if (r + 1 < (int)(*seqArray)[i + 1].size() - 1) { if (d == (*seqArray)[i + 1][r + 1]) { weighting[d][r] += (*seqWeight)[i]; } } } } weighting[_gapPos1][r] = 0; for (i = firstSeq; i < lastSeq; i++) { if (r + 1 < (int)(*seqArray)[i + 1].size() - 1) { if (_gapPos1 == (*seqArray)[i + 1][r + 1]) { weighting[_gapPos1][r] += (*seqWeight)[i]; } } } weighting[_gapPos2][r] = 0; for (i = firstSeq; i < lastSeq; i++) { if (r + 1 < (int)(*seqArray)[i + 1].size() - 
1) { if (_gapPos2 == (*seqArray)[i + 1][r + 1]) { weighting[_gapPos2][r] += (*seqWeight)[i]; } } } } for (pos = 0; pos < prfLength; pos++) { for (res = 0; res <= _maxAA; res++) { f = 0; for (d = 0; d <= _maxAA; d++) { f += (weighting[d][pos] * matrix[d][res]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][res]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][res]); profile[pos + 1][res] = f; } f = 0; for (d = 0; d <= _maxAA; d++) { f += (weighting[d][pos] * matrix[d][_gapPos1]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][_gapPos1]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][_gapPos1]); profile[pos + 1][_gapPos1] = f; f = 0; for (d = 0; d <= _maxAA; d++) { f += (weighting[d][pos] * matrix[d][_gapPos2]); } f += (weighting[_gapPos1][pos] * matrix[_gapPos1][_gapPos2]); f += (weighting[_gapPos2][pos] * matrix[_gapPos2][_gapPos2]); profile[pos + 1][_gapPos2] = f; } } } clustalx-2.1/clustalW/multipleAlign/Iteration.h0000644000175000017500000000110511470725216021526 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef ITERATION_H #define ITERATION_H #include "../alignment/Alignment.h" #include "../general/clustalw.h" namespace clustalw { class Iteration { public: //Iteration(); bool iterationOnTreeNode(int numSeqsProf1, int numSeqsProf2, int& prfLength1, int& prfLength2, SeqArray* seqArray); bool removeFirstIterate(Alignment* alnPtr); private: void printSeqArray(SeqArray* arrayToPrint); }; } #endif clustalx-2.1/clustalW/multipleAlign/Iteration.cpp0000644000175000017500000002630111470725216022066 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. * * Changes: * Mark 30-5-2007: Changed iterationOnTreeNode function as it was adding in extra gaps * at the end of an alignment. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "Iteration.h" #include "../alignment/ObjectiveScore.h" #include "../general/utils.h" #include "../general/userparams.h" #include "../tree/TreeInterface.h" #include "../clustalw_version.h" #include "MSA.h" namespace clustalw { bool Iteration::iterationOnTreeNode(int numSeqsProf1, int numSeqsProf2, int& prfLength1, int& prfLength2, SeqArray* seqArray) { Alignment alignmentToIterate; int numSeqsInProfiles = numSeqsProf1 + numSeqsProf2; if(numSeqsInProfiles <= 2) { return false; } SeqArray profileSeqs; profileSeqs.resize(numSeqsInProfiles + 1); // Copy the SeqArray! for (int j = 0; ((j < numSeqsProf1 + numSeqsProf2) && (j < (int)seqArray->size())); j++) { profileSeqs[j + 1].clear(); profileSeqs[j + 1].resize(prfLength1 + 1); for (int i = 0; i < prfLength1 && i < (int)(*seqArray)[j].size(); i++) { profileSeqs[j + 1][i + 1] = (*seqArray)[j][i]; } } alignmentToIterate.addSequences(&profileSeqs); //userParameters->setNumIterations(numSeqsInProfiles * 2); bool changed = false; changed = removeFirstIterate(&alignmentToIterate); if(changed) { SeqArray* iteratedSeqs = alignmentToIterate.getSeqArrayForRealloc(); string aaCodes = userParameters->getAminoAcidCodes(); int newPrf1Length = 0, newPrf2Length = 0; for (int j = 0; j < numSeqsProf1 + numSeqsProf2; j++) { if(j < numSeqsProf1) { if(alignmentToIterate.getSeqLength(j + 1) > newPrf1Length) { newPrf1Length = alignmentToIterate.getSeqLength(j + 1); } } else if(j < numSeqsProf1 + numSeqsProf2) { if(alignmentToIterate.getSeqLength(j + 1) > newPrf2Length) { newPrf2Length = alignmentToIterate.getSeqLength(j + 1); } } } prfLength1 = newPrf1Length; // mark 30-5-2007 prfLength2 = newPrf2Length; // mark 30-5-2007 for (int j = 0; j < numSeqsProf1 + numSeqsProf2; j++) { // I need to recalculate the prfLength1 and prfLength2 (*seqArray)[j].clear(); (*seqArray)[j].assign((*iteratedSeqs)[j + 1].begin() + 1, (*iteratedSeqs)[j + 1].end()); (*seqArray)[j].resize(prfLength1 + 
extraEndElemNum, 31); (*seqArray)[j][prfLength1] = ENDALN; } } return true; } void Iteration::printSeqArray(SeqArray* arrayToPrint) { cout << "HERE IS THE SEQARRAY\n"; // I need to use iterators for everything here. SeqArray::iterator mainBeginIt = arrayToPrint->begin(); SeqArray::iterator mainEndIt = arrayToPrint->end(); vector::iterator begin, end; string aaCodes = userParameters->getAminoAcidCodes(); for(; mainBeginIt != mainEndIt; mainBeginIt++) { if(mainBeginIt->size() > 0) { begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); for(; begin != end; begin++) { if(*begin < (int)aaCodes.size()) { cout << aaCodes[*begin]; } else { cout << "-"; } } cout << "\n"; } } cout << "\n\n"; } /** * The function removeFirstIterate is used to perform the remove first iteration * strategy on the finished alignment. It optimises the score give in alignScore. * For iter = 1 to numIterations * For seq i = 1 to numSeqs * remove seq i * if either of the profiles has all gaps, remove this column. * realign using profileAlign * if its better, keep it. If its not better, dont keep it. * @param alnPtr The alignment object. * @return true if it has been successful, false if it has not been successful. */ bool Iteration::removeFirstIterate(Alignment* alnPtr) { if(!alnPtr) { return false; } string p1TreeName; p1TreeName = ""; string p2TreeName; int nSeqs = alnPtr->getNumSeqs(); if(nSeqs <= 2) { return false; } DistMatrix distMat; distMat.ResizeRect(nSeqs + 1); ObjectiveScore scoreObj; int iterate = userParameters->getDoRemoveFirstIteration(); userParameters->setDoRemoveFirstIteration(NONE); double firstScore = scoreObj.getScore(alnPtr); //cout << "firstScore = " << firstScore << "\n"; double score = 0; double bestScore = firstScore; double dscore; int count; bool scoreImproved = false; bool scoreImprovedAnyIteration = false; int prof1NumSeqs = 1; // This will be used for removing gaps!!! 
vector profile1; vector profile2; profile1.resize(nSeqs + 1, 0); profile1[1] = 1; profile2.resize(nSeqs + 1, 1); profile2[0] = 0; profile2[1] = 0; vector prof1Weight, prof2Weight; int iterations = userParameters->getNumIterations(); //cout << "Max num iterations = " << iterations << "\n"; Alignment bestAlignSoFar; TreeInterface tree; // One iteration consists of removing each of the sequences, reseting all the gap // only columns. If the score is better, the new alignment is kept. for(int n = 1; n <= iterations; n++) { scoreImproved = false; cout << "ITERATION " << n << " OF " << iterations << "\n"; for(int i = 1; i <= nSeqs; i++) { vector seqVector; Alignment iterateAlign = *alnPtr; iterateAlign.setProfile1NumSeqs(1); // We remove the sequence i from the profile, and paste into the first position // This is to make it easy to do the profile alignment. vector selected; selected.resize(nSeqs + 1, 0); selected[i] = 1; seqVector = iterateAlign.cutSelectedSequencesFromAlignment(&selected); iterateAlign.pasteSequencesIntoPosition(&seqVector, 0); // Remove any gap only columns iterateAlign.removeGapOnlyColsFromSelectedSeqs(&profile1); iterateAlign.removeGapOnlyColsFromSelectedSeqs(&profile2); // Calculate a simple distance matrix. 
if(nSeqs - 1 >= 2) { for (int i = 1; i <= nSeqs; i++) { for (int j = i + 1; j <= nSeqs; j++) { dscore = iterateAlign.countid(i, j); distMat(i, j) = (100.0 - dscore)/100.0; } } /* temporary tree file * * can't use the safer mkstemp function here, because * we just pass down the filename :( */ char buffer[L_tmpnam]; tmpnam (buffer); p2TreeName = buffer + string(".dnd"); // should test here if file is writable } bool success = false; prof1Weight.clear(); prof1Weight.resize(prof1NumSeqs); prof2Weight.clear(); prof2Weight.resize(nSeqs); tree.getWeightsForProfileAlign(&iterateAlign, &distMat, &p1TreeName, &prof1Weight, &p2TreeName, &prof2Weight, nSeqs, prof1NumSeqs, false, false, &success); remove(p2TreeName.c_str()); if(!success) { /* returning false only means alignment hasn't * changed, but here getWeightsForProfileAlign failed, * most likely because p2TreeName couldn't be read. an * error will be printed to console. clustalw should * then exit, FIXME: clustalx users have to sit * through all error messages until someone * implements a way to return an exit code and react * appropriately */ // does anyone know how to use // (userParameters->getMenuFlag() || // !userParameters->getInteractive() instead? char buf[1024]; utilityObject->myname(buf); if (strcasecmp(buf, "clustalw")==0) { exit(EXIT_FAILURE); } else { // the next two lines were here before the exit // was added. keeping it for clustalx although it // doesnt seem to make any sens userParameters->setDoRemoveFirstIteration(iterate); return false; } } MSA* msaObj = new MSA(); iterateAlign.resetProfile1(); iterateAlign.resetProfile2(); // Do the profile alignment. count = msaObj->doProfileAlign(&iterateAlign, &distMat, &prof1Weight, &prof2Weight); delete msaObj; // Check if its better score = scoreObj.getScore(&iterateAlign); iterateAlign.setProfile1NumSeqs(0); if(score < bestScore) // Might be a problem with this. 
{ //cout << "**********************************************\n"; //cout << "***** Better score found using iteration *****\n"; //cout << "**********************************************\n"; bestScore = score; bestAlignSoFar = iterateAlign; scoreImproved = true; scoreImprovedAnyIteration = true; } distMat.clearArray(); distMat.ResizeRect(nSeqs + 1); } if(scoreImproved == false) { cout << "Score was not improved in last iteration. Exiting...\n"; break; } } // // NOTE if we have improved it, then we need to update the sequences in alnPtr // 1) get the unique id of seq i // 2) get the sequence from new object using id // 3) update the sequence in alnPtr // if(scoreImprovedAnyIteration) // If we need to update the alnPtr object. { cout << "Iteration improved Align score: " << bestScore << "\n"; int seqId; const vector* improvedSeq; for(int i = 1; i <= nSeqs; i++) // For each seq in alnPtr { seqId = alnPtr->getUniqueId(i); improvedSeq = bestAlignSoFar.getSequenceFromUniqueId(seqId); alnPtr->updateSequence(i, improvedSeq); } } cout << "FINAL score: " << bestScore << "\n"; userParameters->setDoRemoveFirstIteration(iterate); return true; // It was successful. } } clustalx-2.1/clustalW/multipleAlign/0000755000175000017500000000000011470725216017422 5ustar ddineenddineenclustalx-2.1/clustalW/interface/InteractiveMenu.h0000644000175000017500000000323211470725216022027 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Mark Larkin 12 Dec 2005. * Interactive menu class. It uses the object userParameters for * all the session variables. 
*/ #ifndef INTERACTIVEMENU_H #define INTERACTIVEMENU_H #include #include "../Clustal.h" #include "../general/clustalw.h" #include "../general/userparams.h" #include "../general/utils.h" #include "../substitutionMatrix/globalmatrix.h" namespace clustalw { using namespace std; class InteractiveMenu { public: /* Functions */ InteractiveMenu(); ~InteractiveMenu(); void mainMenu(); /* Attributes */ private: /* Functions */ void doSystem(); void multipleAlignMenu(); void profileAlignMenu(); void ssOptionsMenu(); int secStrOutputOptions(); void phylogeneticTreeMenu(); void treeFormatOptionsMenu(); void formatOptionsMenu(); void pairwiseMenu(); void multiMenu(); void gapPenaltiesMenu(); int readMatrix(int alignResidueType, int alignType, MatMenu menu); void clusteringAlgorithmMenu(); void iterationMenu(); /* Attributes */ Clustal* clustalObj; string phylipName; string clustalName; string distName; string nexusName; //string fasta_name; string p1TreeName; string p2TreeName; string secStructOutputTxt[4]; // Changed to a string array string lin1; MatMenu dnaMatrixMenu; MatMenu matrixMenu; MatMenu pwMatrixMenu; char choice; }; } #endif clustalx-2.1/clustalW/interface/InteractiveMenu.cpp0000644000175000017500000012540111470725216022365 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Mark Larkin Dec 12 2005. * This provides the implementation of the interactive menu functions. * Changes: * 15-5-07: Added changes to clustering algorithm choice in function phylogenticTreeMenu * Added iteration to the multipleAlignMenu function. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include #include #include "InteractiveMenu.h" namespace clustalw { using namespace std; InteractiveMenu::InteractiveMenu() { try { clustalObj = new Clustal(); lin1 = ""; secStructOutputTxt[0] = string("Secondary Structure"); secStructOutputTxt[1] = string("Gap Penalty Mask"); secStructOutputTxt[2] = string("Structure and Penalty Mask"); secStructOutputTxt[3] = string("None"); } catch(bad_alloc) { cerr<<"The memory heap is exhausted. The program must terminate now\n"; exit(1); } /* Initialise the menu structs */ matrixMenu.noptions = 5; strcpy(matrixMenu.opt[AABLOSUM].title, "BLOSUM series"); strcpy(matrixMenu.opt[AABLOSUM].string, "blosum"); strcpy(matrixMenu.opt[AAPAM].title, "PAM series"); strcpy(matrixMenu.opt[AAPAM].string, "pam"); strcpy(matrixMenu.opt[AAGONNET].title, "Gonnet series"); strcpy(matrixMenu.opt[AAGONNET].string, "gonnet"); strcpy(matrixMenu.opt[AAIDENTITY].title, "Identity matrix"); strcpy(matrixMenu.opt[AAIDENTITY].string, "id"); strcpy(matrixMenu.opt[AAUSERDEFINED].title, "User defined"); strcpy(matrixMenu.opt[AAUSERDEFINED].string, ""); dnaMatrixMenu.noptions = 3; strcpy(dnaMatrixMenu.opt[DNAIUB].title, "IUB"); strcpy(dnaMatrixMenu.opt[DNAIUB].string, "iub"); strcpy(dnaMatrixMenu.opt[DNACLUSTALW].title, "CLUSTALW(1.6)"); strcpy(dnaMatrixMenu.opt[DNACLUSTALW].string, "clustalw"); strcpy(dnaMatrixMenu.opt[DNAUSERDEFINED].title, "User defined"); strcpy(dnaMatrixMenu.opt[DNAUSERDEFINED].string, ""); pwMatrixMenu.noptions = 5; strcpy(pwMatrixMenu.opt[PWAABLOSUM].title, "BLOSUM 30"); strcpy(pwMatrixMenu.opt[PWAABLOSUM].string, "blosum"); strcpy(pwMatrixMenu.opt[PWAAPAM].title, "PAM 350"); strcpy(pwMatrixMenu.opt[PWAAPAM].string, "pam"); strcpy(pwMatrixMenu.opt[PWAAGONNET].title, "Gonnet 250"); strcpy(pwMatrixMenu.opt[PWAAGONNET].string, "gonnet"); strcpy(pwMatrixMenu.opt[PWAAIDENTITY].title, "Identity matrix"); 
strcpy(pwMatrixMenu.opt[PWAAIDENTITY].string, "id"); strcpy(pwMatrixMenu.opt[PWAAUSER].title, "User defined"); strcpy(pwMatrixMenu.opt[PWAAUSER].string, ""); } InteractiveMenu::~InteractiveMenu() { delete clustalObj; } void InteractiveMenu::mainMenu() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<" **************************************************************\n"; cout<<" ******** CLUSTAL " << userParameters->getRevisionLevel() <<" Multiple Sequence Alignments ********\n"; cout<<" **************************************************************\n"; cout<<"\n\n"; cout<<" 1. Sequence Input From Disc\n"; cout<<" 2. Multiple Alignments\n"; cout<<" 3. Profile / Structure Alignments\n"; cout<<" 4. Phylogenetic trees\n\n"; cout<<" S. Execute a system command\n"; cout<<" H. HELP\n"; cout<<" X. EXIT (leave program)\n\n\n"; choice = utilityObject->getChoice(string("Your choice")); string offendingSeq; // unused here switch (toupper(choice)) { case '1': clustalObj->sequenceInput(false, &offendingSeq); break; case '2': multipleAlignMenu(); break; case '3': profileAlignMenu(); break; case '4': phylogeneticTreeMenu(); break; case 'S': doSystem(); break; case '?': case 'H': clustalObj->getHelp('1'); break; case 'Q': case 'X': exit(0); break; default: cout<<"\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::multipleAlignMenu() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<"****** MULTIPLE ALIGNMENT MENU ******\n\n\n"; cout<<" 1. Do complete multiple alignment now " <<(!userParameters->getQuickPairAlign() ? "Slow/Accurate" : "Fast/Approximate") <<"\n"; cout<<" 2. Produce guide tree file only\n"; cout<<" 3. Do alignment using old guide tree file\n\n"; cout<<" 4. Toggle Slow/Fast pairwise alignments = " <<((!userParameters->getQuickPairAlign()) ? "SLOW" : "FAST") <<"\n\n"; cout<<" 5. Pairwise alignment parameters\n"; cout<<" 6. Multiple alignment parameters\n\n"; cout<<" 7. 
Reset gaps before alignment?"; if (userParameters->getResetAlignmentsNew()) { cout<<" = ON\n"; } else { cout<<" = OFF\n"; } cout<<" 8. Toggle screen display = " <<((!userParameters->getShowAlign()) ? "OFF" : "ON") <<"\n"; cout<<" 9. Output format options\n"; cout<<" I. Iteration = "; if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT) { cout << "ALIGNMENT\n\n"; } else if(userParameters->getDoRemoveFirstIteration() == TREE) { cout << "TREE\n\n"; } else { cout << "NONE\n\n"; } cout<<" S. Execute a system command\n"; cout<<" H. HELP\n"; cout<<" or press [RETURN] to go back to main menu\n\n\n"; choice = utilityObject->getChoice(string("Your choice")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': clustalObj->align(&phylipName); break; case '2': clustalObj->doGuideTreeOnly(&phylipName); break; case '3': clustalObj->doAlignUseOldTree(&phylipName); break; case '4': userParameters->toggleQuickPairAlign(); break; case '5': pairwiseMenu(); break; case '6': multiMenu(); break; case '7': userParameters->toggleResetAlignmentsNew(); if (userParameters->getResetAlignmentsNew() == true) { userParameters->setResetAlignmentsAll(false); } break; case '8': userParameters->toggleShowAlign(); break; case '9': formatOptionsMenu(); break; case 'I': iterationMenu(); break; case 'S': doSystem(); break; case '?': case 'H': clustalObj->getHelp('2'); break; case 'Q': case 'X': return ; default: fprintf(stdout, "\n\nUnrecognised Command\n\n"); break; } } } void InteractiveMenu::profileAlignMenu(void) { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<"****** PROFILE AND STRUCTURE ALIGNMENT MENU ******\n\n\n"; cout<<" 1. Input 1st. profile "; if (!userParameters->getProfile1Empty()) { cout<<"(loaded)"; } cout<<"\n"; cout<<" 2. Input 2nd. profile/sequences "; if (!userParameters->getProfile2Empty()) { cout<<"(loaded)"; } cout<<"\n\n"; cout<<" 3. Align 2nd. profile to 1st. profile\n"; cout<<" 4. Align sequences to 1st. 
profile " <<((!userParameters->getQuickPairAlign()) ? "(Slow/Accurate)\n\n" : "(Fast/Approximate)\n\n"); cout<<" 5. Toggle Slow/Fast pairwise alignments = " <<((!userParameters->getQuickPairAlign()) ? "SLOW\n\n" : "FAST\n\n"); cout<<" 6. Pairwise alignment parameters\n"; cout<<" 7. Multiple alignment parameters\n\n"; cout<<" 8. Toggle screen display = " <<((!userParameters->getShowAlign()) ? "OFF\n" : "ON\n"); cout<<" 9. Output format options\n"; cout<<" 0. Secondary structure options\n\n"; cout<<" S. Execute a system command\n"; cout<<" H. HELP\n"; cout<<" or press [RETURN] to go back to main menu\n\n\n"; choice = utilityObject->getChoice(string("Your choice")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': clustalObj->profile1Input(); break; case '2': clustalObj->profile2Input(); break; case '3': clustalObj->profileAlign(&p1TreeName, &p2TreeName); break; case '4': /* align new sequences to profile 1 */ clustalObj->sequencesAlignToProfile(&phylipName); break; case '5': userParameters->toggleQuickPairAlign(); break; case '6': pairwiseMenu(); break; case '7': multiMenu(); break; case '8': userParameters->toggleShowAlign(); break; case '9': formatOptionsMenu(); break; case '0': ssOptionsMenu(); break; case 'S': doSystem(); break; case '?': case 'H': clustalObj->getHelp('6'); break; case 'Q': case 'X': return ; default: cout<<"\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::ssOptionsMenu() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<" ********* SECONDARY STRUCTURE OPTIONS *********\n\n\n"; cout<<" 1. Use profile 1 secondary structure / penalty mask "; if (userParameters->getUseSS1()) { cout<<"= YES\n"; } else { cout<<"= NO\n"; } cout<<" 2. Use profile 2 secondary structure / penalty mask "; if (userParameters->getUseSS2()) { cout<<"= YES\n\n"; } else { cout<<"= NO\n\n"; } cout<<" 3. Output in alignment "; cout<<"= "<< secStructOutputTxt[userParameters->getOutputStructPenalties()] <<"\n\n"; cout<<" 4. 
Helix gap penalty : " <getHelixPenalty()<<"\n"; cout<<" 5. Strand gap penalty : " <getStrandPenalty()<<"\n"; cout<<" 6. Loop gap penalty : " <getLoopPenalty()<<"\n"; cout<<" 7. Secondary structure terminal penalty : " <getHelixEndPenalty()<<"\n"; cout<<" 8. Helix terminal positions within : " <getHelixEndMinus() <<" outside : " <getHelixEndPlus()<<"\n"; cout<<" 9. Strand terminal positions within : " <getStrandEndMinus() <<" outside : " <getStrandEndPlus()<<"\n\n\n"; cout<<" H. HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->toggleUseSS1(); break; case '2': userParameters->toggleUseSS2(); break; case '3': userParameters->setOutputStructPenalties(secStrOutputOptions()); break; case '4': cout<<"Helix Penalty Currently: " << userParameters->getHelixPenalty()<<"\n"; userParameters->setHelixPenalty(utilityObject->getInt("Enter number", 1,9, userParameters->getHelixPenalty())); break; case '5': cout<<"Strand Gap Penalty Currently: " << userParameters->getStrandPenalty() <<"\n"; userParameters->setStrandPenalty(utilityObject->getInt("Enter number", 1,9, userParameters->getStrandPenalty())); break; case '6': cout<<"Loop Gap Penalty Currently: " << userParameters->getLoopPenalty() <<"\n"; userParameters->setLoopPenalty(utilityObject->getInt("Enter number", 1, 9, userParameters->getLoopPenalty())); break; case '7': cout<<"Secondary Structure Terminal Penalty Currently: " << userParameters->getHelixEndPenalty() <<"\n"; userParameters->setHelixEndPenalty(utilityObject->getInt("Enter number", 1, 9,userParameters->getHelixEndPenalty())); userParameters->setStrandEndPenalty(userParameters->getHelixEndPenalty()); break; case '8': cout<<"Helix Terminal Positions Currently: \n"; cout<<" within helix: " << userParameters->getHelixEndMinus() << " outside helix: " << userParameters->getHelixEndPlus() <<"\n"; 
userParameters->setHelixEndMinus(utilityObject->getInt( "Enter number of residues within helix", 0, 3, userParameters->getHelixEndMinus())); userParameters->setHelixEndPlus(utilityObject->getInt( "Enter number of residues outside helix", 0, 3, userParameters->getHelixEndPlus())); break; case '9': cout<<"Strand Terminal Positions Currently: \n"; cout<<" within strand: " << userParameters->getStrandEndMinus() << " outside strand: " << userParameters->getStrandEndPlus() <<"\n"; userParameters->setStrandEndMinus(utilityObject->getInt( "Enter number of residues within strand", 0, 3, userParameters->getStrandEndMinus())); userParameters->setStrandEndPlus(utilityObject->getInt( "Enter number of residues outside strand", 0, 3, userParameters->getStrandEndPlus())); break; case '?': case 'H': clustalObj->getHelp('B'); break; default: cout<<"\n\nUnrecognised Command\n\n"; break; } } } int InteractiveMenu::secStrOutputOptions() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<" ********* Secondary Structure Output Menu *********\n\n\n"; cout<<" 1. "<< secStructOutputTxt[0] <<"\n"; cout<<" 2. "<< secStructOutputTxt[1] <<"\n"; cout<<" 3. "<< secStructOutputTxt[2] <<"\n"; cout<<" 4. "<< secStructOutputTxt[3] <<"\n"; cout<<" H. HELP\n\n"; cout<<" -- Current output is " << secStructOutputTxt[userParameters->getOutputStructPenalties()]; cout<<" --\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return (userParameters->getOutputStructPenalties());; } switch (toupper(choice)) { case '1': return (OUTSECST); case '2': return (OUTGAP); case '3': return (OUTBOTH); case '4': return (OUTNONE); case '?': case 'H': clustalObj->getHelp('C'); case 'Q': case 'X': return (0); default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::phylogeneticTreeMenu() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<"****** PHYLOGENETIC TREE MENU ******\n\n\n"; cout<<" 1. Input an alignment\n"; cout<<" 2. 
Exclude positions with gaps? "; if (userParameters->getTossGaps()) { cout<<"= ON\n"; } else { cout<<"= OFF\n"; } cout<<" 3. Correct for multiple substitutions? "; if (userParameters->getKimura()) { cout<<"= ON\n"; } else { cout<<"= OFF\n"; } cout<<" 4. Draw tree now\n"; cout<<" 5. Bootstrap tree\n"; cout<<" 6. Output format options\n"; cout<<" 7. Clustering algorithm = "; // Mark change 15-5-2007 if(userParameters->getClusterAlgorithm() == NJ) { cout << "NJ\n\n"; } else { cout << "UPGMA\n\n"; } cout<<" S. Execute a system command\n"; cout<<" H. HELP\n"; cout<<" or press [RETURN] to go back to main menu\n\n\n"; choice = utilityObject->getChoice(string("Your choice")); if (choice == '\n') { return ; } string offendingSeq; // unused here switch (toupper(choice)) { case '1': clustalObj->sequenceInput(false, &offendingSeq); break; case '2': userParameters->toggleTossGaps(); break; case '3': userParameters->toggleKimura(); break; case '4': clustalObj->phylogeneticTree(&phylipName, &clustalName, &distName, &nexusName, "amenu.pim"); break; case '5': clustalObj->bootstrapTree(&phylipName, &clustalName, &nexusName); break; case '6': treeFormatOptionsMenu(); break; case '7': clusteringAlgorithmMenu(); break; case 'S': doSystem(); break; case '?': case 'H': clustalObj->getHelp('7'); break; case 'Q': case 'X': return ; default: cout<<"\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::treeFormatOptionsMenu() { while (true) { lin1 = ""; cout<< "\n\n\n"; cout<<" ****** Format of Phylogenetic Tree Output ******\n\n\n"; cout<<" 1. Toggle CLUSTAL format tree output = " << ((!userParameters->getOutputTreeClustal()) ? "OFF" : "ON")<<"\n"; cout<<" 2. Toggle Phylip format tree output = " << ((!userParameters->getOutputTreePhylip()) ? "OFF" : "ON")<<"\n"; cout<<" 3. Toggle Phylip distance matrix output = " << ((!userParameters->getOutputTreeDistances()) ? "OFF" : "ON")<<"\n"; cout<<" 4. Toggle Nexus format tree output = " << ((!userParameters->getOutputTreeNexus()) ? 
"OFF" : "ON")<<"\n\n"; cout<<" 5. Toggle Phylip bootstrap positions = " <<((userParameters->getBootstrapFormat() == BS_NODE_LABELS) ? "NODE LABELS" : "BRANCH LABELS") <<"\n\n\n"; cout<<" H. HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->toggleOutputTreeClustal(); break; case '2': userParameters->toggleOutputTreePhylip(); break; case '3': userParameters->toggleOutputTreeDistances(); break; case '4': userParameters->toggleOutputTreeNexus(); break; case '5': userParameters->toggleBootstrapFormat(); break; case '?': case 'H': clustalObj->getHelp('0'); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::formatOptionsMenu() { while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<" ********* Format of Alignment Output *********\n\n\n"; cout<<" F. Toggle FASTA format output = " << ((!userParameters->getOutputFasta()) ? "OFF" : "ON") <<"\n\n"; cout<<" 1. Toggle CLUSTAL format output = " << ((!userParameters->getOutputClustal()) ? "OFF" : "ON") <<"\n"; cout<<" 2. Toggle NBRF/PIR format output = " << ((!userParameters->getOutputNbrf()) ? "OFF" : "ON") << "\n"; cout<<" 3. Toggle GCG/MSF format output = " << ((!userParameters->getOutputGCG()) ? "OFF" : "ON") << "\n"; cout<<" 4. Toggle PHYLIP format output = " << ((!userParameters->getOutputPhylip()) ? "OFF" : "ON") << "\n"; cout<<" 5. Toggle NEXUS format output = " << ((!userParameters->getOutputNexus()) ? "OFF" : "ON") << "\n"; cout<<" 6. Toggle GDE format output = " << ((!userParameters->getOutputGde()) ? "OFF" : "ON") << "\n\n"; cout<<" 7. Toggle GDE output case = " << ((!userParameters->getLowercase()) ? "UPPER" : "LOWER") << "\n"; cout<<" 8. Toggle CLUSTALW sequence numbers = " << ((!userParameters->getClSeqNumbers()) ? "OFF" : "ON") << "\n"; cout<<" 9. Toggle output order = " << ((userParameters->getOutputOrder() == 0) ? 
"INPUT FILE" : "ALIGNED") <<"\n\n"; cout<<" 0. Create alignment output file(s) now?\n\n"; cout<<" T. Toggle parameter output = " << ((!userParameters->getSaveParameters()) ? "OFF" : "ON") << "\n"; cout<<" R. Toggle sequence range numbers = " <<((!userParameters->getSeqRange()) ? "OFF" : "ON") << "\n\n"; cout<<" H. HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->toggleOutputClustal(); break; case '2': userParameters->toggleOutputNbrf(); break; case '3': userParameters->toggleOutputGCG(); break; case '4': userParameters->toggleOutputPhylip(); break; case '5': userParameters->toggleOutputNexus(); break; case '6': userParameters->toggleOutputGde(); break; case '7': userParameters->toggleLowercase(); break; case '8': userParameters->toggleClSeqNumbers(); break; case '9': userParameters->toggleOutputOrder(); break; case 'F': userParameters->toggleOutputFasta(); break; case 'R': userParameters->toggleSeqRange(); break; case '0': clustalObj->outputNow(); break; case 'T': userParameters->toggleSaveParameters(); break; case '?': case 'H': clustalObj->getHelp('5'); break; default: cout<<"\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::pairwiseMenu() { if (userParameters->getDNAFlag()) { userParameters->setPWParamToDNA(); } else { userParameters->setPWParamToProtein(); } while (true) { lin1 = ""; cout<<"\n\n\n"; cout<<" ********* PAIRWISE ALIGNMENT PARAMETERS *********\n\n\n"; cout<<" Slow/Accurate alignments:\n\n"; cout<<" 1. Gap Open Penalty : " << fixed << setprecision(2) << userParameters->getPWGapOpen() << "\n"; cout<<" 2. Gap Extension Penalty : " << fixed << setprecision(2) << userParameters->getPWGapExtend() << "\n"; cout<< " 3. Protein weight matrix :" << matrixMenu.opt[subMatrix->getPWMatrixNum() - 1].title << "\n"; cout<<" 4. 
DNA weight matrix :" << dnaMatrixMenu.opt[subMatrix->getPWDNAMatrixNum() - 1].title << "\n\n"; cout<<" Fast/Approximate alignments:\n\n"; cout<<" 5. Gap penalty :" << userParameters->getWindowGap() << "\n"; cout<<" 6. K-tuple (word) size :" << userParameters->getKtup() << "\n"; cout<<" 7. No. of top diagonals :" << userParameters->getSignif() << "\n"; cout<<" 8. Window size :" << userParameters->getWindow() << "\n\n"; cout<<" 9. Toggle Slow/Fast pairwise alignments "; if (userParameters->getQuickPairAlign()) { cout<<"= FAST\n\n"; } else { cout<<"= SLOW\n\n"; } cout<<" H. HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { if (userParameters->getDNAFlag()) { userParameters->setPWDNAParam(); } else { userParameters->setPWProteinParam(); } return ; } switch (toupper(choice)) { case '1': cout<<"Gap Open Penalty Currently: " << userParameters->getPWGapOpen() << "\n"; userParameters->setPWGapOpen( (float)utilityObject->getReal("Enter number", (double)0.0, (double)100.0, (double)userParameters->getPWGapOpen())); break; case '2': cout<<"Gap Extension Penalty Currently: " << userParameters->getPWGapExtend() << "\n"; userParameters->setPWGapExtend( (float)utilityObject->getReal("Enter number", (double)0.0, (double)10.0, (double)userParameters->getPWGapExtend())); break; case '3': readMatrix(Protein, Pairwise, pwMatrixMenu); break; case '4': readMatrix(DNA, Pairwise, dnaMatrixMenu); break; case '5': cout<<"Gap Penalty Currently: " << userParameters->getWindowGap() << "\n"; userParameters->setWindowGap( utilityObject->getInt("Enter number", 1, 500, userParameters->getWindowGap())); break; case '6': cout<<"K-tuple Currently: " << userParameters->getKtup() << "\n"; if (userParameters->getDNAFlag()) { int _ktup = utilityObject->getInt("Enter number", 1, 4, userParameters->getKtup()); userParameters->setKtup(_ktup); // see bug 185 userParameters->setDNAKtup(_ktup); userParameters->setWindowGap(_ktup + 4); 
userParameters->setDNAWindowGap(_ktup + 4); } else { int _ktup = utilityObject->getInt("Enter number", 1, 2, userParameters->getKtup()); userParameters->setKtup(_ktup); // see bug 185 userParameters->setAAKtup(_ktup); userParameters->setWindowGap(_ktup + 3); userParameters->setAAWindowGap(_ktup + 3); } break; case '7': cout<<"Top diagonals Currently: " << userParameters->getSignif() << "\n"; userParameters->setSignif( utilityObject->getInt("Enter number", 1, 50, userParameters->getSignif())); break; case '8': cout<<"Window size Currently: " << userParameters->getWindow() << "\n"; userParameters->setWindow( utilityObject->getInt("Enter number", 1, 50, userParameters->getWindow())); break; case '9': userParameters->toggleQuickPairAlign(); break; case '?': case 'H': clustalObj->getHelp('3'); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::multiMenu() { if (userParameters->getDNAFlag()) { userParameters->setDNAMultiGap(); } else { userParameters->setProtMultiGap(); } while (true) { lin1 = ""; cout<<"\n\n\n ********* MULTIPLE ALIGNMENT PARAMETERS *********\n\n\n"; cout<<" 1. Gap Opening Penalty :" << fixed << setprecision(2) << userParameters->getGapOpen() << "\n"; cout<<" 2. Gap Extension Penalty :" << fixed << setprecision(2) << userParameters->getGapExtend() << "\n"; cout<<" 3. Delay divergent sequences :" << userParameters->getDivergenceCutoff() << " %\n\n"; cout<<" 4. DNA Transitions Weight :" << fixed << setprecision(2) << userParameters->getTransitionWeight() << "\n\n"; cout<<" 5. Protein weight matrix :" << matrixMenu.opt[subMatrix->getMatrixNum() - 1].title << "\n"; cout<<" 6. DNA weight matrix :" << dnaMatrixMenu.opt[subMatrix->getDNAMatrixNum() - 1].title << "\n"; cout<<" 7. Use negative matrix :" << ((!userParameters->getUseNegMatrix()) ? "OFF" : "ON") << "\n\n"; cout<<" 8. Protein Gap Parameters\n\n"; cout<<" H. 
HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { if (userParameters->getDNAFlag()) { //userParameters->setDNAMultiGap(); userParameters->setDNAGapOpen(userParameters->getGapOpen()); userParameters->setDNAGapExtend(userParameters->getGapExtend()); } else { //userParameters->setProtMultiGap(); userParameters->setAAGapOpen(userParameters->getGapOpen()); userParameters->setAAGapExtend(userParameters->getGapExtend()); } return ; } switch (toupper(choice)) { case '1': cout<<"Gap Opening Penalty Currently: " << userParameters->getGapOpen() << "\n"; userParameters->setGapOpen( (float)utilityObject->getReal("Enter number", (double)0.0, (double)100.0, (double)userParameters->getGapOpen())); break; case '2': cout<<"Gap Extension Penalty Currently: " << userParameters->getGapExtend() << "\n"; userParameters->setGapExtend( (float)utilityObject->getReal("Enter number", (double)0.0, (double)10.0, (double)userParameters->getGapExtend())); break; case '3': cout<<"Min Identity Currently: " << userParameters->getDivergenceCutoff() << "\n"; userParameters->setDivergenceCutoff( utilityObject->getInt("Enter number", 0, 100, userParameters->getDivergenceCutoff())); break; case '4': cout<<"Transition Weight Currently: " << userParameters->getTransitionWeight() << "\n"; userParameters->setTransitionWeight( (float)utilityObject->getReal("Enter number", (double)0.0, (double)1.0, (double)userParameters->getTransitionWeight())); break; case '5': readMatrix(Protein, MultipleAlign, matrixMenu); break; case '6': readMatrix(DNA, MultipleAlign, dnaMatrixMenu); break; case '7': userParameters->toggleUseNegMatrix(); break; case '8': gapPenaltiesMenu(); break; case '?': case 'H': //clustalObj->getHelp('4'); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::gapPenaltiesMenu() { while (true) { lin1 = ""; cout<< "\n\n\n ********* PROTEIN GAP PARAMETERS *********\n\n\n\n"; cout<<" 1. 
Toggle Residue-Specific Penalties :" << ((userParameters->getNoPrefPenalties()) ? "OFF" : "ON") << "\n\n"; cout<<" 2. Toggle Hydrophilic Penalties :" << ((userParameters->getNoHydPenalties()) ? "OFF" : "ON") << "\n"; cout<<" 3. Hydrophilic Residues :" << userParameters->getHydResidues() << "\n\n"; cout<<" 4. Gap Separation Distance :" << userParameters->getGapDist() << "\n"; cout<<" 5. Toggle End Gap Separation :" << ((!userParameters->getUseEndGaps()) ? "OFF" : "ON") << "\n\n"; cout<<" H. HELP\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->toggleNoPrefPenalties(); break; case '2': userParameters->toggleNoHydPenalties(); break; case '3': cout<<"Hydrophilic Residues Currently: " << userParameters->getHydResidues() << "\n"; lin1 = ""; utilityObject->getStr(string("Enter residues (or [RETURN] to quit)"), lin1); if (lin1.size() > 0) { userParameters->setHydResidues(lin1); } break; case '4': cout<<"Gap Separation Distance Currently: " << userParameters->getGapDist() << "\n"; userParameters->setGapDist( utilityObject->getInt("Enter number", 0, 100, userParameters->getGapDist())); break; case '5': userParameters->toggleUseEndGaps(); break; case '?': case 'H': clustalObj->getHelp('A'); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } /* * This function displays the menu for selecting the weight matrix to use. * It is used for both the protein and DNA menu for pairwise and Multiple alignment. * This is why it requires the MatMenu struct. */ int InteractiveMenu::readMatrix(int alignResidueType, int alignType, MatMenu menu) { static char userFile[FILENAMELEN + 1]; int i, option; char title[10]; int matn; // Used to show which is the current matrix. 
if(alignResidueType == Protein) { strcpy(title, "PROTEIN"); } else // DNA { strcpy(title, "DNA"); } while (true) { lin1 = ""; cout<<"\n\n\n ********* "<< title <<" WEIGHT MATRIX MENU *********\n\n\n"; // Find out what the currently selected matrix is. matn = subMatrix->getMatrixNumForMenu(alignResidueType, alignType); for (i = 0; i < menu.noptions; i++) { cout<< " " << i + 1 << ". " << menu.opt[i].title << "\n"; } cout<<" H. HELP\n\n"; cout<<" -- Current matrix is the " << menu.opt[matn -1].title << " "; if (matn == menu.noptions) { cout<<"(file = "<< userFile <<")";; } cout<<"--\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return matn; } option = toupper(choice) - '0'; // Select the matrix series to be using if (option > 0 && option < menu.noptions) { subMatrix->setCurrentNameAndNum(string(menu.opt[i - 1].string), option, alignResidueType, alignType); } else if (option == menu.noptions) // Read in a User defined matrix. { // NOTE this will be changed to deal with matrix series. if (subMatrix->getUserMatFromFile(userFile, alignResidueType, alignType)) { subMatrix->setCurrentNameAndNum(userFile, option, alignResidueType, alignType); } } else switch (toupper(choice)) { case '?': case 'H': clustalObj->getHelp('8'); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::doSystem() { lin1 = ""; utilityObject->getStr(string("\n\nEnter system command"), lin1); if (lin1.size() > 0) { system(lin1.c_str()); } cout<< "\n\n"; } void InteractiveMenu::clusteringAlgorithmMenu() { string currentAlgorithm = ""; cout<<"****** CLUSTERING ALGORITHM MENU ******\n\n\n"; while (true) { if(userParameters->getClusterAlgorithm() == NJ) { currentAlgorithm = "Neighbour Joining"; } else { currentAlgorithm = "UPGMA"; } lin1 = ""; cout<< "\n\n\n"; cout<<" ****** Clustering Algorithms ******\n\n\n"; cout<<" 1. Neighbour Joining \n"; cout<<" 2. 
UPGMA \n"; cout << "-- Current algorithm is "<< currentAlgorithm << " --\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->setClusterAlgorithm(NJ); break; case '2': userParameters->setClusterAlgorithm(UPGMA); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } void InteractiveMenu::iterationMenu() { string currentIteration = ""; cout<<"****** ITERATION MENU ******\n\n\n"; while (true) { if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT) { currentIteration = "Alignment iteration"; } else if(userParameters->getDoRemoveFirstIteration() == TREE) { currentIteration = "Tree based iteration"; } else { currentIteration = "None"; } lin1 = ""; cout<< "\n\n\n"; cout<<" ****** Iteration Choices ******\n\n\n"; cout<<" 1. Off \n"; cout<<" 2. Tree based iteration (iterates each profile alignment step) \n"; cout<<" 3. Alignment iteration (iterates final alignment only) \n\n"; cout << "-- Current selection is "<< currentIteration << " --\n\n\n"; choice = utilityObject->getChoice(string("Enter number (or [RETURN] to exit)")); if (choice == '\n') { return ; } switch (toupper(choice)) { case '1': userParameters->setDoRemoveFirstIteration(NONE); break; case '2': userParameters->setDoRemoveFirstIteration(TREE); break; case '3': userParameters->setDoRemoveFirstIteration(ALIGNMENT); break; default: cout<< "\n\nUnrecognised Command\n\n"; break; } } } } clustalx-2.1/clustalW/interface/CommandLineParser.h0000644000175000017500000001270511470725216022275 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The class CommandLineParser is used to parse the command line arguments. It then * sets some parameters, and calls the required functions. 
* To get it to parse the command line, and execute the required job, create a * CommandLineParser object and pass the list of arguments to the constructor. */ #ifndef COMMANDLINEPARSER_H #define COMMANDLINEPARSER_H #include #include #include "../Clustal.h" #include "../general/clustalw.h" #include "../general/userparams.h" #include "../general/utils.h" #include "../general/debuglogObject.h" #include "../general/statsObject.h" namespace clustalw { typedef std::vector StringArray; typedef struct { const char *str; int *flag; int type; StringArray* arg; } CmdLineData; class CommandLineParser { public: /* Functions */ CommandLineParser(StringArray* args, bool xmenus); ~CommandLineParser(); private: /* Functions */ /** separate argument options and values * * put all the parameters and their values in the * vectors, and also to check that all parameters that require a value * have it. * * @param args contains parameters as value and optional option pairs * @param parameters used for storing parsed arguments * @param paramArgs used for storing values of parsed arguments * @return number of parsed parameters or -1 on error */ int checkParam(StringArray* args, StringArray* params, StringArray* paramArg); /** Parse all parameters */ void parseParams(StringArray* args, bool xmenus); void setOptionalParam(); int findMatch(string probe, StringArray* list, int n); CmdLineData getCmdLineDataStruct(const char *str, int *flag, int type, StringArray* arg); void printCmdLineData(const CmdLineData& temp); string ConvertStringToLower(string strToConvert); void exitWithErrorMsg(string msg); void reportBadOptionAndExit(string option, string expectedType); void reportInvalidOptionAndExit(string option); /* Attributes */ Clustal *clustalObj; static const int MAXARGS = 100; static const int NOARG = 0; static const int INTARG = 1; static const int FLTARG = 2; static const int STRARG = 3; static const int FILARG = 4; static const int OPTARG = 5; int setOptions; int setHelp; int setFullHelp; 
int setQuiet; int setInteractive; int setBatch; int setGapOpen; int setGapExtend; int setPWGapOpen; int setPWGapExtend; int setOutOrder; int setBootLabels; int setPWMatrix; int setMatrix; int setPWDNAMatrix; int setDNAMatrix; int setNegative; int setNoWeights; int setOutput; int setOutputTree; int setQuickTree; int setType; int setCase; int setSeqNo; int setSeqNoRange; int setRange; int setTransWeight; int setSeed; int setScore; int setWindow; int setKtuple; int setKimura; int setTopDiags; int setPairGap; int setTossGaps; int setNoPGap; int setNoHGap; int setNoVGap; int setHGapRes; int setUseEndGaps; int setMaxDiv; int setGapDist; int setDebug; int setOutfile; int setInfile; int setProfile1; int setProfile2; int setAlign; int setConvert; int setNewTree; int setUseTree; int setNewTree1; int setUseTree1; int setNewTree2; int setUseTree2; int setBootstrap; int setTree; int setProfile; int setSequences; int setSecStruct1; int setSecStruct2; int setSecStructOutput; int setHelixGap; int setStrandGap; int setLoopGap; int setTerminalGap; int setHelixEndIn; int setHelixEndOut; int setStrandEndIn; int setStrandEndOut; int profileType; int setDoIteration; int setNumIterations; int setTreeAlgorithm; int setMaxSeqLen; int setStatsFile; int setOutputPim; string userMatrixName; string pwUserMatrixName; string DNAUserMatrixName; string pwDNAUserMatrixName; CmdLineData cmdLineFile[4]; CmdLineData cmdLineVerb[20]; CmdLineData cmdLinePara[56]; string clustalTreeName; string distTreeName; string phylipTreeName; string nexusTreeName; string p1TreeName; string p2TreeName; string pimName; StringArray* typeArg; StringArray* bootLabelsArg; StringArray* outOrderArg; StringArray* caseArg; StringArray* seqNoArg; StringArray* seqNoRangeArg; StringArray* scoreArg; StringArray* outputArg; StringArray* outputTreeArg; StringArray* outputSecStrArg; StringArray* cmdLineType; StringArray* clusterAlgorithm; StringArray* iterationArg; StringArray* params; // parameter names StringArray* paramArg; // 
parameter values }; } #endif clustalx-2.1/clustalW/interface/CommandLineParser.cpp0000644000175000017500000024347211470725216022637 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include "CommandLineParser.h" #include "../substitutionMatrix/globalmatrix.h" #include "general/Utility.h" #include "general/statsObject.h" using namespace std; namespace clustalw { CommandLineParser::CommandLineParser(StringArray* args, bool xmenus) :setOptions(-1), setHelp(-1), setFullHelp(-1), setQuiet(-1), setInteractive(-1), setBatch(-1), setGapOpen(-1), setGapExtend(-1), setPWGapOpen(-1), setPWGapExtend(-1), setOutOrder(-1), setBootLabels(-1), setPWMatrix(-1), setMatrix(-1), setPWDNAMatrix(-1), setDNAMatrix(-1), setNegative(-1), setNoWeights(-1), setOutput(-1), setOutputTree(-1), setQuickTree(-1), setType(-1), setCase(-1), setSeqNo(-1), setSeqNoRange(-1), setRange(-1), setTransWeight(-1), setSeed(-1), setScore(-1), setWindow(-1), setKtuple(-1), setKimura(-1), setTopDiags(-1), setPairGap(-1), setTossGaps(-1), setNoPGap(-1), setNoHGap(-1), setNoVGap(-1), setHGapRes(-1), setUseEndGaps(-1), setMaxDiv(-1), setGapDist(-1), setDebug(-1), setOutfile(-1), setInfile(-1), setProfile1(-1), setProfile2(-1), setAlign(-1), setConvert(-1), setNewTree(-1), setUseTree(-1), setNewTree1(-1), setUseTree1(-1), setNewTree2(-1), setUseTree2(-1), setBootstrap(-1), setTree(-1), setProfile(-1), setSequences(-1), setSecStruct1(-1), setSecStruct2(-1), setSecStructOutput(-1), setHelixGap(-1), setStrandGap(-1), setLoopGap(-1), setTerminalGap(-1), setHelixEndIn(-1), setHelixEndOut(-1), setStrandEndIn(-1), setStrandEndOut(-1), profileType(PROFILE), setDoIteration(-1), setNumIterations(-1), setTreeAlgorithm(-1), setMaxSeqLen(-1), setStatsFile(-1), setOutputPim(-1) { int ctr=0; // The rest of the variables are arrays! 
try { clustalObj = new Clustal(); // selecting the size prevents the resizing of the vector which is expensive. typeArg = new StringArray(3); bootLabelsArg = new StringArray(3); outOrderArg = new StringArray(3); caseArg = new StringArray(3); seqNoArg = new StringArray(3); seqNoRangeArg = new StringArray(3); scoreArg = new StringArray(3); outputArg = new StringArray(8); outputTreeArg = new StringArray(5); outputSecStrArg = new StringArray(5); cmdLineType = new StringArray(6); clusterAlgorithm = new StringArray(3); iterationArg = new StringArray(4); params = new StringArray; // Wait until I need it!!!!!!!!! paramArg = new StringArray; } catch(const exception &ex) { cerr << ex.what() << endl; cerr << "Terminating program. Cannot continue" << std::endl; exit(1); } (*typeArg)[0] = "protein"; (*typeArg)[1] = "dna"; (*typeArg)[2] = ""; (*bootLabelsArg)[0] = "node"; (*bootLabelsArg)[1] = "branch"; (*bootLabelsArg)[2] = ""; (*outOrderArg)[0] = "input"; (*outOrderArg)[1] = "aligned"; (*outOrderArg)[2] = ""; (*caseArg)[0] = "lower"; (*caseArg)[1] = "upper"; (*caseArg)[2] = ""; (*seqNoArg)[0] = "off"; (*seqNoArg)[1] = "on"; (*seqNoArg)[2] = ""; (*seqNoRangeArg)[0] = "off"; (*seqNoRangeArg)[1] = "on"; (*seqNoRangeArg)[2] = ""; (*scoreArg)[0] = "percent"; (*scoreArg)[1] = "absolute"; (*scoreArg)[2] = ""; (*outputArg)[0] = "gcg"; (*outputArg)[1] = "gde"; (*outputArg)[2] = "pir"; (*outputArg)[3] = "phylip"; (*outputArg)[4] = "nexus"; (*outputArg)[5] = "fasta"; (*outputArg)[6] = "clustal"; (*outputArg)[7] = ""; (*outputTreeArg)[0] = "nj"; (*outputTreeArg)[1] = "phylip"; (*outputTreeArg)[2] = "dist"; (*outputTreeArg)[3] = "nexus"; (*outputTreeArg)[4] = ""; (*outputSecStrArg)[0] = "structure"; (*outputSecStrArg)[1] = "mask"; (*outputSecStrArg)[2] = "both"; (*outputSecStrArg)[3] = "none"; (*outputSecStrArg)[4] = ""; (*cmdLineType)[0] = " "; (*cmdLineType)[1] = "=n "; (*cmdLineType)[2] = "=f "; (*cmdLineType)[3] = "=string "; (*cmdLineType)[4] = "=filename "; (*cmdLineType)[5] = ""; 
(*clusterAlgorithm)[0] = "nj"; (*clusterAlgorithm)[1] = "upgma"; (*clusterAlgorithm)[2] = ""; (*iterationArg)[0] = "tree"; (*iterationArg)[1] = "alignment"; (*iterationArg)[2] = "none"; (*iterationArg)[3] = ""; userMatrixName = ""; pwUserMatrixName = ""; DNAUserMatrixName = ""; pwDNAUserMatrixName = ""; clustalTreeName = ""; distTreeName = ""; phylipTreeName = ""; nexusTreeName = ""; p1TreeName = ""; p2TreeName = ""; pimName = ""; // NOTE there were only 3 params for the last one, so I put in NULL for the 4th. ctr=0; cmdLineFile[ctr++] = getCmdLineDataStruct("infile", &setInfile, FILARG, NULL); cmdLineFile[ctr++] = getCmdLineDataStruct("profile1", &setProfile1, FILARG, NULL); cmdLineFile[ctr++] = getCmdLineDataStruct("profile2", &setProfile2, FILARG, NULL); cmdLineFile[ctr++] = getCmdLineDataStruct("", NULL, -1, NULL); // FIXME: final ctr index is hardcoded in CommandLineParser ctr=0; cmdLineVerb[ctr++] = getCmdLineDataStruct("help", &setHelp, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("fullhelp", &setFullHelp, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("quiet", &setQuiet, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("check", &setHelp, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("options", &setOptions, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("align", &setAlign, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("newtree", &setNewTree, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("usetree", &setUseTree, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("newtree1", &setNewTree1, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("usetree1", &setUseTree1, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("newtree2", &setNewTree2, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("usetree2", &setUseTree2, FILARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("bootstrap", &setBootstrap, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("tree", &setTree, 
NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("quicktree", &setQuickTree, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("convert", &setConvert, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("interactive", &setInteractive, NOARG, NULL); cmdLineVerb[ctr++] = getCmdLineDataStruct("batch", &setBatch, NOARG, NULL); // Mark change 16-feb-2007 I added options for doing LE and iteration cmdLineVerb[ctr++] = getCmdLineDataStruct("iteration", &setDoIteration, OPTARG, iterationArg); cmdLineVerb[ctr++] = getCmdLineDataStruct("", NULL, -1, NULL); // FIXME: final ctr index is hardcoded in CommandLineParser.h // NOTE Start back here!!!!!!!!!!!! ctr=0; cmdLinePara[ctr++] = getCmdLineDataStruct("type", &setType, OPTARG, typeArg); cmdLinePara[ctr++] = getCmdLineDataStruct("profile", &setProfile, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("sequences", &setSequences, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("matrix", &setMatrix, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("dnamatrix", &setDNAMatrix, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("negative", &setNegative, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("noweights", &setNoWeights, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("gapopen", &setGapOpen, FLTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("gapext", &setGapExtend, FLTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("endgaps", &setUseEndGaps, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("nopgap", &setNoPGap, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("nohgap", &setNoHGap, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("novgap", &setNoVGap, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("hgapresidues", &setHGapRes, STRARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("maxdiv", &setMaxDiv, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("gapdist", &setGapDist, INTARG, NULL); cmdLinePara[ctr++] = 
getCmdLineDataStruct("pwmatrix", &setPWMatrix, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("pwdnamatrix", &setPWDNAMatrix, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("pwgapopen", &setPWGapOpen, FLTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("pwgapext", &setPWGapExtend, FLTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("ktuple", &setKtuple, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("window", &setWindow, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("pairgap", &setPairGap, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("topdiags", &setTopDiags, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("score", &setScore, OPTARG, scoreArg); cmdLinePara[ctr++] = getCmdLineDataStruct("transweight", &setTransWeight, FLTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("seed", &setSeed, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("kimura", &setKimura, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("tossgaps", &setTossGaps, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("bootlabels", &setBootLabels, OPTARG, bootLabelsArg); cmdLinePara[ctr++] = getCmdLineDataStruct("debug", &setDebug, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("output", &setOutput, OPTARG, outputArg); cmdLinePara[ctr++] = getCmdLineDataStruct("outputtree", &setOutputTree, OPTARG, outputTreeArg); cmdLinePara[ctr++] = getCmdLineDataStruct("outfile", &setOutfile, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("outorder", &setOutOrder, OPTARG, outOrderArg); cmdLinePara[ctr++] = getCmdLineDataStruct("case", &setCase, OPTARG, caseArg); cmdLinePara[ctr++] = getCmdLineDataStruct("seqnos", &setSeqNo, OPTARG, seqNoArg); cmdLinePara[ctr++] = getCmdLineDataStruct("seqno_range", &setSeqNoRange, OPTARG, seqNoRangeArg); cmdLinePara[ctr++] = getCmdLineDataStruct("range", &setRange, STRARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("nosecstr1", &setSecStruct1, NOARG, NULL); 
cmdLinePara[ctr++] = getCmdLineDataStruct("nosecstr2", &setSecStruct2, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("secstrout", &setSecStructOutput, OPTARG, outputSecStrArg); cmdLinePara[ctr++] = getCmdLineDataStruct("helixgap", &setHelixGap, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("strandgap", &setStrandGap, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("loopgap", &setLoopGap, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("terminalgap", &setTerminalGap, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("helixendin", &setHelixEndIn, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("helixendout", &setHelixEndOut, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("strandendin", &setStrandEndIn, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("strandendout",&setStrandEndOut, INTARG, NULL); // NOTE these one was added to test the new LE scoring and iterations cmdLinePara[ctr++] = getCmdLineDataStruct("numiter",&setNumIterations, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("clustering", &setTreeAlgorithm, OPTARG, clusterAlgorithm); cmdLinePara[ctr++] = getCmdLineDataStruct("maxseqlen", &setMaxSeqLen, INTARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("stats", &setStatsFile, FILARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("pim", &setOutputPim, NOARG, NULL); cmdLinePara[ctr++] = getCmdLineDataStruct("", NULL, -1, NULL); // FIXME: final ctr index is hardcoded in CommandLineParser parseParams(args, xmenus); } CommandLineParser::~CommandLineParser() { // Free up menory used here! // NOTE any dynamically allocated memory (new) must be deleted. 
delete clustalObj; delete typeArg; delete bootLabelsArg; delete outOrderArg; delete caseArg; delete seqNoArg; delete seqNoRangeArg; delete scoreArg; delete outputArg; delete outputTreeArg; delete outputSecStrArg; delete cmdLineType; delete params; delete paramArg; delete clusterAlgorithm; delete iterationArg; } void CommandLineParser::parseParams(StringArray* args, bool xmenus) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Parsing Command line Parameters!\n"); } #endif int i, j, temp; //int len; //static int cl_error_code = 0; //char path[FILENAMELEN]; int numparams = 0; bool doAlign, doConvert, doAlignUseOldTree, doGuideTreeOnly, doTreeFromAlign, doBootstrap, doProfileAlign, doSomething; if (!xmenus && userParameters->getDisplayInfo()) { cout << std::endl << std::endl << std::endl; cout << " CLUSTAL " << userParameters->getRevisionLevel() << " Multiple Sequence Alignments" << std::endl << std::endl << std::endl; } doAlign = doConvert = doAlignUseOldTree = doGuideTreeOnly = doTreeFromAlign = false; doBootstrap = doProfileAlign = doSomething = false; numparams = checkParam(args, params, paramArg); if (numparams < 0) { exit(1); } //********************************************************************** //*** Note: This part of the code is to print out the options with **** //*** their expected value types/ranges **** //********************************************************************** if(setHelp != -1) { userParameters->setHelpFlag(true); if (xmenus) { // gui will display help // but parse rest of args anyway and don't return } else { clustalObj->getHelp('9'); exit(1); } } if(setFullHelp != -1) { userParameters->setFullHelpFlag(true); if (xmenus) { // gui will handle this // but parse rest of args anyway and don't return } else { clustalObj->getFullHelp(); exit(1); } } if(setQuiet != -1) { userParameters->setDisplayInfo(false); utilityObject->beQuiet(true); } else { userParameters->setDisplayInfo(true); utilityObject->beQuiet(false); } // need to 
check maxseqlen before reading input file if (setMaxSeqLen != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting max allowed sequence length."); } #endif temp = 0; if((*paramArg)[setMaxSeqLen].length() > 0) { if (sscanf((*paramArg)[setMaxSeqLen].c_str(),"%d", &temp) != 1) { reportBadOptionAndExit("maxseqlen", "integer"); } } if(temp > 0) { userParameters->setMaxAllowedSeqLength(temp); } else { cerr << "Cannot use a negative value for maximum sequence length. Using default" << std::endl; } } if (setStatsFile != -1) { if((*paramArg)[setStatsFile].length() > 0) { statsObject->setEnabled(true); statsObject->setStatsFile((*paramArg)[setStatsFile]); } } if (setOutputPim != -1) { userParameters->setOutputPim(true); } /*if(setDoIteration != -1) { userParameters->setDoRemoveFirstIteration(true); }*/ if(setDoIteration != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting iteration parameter."); } #endif if((*paramArg)[setDoIteration].length() > 0) { temp = findMatch((*paramArg)[setDoIteration], iterationArg, 3); if(temp == 0) { userParameters->setDoRemoveFirstIteration(TREE); } else if(temp == 1) { userParameters->setDoRemoveFirstIteration(ALIGNMENT); } else if(temp == 2) { userParameters->setDoRemoveFirstIteration(NONE); } else { cerr << "Unknown option for iteration. 
Setting to NONE" << std::endl; userParameters->setDoRemoveFirstIteration(NONE); } } } if(setOptions != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Displaying options!\n"); } #endif cout << "clustalw option list:-" << std::endl; for (i = 0; cmdLineVerb[i].str[0] != '\0'; i++) { cout << "\t\t" << default_commandsep << cmdLineVerb[i].str << (*cmdLineType)[cmdLineVerb[i].type]; if (cmdLineVerb[i].type == OPTARG) { if (cmdLineVerb[i].arg != NULL) { cout << "=" << cmdLineVerb[i].arg->at(0); for(j = 1; j < (int)cmdLineVerb[i].arg->size() - 1; j++) { cout << " OR " << cmdLineVerb[i].arg->at(j); } } } cout << std::endl; } for (i = 0; cmdLineFile[i].str[0] != '\0'; i++) { cout<<"\t\t" << default_commandsep << cmdLineFile[i].str << (*cmdLineType)[cmdLineFile[i].type]; if (cmdLineFile[i].type == OPTARG) { if (cmdLineFile[i].arg != NULL) { cout << "=" << cmdLineFile[i].arg->at(0); for(j = 1; j < (int)cmdLineFile[i].arg->size() - 1; j++) { cout << " OR " << cmdLineFile[i].arg->at(j); } } } cout << std::endl; } for (i = 0; cmdLinePara[i].str[0] != '\0'; i++) { cout <<"\t\t" << default_commandsep << cmdLinePara[i].str << (*cmdLineType)[cmdLinePara[i].type]; if (cmdLinePara[i].type == OPTARG) { if (cmdLinePara[i].arg != NULL) { cout << "=" << cmdLinePara[i].arg->at(0); for(j = 1; j < (int)cmdLinePara[i].arg->size() - 1; j++) { cout << " OR " << cmdLinePara[i].arg->at(j); } } } cout << std::endl; } exit(1); } //***************************************************************************** // Check to see if sequence type is explicitely stated..override ************ // the automatic checking (DNA or Protein). 
/type=d or /type=p ************* //**************************************************************************** if(setType != -1) { string msg; if(((*paramArg)[setType].length()) > 0) { temp = findMatch((*paramArg)[setType], typeArg, 2); if(temp == 0) { userParameters->setDNAFlag(false); userParameters->setExplicitDNAFlag(true); msg = "Sequence type explicitly set to Protein"; cout << msg << std::endl; } else if(temp == 1) { msg = "Sequence type explicitly set to DNA"; cout << msg << std::endl; userParameters->setDNAFlag(true); userParameters->setExplicitDNAFlag(true); } else { msg = "Unknown sequence type " + (*paramArg)[setType]; cerr << std::endl << msg << endl; } #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(msg); } #endif } } //*************************************************************************** // check to see if 1st parameter does not start with '/' i.e. look for an * // input file as first parameter. The input file can also be specified * // by /infile=fname. * //*************************************************************************** // JULIE - moved to checkParam() // if(paramstr[0] != '/') // { // strcpy(seqName, params[0]); // } //************************************************* // Look for /infile=file.ext on the command line * //************************************************* if(setInfile != -1) { if((*paramArg)[setInfile].length() <= 0) { exitWithErrorMsg("Bad sequence file name"); } userParameters->setSeqName((*paramArg)[setInfile]); #if DEBUGFULL if(logObject && DEBUGLOG) { string msg = "Sequence file name (seqName) has been set to "; msg += userParameters->getSeqName(); logObject->logMsg(msg); } #endif } // NOTE keep an eye on this part to see if it works. if(userParameters->getSeqName() != "") { // NOTE I will need to cheack if it has been successful before setting // doSomething to true. 
int code; code = clustalObj->commandLineReadSeq(1); if(code == OK) { doSomething = true; } else { doSomething = false; exit(1); } } // NOTE call the other function to set the parameter values!! setOptionalParam(); //********************************************************* // Look for /profile1=file.ext AND /profile2=file2.ext * // You must give both file names OR neither. * //******************************************************** if(setProfile1 != -1) { if((*paramArg)[setProfile1].length() <= 0) { exitWithErrorMsg("Bad profile 1 file name"); } clustalObj->profile1Input((*paramArg)[setProfile1]); } if(setProfile2 != -1) { if((*paramArg)[setProfile2].length() <= 0) { exitWithErrorMsg("Bad profile 2 file name"); } if(userParameters->getProfile1Empty()) { exitWithErrorMsg("Only 1 profile file (profile 2) specified."); } clustalObj->profile2Input((*paramArg)[setProfile2]); doSomething = doProfileAlign = true; } //************************************************************************ // Look for /tree or /bootstrap or /align or /usetree ****************** //************************************************************************ if (setBatch != -1) { userParameters->setInteractive(false); } if (setInteractive != -1) { userParameters->setInteractive(true); if (setQuiet!=-1) { cout << "interactive menu: overriding " << default_commandsep << "quiet" << std::endl; userParameters->setDisplayInfo(true); utilityObject->beQuiet(false); } } if (userParameters->getInteractive()) { setTree = -1; setBootstrap = -1; setAlign = -1; setUseTree = -1; setUseTree1 = -1; setUseTree2 = -1; setNewTree = -1; setConvert = -1; } if(setTree != -1 ) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot draw tree. No input alignment file"); } else { doTreeFromAlign = true; } } if(setBootstrap != -1) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot bootstrap tree. No input alignment file"); } else { temp = 0; // Check if there is anything in the string! 
if((*paramArg)[setBootstrap].length() > 0) { if (sscanf((*paramArg)[setBootstrap].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("bootstrap", "integer"); } } if(temp > 0) { userParameters->setBootNumTrials(temp); } doBootstrap = true; } } if(setAlign != -1) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot align sequences. No input file"); } else { doAlign = true; } } if(setConvert != -1) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot convert sequences. No input file"); } else { doConvert = true; } } if(setUseTree != -1) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot align sequences. No input file"); } else { if((*paramArg)[setUseTree].length() == 0) { exitWithErrorMsg("Cannot align sequences. No tree file specified"); } else { phylipTreeName = (*paramArg)[setUseTree]; } userParameters->setUseTreeFile(true); doAlignUseOldTree = true; } } if(setNewTree != -1) { if(userParameters->getEmpty()) { exitWithErrorMsg("Cannot align sequences. No input file"); } else { if((*paramArg)[setNewTree].length() == 0) { exitWithErrorMsg("Cannot align sequences. No tree file specified"); } else { phylipTreeName = (*paramArg)[setNewTree]; } userParameters->setNewTreeFile(true); doGuideTreeOnly = true; } } if(setUseTree1 != -1) { if(userParameters->getProfile1Empty()) { exitWithErrorMsg("Cannot align profiles. No input file"); } else if(profileType == SEQUENCE) { reportInvalidOptionAndExit("usetree1"); } else { if((*paramArg)[setUseTree1].length() == 0) { exitWithErrorMsg("Cannot align profiles. No tree file specified"); } else { p1TreeName = (*paramArg)[setUseTree1]; } userParameters->setUseTree1File(true); doAlignUseOldTree = true; } } if(setNewTree1 != -1) { if(userParameters->getProfile1Empty()) { exitWithErrorMsg("Cannot align profiles. No input file"); } else if(profileType == SEQUENCE) { reportInvalidOptionAndExit("newtree1"); } else { if((*paramArg)[setNewTree1].length() == 0) { exitWithErrorMsg("Cannot align profiles. 
No tree file specified"); } else { p1TreeName = (*paramArg)[setNewTree1]; } userParameters->setNewTree1File(true); } } if(setUseTree2 != -1) { if(userParameters->getProfile2Empty()) { exitWithErrorMsg("Cannot align profiles. No input file"); } else if(profileType == SEQUENCE) { reportInvalidOptionAndExit("usetree2"); } else { if((*paramArg)[setUseTree2].length() == 0) { exitWithErrorMsg("Cannot align profiles. No tree file specified"); } else { p2TreeName = (*paramArg)[setUseTree2]; } userParameters->setUseTree2File(true); doAlignUseOldTree = true; } } if(setNewTree2 != -1) { if(userParameters->getProfile2Empty()) { exitWithErrorMsg("Cannot align profiles. No input file"); } else if(profileType == SEQUENCE) { reportInvalidOptionAndExit("newtree2"); } else { if((*paramArg)[setNewTree2].length() == 0) { exitWithErrorMsg("Cannot align profiles. No tree file specified"); } else { p2TreeName = (*paramArg)[setNewTree2]; } userParameters->setNewTree2File(true); } } if( (!doTreeFromAlign) && (!doBootstrap) && (!userParameters->getEmpty()) && (!doProfileAlign) && (!doAlignUseOldTree) && (!doGuideTreeOnly) && (!doConvert)) { doAlign = true; } //** ? /quicktree if(setQuickTree != -1) { userParameters->setQuickPairAlign(true); } // NOTE if(userParameters->getDNAFlag()) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } if(userParameters->getInteractive()) { if (!xmenus) { userParameters->setMenuFlag(true); } return; } if(!doSomething) { exitWithErrorMsg("No input file(s) specified"); } //*************************************************************************** // Now do whatever has been requested *************************************** //*************************************************************************** // NOTE This part is obviously not done yet! Functions not working in Clustal!!!! 
#if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Now doing the requested task(s)"); } #endif if(doProfileAlign) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing profile align"); } #endif if (profileType == PROFILE) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Calling Profile_align in clustal obj!"); } #endif clustalObj->profileAlign(&p1TreeName, &p2TreeName); } else { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Calling sequencesAlignToProfile in clustal obj!"); } #endif clustalObj->sequencesAlignToProfile(&phylipTreeName); } } else if(doAlign) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing Alignment"); } #endif clustalObj->align(&phylipTreeName); } else if(doConvert) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing filetype conversion"); } #endif clustalObj->outputNow(); } else if (doAlignUseOldTree) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing Alignment only"); } #endif clustalObj->doAlignUseOldTree(&phylipTreeName); } else if(doGuideTreeOnly) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing tree only"); } #endif clustalObj->doGuideTreeOnly(&phylipTreeName); } else if(doTreeFromAlign) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing tree"); } #endif clustalObj->phylogeneticTree(&phylipTreeName, &clustalTreeName, &distTreeName, &nexusTreeName, pimName); } else if(doBootstrap) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Doing Bootstrap"); } #endif clustalObj->bootstrapTree(&phylipTreeName, &clustalTreeName, &nexusTreeName); } cout << std::endl; }

/**
 * Split the raw command-line arguments into option names and option values,
 * then validate every option against the three lookup tables cmdLinePara
 * (optional parameters), cmdLineFile (input files) and cmdLineVerb (verbs).
 *
 * @param args     Raw arguments as supplied by the caller.
 * @param params   Output. One entry per argument: the option name with its
 *                 leading character removed and converted with
 *                 ConvertStringToLower(), or the untouched file name when the
 *                 first argument does not start with a character from
 *                 VALID_COMMAND_SEP.
 * @param paramArg Output, parallel to params: the text following the first
 *                 '=' of the option, or "" when there is none. Values of
 *                 cmdLinePara options whose type is not FILARG are also
 *                 converted with ConvertStringToLower().
 * @return The number of arguments processed, 0 when args is empty, or -1 on
 *         any error (a message is written to cerr first).
 *
 * Side effects: for every recognised option the int pointed to by the
 * table entry's `flag` member is set to the option's index in params, and
 * userParameters->setSeqName() is called when the first argument is a file
 * name.
 *
 * NOTE(review): params and paramArg are indexed as if they were empty on
 * entry -- confirm against the caller.
 */
int CommandLineParser::checkParam(StringArray* args, StringArray* params,
                                  StringArray* paramArg)
{
    int i, j, nameFirst, num;
    // match[i] is -1 while option i has not been found in any table.
    // resize() zero-fills, so a leading file name (never looked up) is not
    // mistaken for an unknown option.
    vector<int> match;
    match.resize(MAXARGS);
    bool name1 = false;

#if DEBUGFULL
    if(logObject && DEBUGLOG)
    {
        logObject->logMsg("Checking Parameters!");
    }
#endif

    if(args->size() == 0)
    {
        cout << "The argument list is empty\n";
        return 0;
    }

    // AW: first arg is an input file if it doesnt start with commandsep
    if (VALID_COMMAND_SEP.find((*args)[0][0], 0) == string::npos)
    {
        name1 = true;
        params->push_back((*args)[0]);
        paramArg->push_back(""); // keeps paramArg parallel to params
    }
    else // It is not a file name: drop the separator character.
    {
        params->push_back((*args)[0].substr(1));
    }

    // Every further argument is taken to start with a separator character:
    // skip it and keep only the printable characters that follow.
    for (i = 1; i < (int)args->size(); i++)
    {
        params->push_back("");
        for(j = 0; j < (int)(*args)[i].length() - 1; j++)
        {
            // isprint() requires a value representable as unsigned char;
            // a plain (possibly negative) char is undefined behaviour.
            if(isprint(static_cast<unsigned char>((*args)[i][j + 1])))
            {
                (*params)[i].append((*args)[i].substr(j + 1, 1));
            }
        }
    }
    num = i;

    // Must stay ahead of any match[] access: match holds MAXARGS entries.
    if ((int)args->size() > MAXARGS)
    {
        cerr << "Error: too many command line arguments\n";
        return(-1);
    }

    /* special case - first parameter is input fileName */
    nameFirst = 0;
    if(name1 == true)
    {
        // Andreas Wilm (UCD) 2008-03-19: the file name is deliberately NOT
        // lower-cased any more.
        userParameters->setSeqName((*params)[0]);
        nameFirst = 1;
    }

    // Split "name=value" into params[i] / paramArg[i]. When a file name came
    // first, paramArg already holds its placeholder, so start at element 1.
    for (i = nameFirst; i < num; i++)
    {
        paramArg->push_back("");
        const string::size_type eq = (*params)[i].find('=');
        if (eq != string::npos)
        {
            (*paramArg)[i].assign((*params)[i].substr(eq + 1));
            (*params)[i].assign(ConvertStringToLower((*params)[i].substr(0, eq)));
        }
        else
        {
            // Andreas Wilm (UCD): 2008-03-19:
            // convert no-argument options to lowercase too (-QuIcKtReE etc)
            (*params)[i].assign(ConvertStringToLower((*params)[i]));
        }
    }

    if(paramArg->size() != params->size())
    {
        cerr << "There is something wrong with arguments. Lengths different\n";
        return -1;
    }

    /*
       for each parameter given on the command line, first search the list of
       recognised optional parameters....
    */
    for (i = 0; i < num; i++)
    {
        if ((i == 0) && (name1 == true))
        {
            continue;
        }
        match[i] = -1;
        for (j = 0; cmdLinePara[j].str[0] != '\0'; j++)
        {
            if ((*params)[i].compare(cmdLinePara[j].str) != 0)
            {
                continue;
            }
            match[i] = j;
            *cmdLinePara[j].flag = i;
            if ((cmdLinePara[j].type != NOARG) && ((*paramArg)[i] == ""))
            {
                cerr << "Error: parameter required for "
                     << default_commandsep << (*params)[i] << endl;
                return -1;
            }
            if (cmdLinePara[j].type != FILARG && (*paramArg)[i] != "")
            {
                // lowercase arg if not a filename to support mixed case
                (*paramArg)[i].assign(ConvertStringToLower((*paramArg)[i]));
            }
            break;
        }
    }

    /*
       ....then the list of recognised input files,....
    */
    for (i = 0; i < num; i++)
    {
        if ((i == 0) && (name1 == true))
        {
            continue;
        }
        if (match[i] != -1)
        {
            continue;
        }
        for (j = 0; cmdLineFile[j].str[0] != '\0'; j++)
        {
            if ((*params)[i].compare(cmdLineFile[j].str) != 0)
            {
                continue;
            }
            match[i] = j;
            *cmdLineFile[j].flag = i;
            if ((cmdLineFile[j].type != NOARG) && ((*paramArg)[i] == ""))
            {
                cerr << "Error: parameter required for "
                     << default_commandsep << (*params)[i] << endl;
                return -1;
            }
            break;
        }
    }

    /*
       ....and finally the recognised verbs.
    */
    for (i = 0; i < num; i++)
    {
        if ((i == 0) && (name1 == true))
        {
            continue;
        }
        if (match[i] != -1)
        {
            continue;
        }
        for (j = 0; cmdLineVerb[j].str[0] != '\0'; j++)
        {
            if ((*params)[i].compare(cmdLineVerb[j].str) != 0)
            {
                continue;
            }
            match[i] = j;
            *cmdLineVerb[j].flag = i;
            if ((cmdLineVerb[j].type != NOARG) && ((*paramArg)[i] == ""))
            {
                cerr << "Error: parameter required for "
                     << default_commandsep << (*params)[i] << endl;
                return -1;
            }
            break;
        }
    }

    /*
       check for any unrecognised parameters.
    */
    for (i = 0; i < num; i++)
    {
        if (match[i] == -1)
        {
            cerr << "Error: unknown option "
                 << default_commandsep << (*params)[i] << endl;
            return -1;
        }
    }
    return(num);
}

void CommandLineParser::setOptionalParam() { int temp; int _ktup, _windgap, _signif, _window = 0; string _matrixname; //int c, i; float ftemp; //char tstr[100]; #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Setting optional parameters."); } #endif //**************************************************************************** //* look for parameters on command line e.g. gap penalties, k-tuple etc. * //**************************************************************************** // Mark change 16-2-2007.
if(setNumIterations != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting num iterations parameter."); } #endif temp = 0; if((*paramArg)[setNumIterations].length() > 0) { if (sscanf((*paramArg)[setNumIterations].c_str(),"%d", &temp) != 1) { reportBadOptionAndExit("numiter", "int"); temp = 0; } } if(temp > 0) { userParameters->setNumIterations(temp); } else { exitWithErrorMsg("Cannot use a negative value for number of iterations."); } } //** ? /score=percent or /score=absolute * if(setScore != -1) { if((*paramArg)[setScore].length() > 0) { temp = findMatch((*paramArg)[setScore], scoreArg, 2); if(temp == 0) { userParameters->setPercent(true); #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting score parameter = percent"); } #endif } else if(temp == 1) { userParameters->setPercent(false); #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting score parameter = absolute"); } #endif } else { cerr << "\nUnknown SCORE type: " << (*paramArg)[setScore] << endl; #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" problem setting score type!!!!!"); } #endif } } } // NOTE I decided to stay with sscanf for getting the int. Options in c++ no better. //** ? 
/seed=n * if(setSeed != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting seed parameter."); } #endif temp = 0; if((*paramArg)[setSeed].length() > 0) { if (sscanf((*paramArg)[setSeed].c_str(),"%d",&temp) != 1) { reportBadOptionAndExit("seed", "integer"); } } if(temp > 0) { userParameters->setBootRanSeed(temp); } cout<< "\ntemp = " << temp << "; seed = " << userParameters->getBootRanSeed() << ";\n"; } if(setTreeAlgorithm != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting clustering algorithm parameter."); } #endif if((*paramArg)[setTreeAlgorithm].length() > 0) { temp = findMatch((*paramArg)[setTreeAlgorithm], clusterAlgorithm, 2); if(temp == 0) { userParameters->setClusterAlgorithm(NJ); } else if(temp == 1) { userParameters->setClusterAlgorithm(UPGMA); } else { cerr << "Unknown option for clustering algorithm. Using default\n"; userParameters->setClusterAlgorithm(NJ); } } } //** ? /output=PIR, GCG, GDE or PHYLIP * if(setOutput != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting output parameter."); } #endif if((*paramArg)[setOutput].length() > 0) { temp = findMatch((*paramArg)[setOutput], outputArg, 7); if (temp >= 0 && temp <= 6) { userParameters->setOutputClustal(false); userParameters->setOutputGCG(false); userParameters->setOutputPhylip(false); userParameters->setOutputNbrf(false); userParameters->setOutputGde(false); userParameters->setOutputNexus(false); userParameters->setOutputFasta(false); } switch (temp) { case 0: // GCG userParameters->setOutputGCG(true); break; case 1: // GDE userParameters->setOutputGde(true); break; case 2: // PIR userParameters->setOutputNbrf(true); break; case 3: // PHYLIP userParameters->setOutputPhylip(true); break; case 4: // NEXUS userParameters->setOutputNexus(true); break; case 5: // FASTA userParameters->setOutputFasta(true); break; case 6: // CLUSTAL userParameters->setOutputClustal(true); break; default: // FIXME AW: 1.83 behaves the same, but 
shouldnt // we exit here? exitWithErrorMsg("Unknown OUTPUT type: " + (*paramArg)[setOutput]); } } } //** ? /outputtree=NJ or PHYLIP or DIST or NEXUS if(setOutputTree != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting outputtree parameter."); } #endif if((*paramArg)[setOutputTree].length() > 0) { temp = findMatch((*paramArg)[setOutputTree], outputTreeArg, 4); switch (temp) { case 0: // NJ userParameters->setOutputTreeClustal(true); break; case 1: // PHYLIP userParameters->setOutputTreePhylip(true); break; case 2: // DIST userParameters->setOutputTreeDistances(true); break; case 3: // NEXUS userParameters->setOutputTreeNexus(true); break; default: cerr << "\nUnknown OUTPUT TREE type: " << (*paramArg)[setOutputTree] << endl; } } } //** ? /profile (sets type of second input file to profile) if(setProfile != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting profileType = PROFILE."); } #endif profileType = PROFILE; } //** ? /sequences (sets type of second input file to list of sequences) if(setSequences != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting profileType = SEQUENCE."); } #endif profileType = SEQUENCE; } //** ? 
/ktuple=n if(setKtuple != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting ktup parameter."); } #endif _ktup = 0; if((*paramArg)[setKtuple].length() > 0) { if (sscanf((*paramArg)[setKtuple].c_str(),"%d",&_ktup)!=1) { reportBadOptionAndExit("ktuple", "integer"); _ktup = 0; } } if(_ktup > 0) { if(userParameters->getDNAFlag()) { if(_ktup <= 4) { userParameters->setKtup(_ktup); userParameters->setDNAKtup(_ktup); userParameters->setWindowGap(_ktup + 4); userParameters->setDNAWindowGap(_ktup + 4); } else { // see comment in bug 185 cerr << "WARNING: Ignoring invalid ktuple of " << _ktup << " (must be <=4)" << std::endl; } } else { if(_ktup <= 2) { userParameters->setKtup(_ktup); userParameters->setAAKtup(_ktup); userParameters->setWindowGap(_ktup + 3); userParameters->setAAWindowGap(_ktup + 3); // AW: why set setDNAWindowGap? we are in AA mode // userParameters->setDNAWindowGap(_ktup + 4); } else { // see comment in bug 185 cerr << "WARNING: Ignoring invalid ktuple of " << _ktup << " (must be <=2)" << std::endl; } } } } //** ? /pairgap=n if(setPairGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting pairgap parameter."); } #endif _windgap = 0; if((*paramArg)[setPairGap].length() > 0) { if (sscanf((*paramArg)[setPairGap].c_str(),"%d",&_windgap)!=1) { reportBadOptionAndExit("pairgap", "integer"); _windgap = 0; } if(_windgap > 0) { if(userParameters->getDNAFlag()) { if(_windgap > userParameters->getKtup()) { userParameters->setWindowGap(_windgap); userParameters->setDNAWindowGap(_windgap); } } else { if(_windgap > userParameters->getKtup()) { userParameters->setWindowGap(_windgap); userParameters->setAAWindowGap(_windgap); } } } } } //** ? 
/topdiags=n if(setTopDiags != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting topdiags parameter."); } #endif _signif = 0; if((*paramArg)[setTopDiags].length() > 0) { if (sscanf((*paramArg)[setTopDiags].c_str(),"%d",&_signif)!=1) { reportBadOptionAndExit("topdiags", "integer"); } } if(_signif > 0) { if(userParameters->getDNAFlag()) { if(_signif > userParameters->getKtup()) { userParameters->setSignif(_signif); userParameters->setDNASignif(_signif); } } else { if(_signif > userParameters->getKtup()) { userParameters->setSignif(_signif); userParameters->setAASignif(_signif); } } } } //** ? /window=n if(setWindow != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting window parameter."); } #endif _window = 0; if((*paramArg)[setWindow].length() > 0) { if (sscanf((*paramArg)[setWindow].c_str(),"%d",&_window)!=1) { reportBadOptionAndExit("window", "integer"); _window = 0; } } if(_window > 0) { if(userParameters->getDNAFlag()) { if(_window > userParameters->getKtup()) { userParameters->setWindow(_window); userParameters->setDNAWindow(_window); } } else { if(_window > userParameters->getKtup()) { userParameters->setWindow(_window); userParameters->setAAWindow(_window); } } } } //** ? /kimura if(setKimura != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting kimura=true"); } #endif userParameters->setKimura(true); } //** ? /tossgaps if(setTossGaps != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting tossgaps=true"); } #endif userParameters->setTossGaps(true); } //** ? /negative if(setNegative != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting useNegMatrix=true"); } #endif userParameters->setUseNegMatrix(true); } //** ? /noweights if(setNoWeights!= -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting noweights=true"); } #endif userParameters->setNoWeights(true); } //** ? 
/pwmatrix=ID (user's file) if(setPWMatrix != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting pwmatrix parameter."); } #endif temp = (*paramArg)[setPWMatrix].length(); if(temp > 0) { _matrixname = ConvertStringToLower((*paramArg)[setPWMatrix]); if (_matrixname.compare("blosum") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 1, Protein, Pairwise); } else if (_matrixname.compare("pam") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 2, Protein, Pairwise); } else if (_matrixname.compare("gonnet") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 3, Protein, Pairwise); } else if (_matrixname.compare("id") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 4, Protein, Pairwise); } else { char hackTempName[FILENAMELEN + 1]; strcpy(hackTempName, (*paramArg)[setPWMatrix].c_str()); if(subMatrix->getUserMatFromFile(hackTempName, Protein, Pairwise)) { subMatrix->setCurrentNameAndNum((*paramArg)[setPWMatrix], 5, Protein, Pairwise); pwUserMatrixName = (*paramArg)[setPWMatrix]; } else exit(1); } } } //** ? 
/matrix=ID (user's file) if(setMatrix != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting matrix parameter."); } #endif temp = (*paramArg)[setMatrix].length(); if(temp > 0) { _matrixname = ConvertStringToLower((*paramArg)[setMatrix]); if (_matrixname.compare("blosum")==0) { subMatrix->setCurrentNameAndNum(_matrixname, 1, Protein, MultipleAlign); } else if (_matrixname.compare("pam")==0) { subMatrix->setCurrentNameAndNum(_matrixname, 2, Protein, MultipleAlign); } else if (_matrixname.compare("gonnet")==0) { subMatrix->setCurrentNameAndNum(_matrixname, 3, Protein, MultipleAlign); } else if (_matrixname.compare("id")==0) { subMatrix->setCurrentNameAndNum(_matrixname, 4, Protein, MultipleAlign); } else { char hackTempName[FILENAMELEN + 1]; strcpy(hackTempName, (*paramArg)[setMatrix].c_str()); if(subMatrix->getUserMatSeriesFromFile(hackTempName)) { subMatrix->setCurrentNameAndNum((*paramArg)[setMatrix], 4, Protein, MultipleAlign); userMatrixName = (*paramArg)[setMatrix]; } else exit(1); } } } //** ? /pwdnamatrix=ID (user's file) if(setPWDNAMatrix != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting pwdnamatrix parameter."); } #endif temp = (*paramArg)[setPWDNAMatrix].length(); if(temp > 0) { _matrixname = ConvertStringToLower((*paramArg)[setPWDNAMatrix]); if (_matrixname.compare("iub") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 1, DNA, Pairwise); } else if (_matrixname.compare("clustalw") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 2, DNA, Pairwise); } else { char hackTempName[FILENAMELEN + 1]; strcpy(hackTempName, (*paramArg)[setPWDNAMatrix].c_str()); if(subMatrix->getUserMatFromFile(hackTempName, DNA, Pairwise)) { subMatrix->setCurrentNameAndNum((*paramArg)[setPWDNAMatrix], 3, Protein, Pairwise); pwDNAUserMatrixName = (*paramArg)[setPWDNAMatrix]; } else exit(1); } } } //** ? 
/dnamatrix=ID (user's file) if(setDNAMatrix != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting dnamatrix parameter."); } #endif temp = (*paramArg)[setDNAMatrix].length(); if(temp > 0) { _matrixname = ConvertStringToLower((*paramArg)[setDNAMatrix]); if (_matrixname.compare("iub") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 1, DNA, MultipleAlign); } else if (_matrixname.compare("clustalw") == 0) { subMatrix->setCurrentNameAndNum(_matrixname, 2, DNA, MultipleAlign); } else { char hackTempName[FILENAMELEN + 1]; strcpy(hackTempName, (*paramArg)[setDNAMatrix].c_str()); if(subMatrix->getUserMatFromFile(hackTempName, DNA, MultipleAlign)) { subMatrix->setCurrentNameAndNum((*paramArg)[setDNAMatrix], 3, Protein, MultipleAlign); DNAUserMatrixName = (*paramArg)[setDNAMatrix]; } else exit(1); } } } //** ? /maxdiv= n if(setMaxDiv != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting maxdiv parameter."); } #endif temp = 0; if((*paramArg)[setMaxDiv].length() > 0) { if (sscanf((*paramArg)[setMaxDiv].c_str(),"%d",&temp)!=1) { reportBadOptionAndExit("maxdiv", "integer"); temp = 0; } } if (temp >= 0) { userParameters->setDivergenceCutoff(temp); } } //** ? /gapdist= n if(setGapDist != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting gapdist parameter."); } #endif temp = 0; if((*paramArg)[setGapDist].length() > 0) if (sscanf((*paramArg)[setGapDist].c_str(),"%d",&temp)!=1) { reportBadOptionAndExit("gapdist", "integer"); } if (temp >= 0) { userParameters->setGapDist(temp); } } //** ? /debug= n if(setDebug != -1) { temp = 0; if((*paramArg)[setDebug].length() > 0) if (sscanf((*paramArg)[setDebug].c_str(),"%d",&temp)!=1) { reportBadOptionAndExit("debug", "integer"); } if (temp >= 0) { userParameters->setDebug(temp); } } //** ? 
/outfile= (user's file) if(setOutfile != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting outfile parameter."); } #endif if((*paramArg)[setOutfile].length() > 0) { userParameters->setOutfileName((*paramArg)[setOutfile]); } } //*** ? /case= lower/upper if(setCase != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting case parameter."); } #endif if((*paramArg)[setCase].length() > 0) { temp = findMatch((*paramArg)[setCase], caseArg, 2); if(temp == 0) { userParameters->setLowercase(true); } else if(temp == 1) { userParameters->setLowercase(false); } else { cerr << "\nUnknown case " << (*paramArg)[setCase] << endl; } } } //*** ? /seqnos=off/on if(setSeqNo != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting seqnos parameter."); } #endif if((*paramArg)[setSeqNo].length() > 0) { temp = findMatch((*paramArg)[setSeqNo], seqNoArg, 2); if(temp == 0) { userParameters->setClSeqNumbers(false); } else if(temp == 1) { userParameters->setClSeqNumbers(true); } else { cerr << "\nUnknown SEQNO option " << (*paramArg)[setSeqNo] << endl; } } } if(setSeqNoRange != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting seqno range parameter."); } #endif if((*paramArg)[setSeqNoRange].length() > 0) { temp = findMatch((*paramArg)[setSeqNoRange], seqNoRangeArg, 2); cout << "\n comparing " << "\nparamArg[setSeqNoRange]= " << (*paramArg)[setSeqNoRange] << "\n comparing \n "; if(temp == 0) { userParameters->setSeqRange(false); } else if(temp == 1) { userParameters->setSeqRange(true); } else { cerr << "\nUnknown Sequence range option " << (*paramArg)[setSeqNoRange] << endl; } } } //*** ? /range=n:m if(setRange != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting range parameter."); } #endif temp = 0; if((*paramArg)[setRange].length() > 0) { // NOTE I have made a big change here! Mark march 14th 2006. This was being done // in the Alignment output functions. 
int iFirstRes = -1; int iLastRes = -1; char ignore; if (sscanf((*paramArg)[setRange].c_str(), "%d%[ :,-]%d", &iFirstRes, &ignore, &iLastRes) != 3) { cerr << "setRange: Syntax Error: Cannot set range, should be from:to \n"; } else { userParameters->setRangeFrom(iFirstRes); userParameters->setRangeTo(iLastRes); } } } //*** ? /gapopen=n if(setGapOpen != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting gapopen parameter."); } #endif ftemp = 0.0; if((*paramArg)[setGapOpen].length() > 0) { if (sscanf((*paramArg)[setGapOpen].c_str(),"%f",&ftemp) != 1) { reportBadOptionAndExit("gapopen", "real number"); ftemp = 0.0; } if(ftemp >= 0.0) { if( userParameters->getDNAFlag()) { userParameters->setGapOpen(ftemp); userParameters->setDNAGapOpen(ftemp); } else { userParameters->setGapOpen(ftemp); userParameters->setProteinGapOpen(ftemp); } } } } //*** ? /gapext=n if(setGapExtend != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting gap extention parameter."); } #endif ftemp = 0.0; if((*paramArg)[setGapExtend].length() > 0) { if (sscanf((*paramArg)[setGapExtend].c_str(),"%f",&ftemp) != 1) { reportBadOptionAndExit("gapext", "real number"); ftemp = 0.0; } if(ftemp >= 0) { if(userParameters->getDNAFlag()) { userParameters->setGapExtend(ftemp); userParameters->setDNAGapExtend(ftemp); } else { userParameters->setGapExtend(ftemp); userParameters->setProteinGapExtend(ftemp); } } } } //*** ? /transweight=n if(setTransWeight != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting transweight parameter."); } #endif ftemp = 0.0; if((*paramArg)[setTransWeight].length() > 0) { if (sscanf((*paramArg)[setTransWeight].c_str(), "%f", &ftemp) != 1) { reportBadOptionAndExit("transweight", "real number"); } } userParameters->setTransitionWeight(ftemp); } //*** ? 
/pwgapopen=n if(setPWGapOpen != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting pwgapopen parameter."); } #endif ftemp = 0.0; if((*paramArg)[setPWGapOpen].length() > 0) { if (sscanf((*paramArg)[setPWGapOpen].c_str(), "%f", &ftemp) != 1) { reportBadOptionAndExit("pwgapopen", "real number"); } } if(ftemp >= 0.0) { if(userParameters->getDNAFlag()) { userParameters->setPWGapOpen(ftemp); userParameters->setDNAPWGapOpenPenalty(ftemp); } else { userParameters->setPWGapOpen(ftemp); userParameters->setProteinPWGapOpenPenalty(ftemp); } } } //*** ? /gapext=n if(setPWGapExtend != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting pwgapext parameter."); } #endif ftemp = 0.0; if((*paramArg)[setPWGapExtend].length() > 0) { if (sscanf((*paramArg)[setPWGapExtend].c_str(), "%f", &ftemp) != 1) { reportBadOptionAndExit("pwgapext", "real number"); } } if(ftemp >= 0) { if(userParameters->getDNAFlag()) { userParameters->setPWGapExtend(ftemp); userParameters->setDNAPWGapExtendPenalty(ftemp); } else { userParameters->setPWGapExtend(ftemp); userParameters->setProteinPWGapExtendPenalty(ftemp); } } } //*** ? /outorder=n if(setOutOrder != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting outorder parameter."); } #endif if((*paramArg)[setOutOrder].length() > 0) { temp = findMatch((*paramArg)[setOutOrder],outOrderArg,2); } if(temp == 0) { userParameters->setOutputOrder(INPUT); } else if(temp == 1) { userParameters->setOutputOrder(ALIGNED); } else { cerr << "\nUnknown OUTPUT ORDER type " << (*paramArg)[setOutOrder] << endl; } } //*** ? 
/bootlabels=n if(setBootLabels != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting bootlabels parameter."); } #endif if((*paramArg)[setBootLabels].length() > 0) { temp = findMatch((*paramArg)[setBootLabels], bootLabelsArg, 2); } if(temp == 0) { userParameters->setBootstrapFormat(BS_NODE_LABELS); } else if(temp == 1) { userParameters->setBootstrapFormat(BS_BRANCH_LABELS); } else { cerr << "\nUnknown bootlabels type " << (*paramArg)[setBootLabels] << endl; } } //*** ? /endgaps if(setUseEndGaps != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting useendgaps=true"); } #endif userParameters->setUseEndGaps(false); } //*** ? /nopgap if(setNoPGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting noPrefPenalties=true"); } #endif userParameters->setNoPrefPenalties(true); } //*** ? /nohgap if(setNoHGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting nohgap=true"); } #endif userParameters->setNoHydPenalties(true); } //*** ? /novgap if(setNoVGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting novgap=false"); } #endif userParameters->setNoVarPenalties(false); } //*** ? /hgapresidues="string" // NOTE I have made some big changes here. It looks as if there was an error here! if(setHGapRes != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting hgapresidues parameter."); } #endif userParameters->setHydResidues((*paramArg)[setHGapRes]); } //*** ? /nosecstr1 if(setSecStruct1 != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting useSS1=false"); } #endif userParameters->setUseSS1(false); } //*** ? /nosecstr2 if(setSecStruct2 != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting useSS2=false"); } #endif userParameters->setUseSS2(false); } //*** ? 
/secstroutput if(setSecStructOutput != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting secstroutput parameter."); } #endif if((*paramArg)[setSecStructOutput].length() > 0) { temp = findMatch((*paramArg)[setSecStructOutput], outputSecStrArg, 4); if(temp >= 0 && temp <= 3) { userParameters->setOutputStructPenalties(temp); } else { cerr << "\nUnknown case " << (*paramArg)[setSecStructOutput] << endl; } } } //*** ? /helixgap= n if(setHelixGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting helixgap parameter."); } #endif temp = 0; if((*paramArg)[setHelixGap].length() > 0) { if (sscanf((*paramArg)[setHelixGap].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("helixgap", "integer"); } } if (temp >= 1 && temp <= 9) { userParameters->setHelixPenalty(temp); } } //*** ? /strandgap= n if(setStrandGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting strandgap parameter."); } #endif temp = 0; if((*paramArg)[setStrandGap].length() > 0) { if (sscanf((*paramArg)[setStrandGap].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("strandgap", "integer"); } } if (temp >= 1 && temp <= 9) { userParameters->setStrandPenalty(temp); } } //*** ? /loopgap= n if(setLoopGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting loopgap parameter."); } #endif temp = 0; if((*paramArg)[setLoopGap].length() > 0) { if (sscanf((*paramArg)[setLoopGap].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("loopgap", "integer"); } } if (temp >= 1 && temp <= 9) { userParameters->setLoopPenalty(temp); } } //*** ? 
/terminalgap= n if(setTerminalGap != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting terminalgap parameter."); } #endif temp = 0; if((*paramArg)[setTerminalGap].length() > 0) { if (sscanf((*paramArg)[setTerminalGap].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("terminalgap", "integer"); temp = 0; } } if (temp >= 1 && temp <= 9) { userParameters->setHelixEndPenalty(temp); userParameters->setStrandEndPenalty(temp); } } //*** ? /helixendin= n if(setHelixEndIn != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting helixendin parameter."); } #endif temp = 0; if((*paramArg)[setHelixEndIn].length() > 0) { if (sscanf((*paramArg)[setHelixEndIn].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("helixendin", "integer"); temp = 0; } } if (temp >= 0 && temp <= 3) { userParameters->setHelixEndMinus(temp); } } //*** ? /helixendout= n if(setHelixEndOut != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting helixendout parameter."); } #endif temp = 0; if((*paramArg)[setHelixEndOut].length() > 0) { if (sscanf((*paramArg)[setHelixEndOut].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("helixendout", "integer"); temp = 0; } } if (temp >= 0 && temp <= 3) { userParameters->setHelixEndPlus(temp); } } //*** ? /strandendin= n if(setStrandEndIn != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting strandendin parameter."); } #endif temp = 0; if((*paramArg)[setStrandEndIn].length() > 0) { if (sscanf((*paramArg)[setStrandEndIn].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("strandendin", "integer"); } } if (temp >= 0 && temp <= 3) { userParameters->setStrandEndMinus(temp); } } //*** ? 
/strandendout= n if(setStrandEndOut != -1) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg(" Setting strandendout parameter."); } #endif temp = 0; if((*paramArg)[setStrandEndOut].length() > 0) { if (sscanf((*paramArg)[setStrandEndOut].c_str(), "%d", &temp) != 1) { reportBadOptionAndExit("strandendout", "integer"); } } if (temp >= 0 && temp <= 3) { userParameters->setStrandEndPlus(temp); } } } int CommandLineParser::findMatch(string probe, StringArray* list, int n) { int i, j, len; int count, match=0; len = probe.length(); for (i = 0; i < len; i++) { count = 0; for (j = 0; j < n; j++) { if (probe[i] == (*list)[j][i]) { match = j; count++; } } if (count == 0) { return((int)-1); } if (count == 1) { return(match); } } return((int)-1); } /* * The function getCmdLineDataStruct is used to return a struct with the values * specified. It returns it by value. I cant return by reference or else the object will * be destroyed. */ CmdLineData CommandLineParser::getCmdLineDataStruct(const char *str, int *flag, int type, StringArray* arg) { CmdLineData tempStruct = {str, flag, type, arg}; return tempStruct; } void CommandLineParser::printCmdLineData(const CmdLineData& temp) { std::cout << "The str is: " << temp.str << std::endl; std::cout << "The int* is: " << *(temp.flag) << std::endl; std::cout << "The type is: " << temp.type << std::endl; std::cout << "The StringArray is: " << std::endl; if(temp.arg == NULL) { std::cout << " NULL" << std::endl; } else { cout << "The number of elements is " << temp.arg->size() << std::endl; for(int i = 0; i < (int)temp.arg->size(); i++) { cout << "The " << i << "th element is: " << temp.arg->at(i) << endl; } } } /* * Helper function to change string to lower case. 
* */ string CommandLineParser::ConvertStringToLower(string strToConvert) { for(unsigned int i=0;i #include using namespace std; namespace clustalw { typedef SymMatrix DistMatrix; typedef std::vector > TreeGroups; struct TreeNames { string phylipName; string clustalName; string distName; string nexusName; string pimName; }; struct AlignmentFileNames { string treeFile; string profile2TreeFile; string clustalFile; string nrbfFile; string gcgFile; string phylipFile; string gdeFile; string nexusFile; string fastaFile; }; struct TreeNode { // phylogenetic tree structure struct TreeNode *left; struct TreeNode *right; struct TreeNode *parent; float dist; int leaf; int order; string name; }; struct PhyloTree { TreeGroups treeDesc; vector leftBranch; vector rightBranch; }; struct SeqInfo { int firstSeq; int lastSeq; int numSeqs; }; struct LowScoreSegParams { int firstSeq; int nSeqs; int lastSeq; int nCols; vector* seqWeight; Array2D* lowScoreRes; bool seqWeightCalculated; }; /* Global constants */ const int extraEndElemNum = 2; const int ENDALN = 127; const int OK = -200; const int CANNOTOPENFILE = -300; const int NOSEQUENCESINFILE = -400; const int OTHERERROR = -500; const int ALLNAMESNOTDIFFERENT = -600; const int MUSTREADINPROFILE1FIRST = -700; const int EMPTYSEQUENCE = -800; const int SEQUENCETOOBIG = -900; const int BADFORMAT = -1000; const int AABLOSUM = 0; const int AAPAM = 1; const int AAGONNET = 2; const int AAIDENTITY = 3; const int AAUSERDEFINED = 4; const int PWAABLOSUM = 0; const int PWAAPAM = 1; const int PWAAGONNET = 2; const int PWAAIDENTITY = 3; const int PWAAUSER = 4; const int DNAIUB = 0; const int DNACLUSTALW = 1; const int DNAUSERDEFINED = 2; const int AAHISTIDENTITY = 0; const int AAHISTGONNETPAM80 = 1; const int AAHISTGONNETPAM120 = 2; const int AAHISTGONNETPAM250 = 3; const int AAHISTGONNETPAM350 = 4; const int AAHISTUSER = 5; const int QTAASEGGONNETPAM80 = 0; const int QTAASEGGONNETPAM120 = 1; const int QTAASEGGONNETPAM250 = 2; const int 
QTAASEGGONNETPAM350 = 3; const int QTAASEGUSER = 4; const int MAXHYDRESIDUES = 9; // Only allowing 9 hyd residue choices const int Protein = 0; const int DNA = 1; const int Pairwise = 0; const int MultipleAlign = 1; const int OUTSECST = 0; const int OUTGAP = 1; const int OUTBOTH = 2; const int OUTNONE = 3; const int MAXNAMES = 150; /* Max chars read for seq. names */ //nige, was 30 //const int MAXNAMESTODISPLAY = 30; // Used for printout. Mark 18-7-07 //const int MAXNAMESTODISPLAY = 10; // Bug #53. Paul 20-12-07 const int MAXNAMESTODISPLAY = 30; //Paul replicate 1.83 behavour 9-2-08 const int MINNAMESTODISPLAY = 10; //Paul replicate 1.83 behavour 9-2-08 const int MAXTITLES = 60; /* Title length */ const int FILENAMELEN = 256; /* Max. file name length */ const int UNKNOWN = 0; const int EMBLSWISS = 1; const int PIR = 2; const int PEARSON = 3; const int GDE = 4; const int CLUSTAL = 5; /* DES */ const int MSF = 6; /* DES */ const int RSF = 7; /* JULIE */ const int USER = 8; /* DES */ const int PHYLIP = 9; /* DES */ const int NEXUS = 10; /* DES */ const int FASTA = 11; /* Ramu */ const int NONE = 0; const int SECST = 1; const int GMASK = 2; const int PROFILE = 0; const int SEQUENCE = 1; const int BS_NODE_LABELS = 2; const int BS_BRANCH_LABELS = 1; const int PAGE_LEN = 22; /* Number of lines of help sent to screen */ const int PAGEWIDTH = 80; /* maximum characters on output file page */ const int LINELENGTH = 60; /* Output file line length */ const int GCG_LINELENGTH = 50; const int NJ = 1; const int UPGMA = 2; const int ALIGNMENT = 1; const int TREE = 2; const int MinIdentifier = 1; const string VALID_COMMAND_SEP = "-/"; #ifdef OS_MAC const char default_commandsep = '-'; const char DIRDELIM = '/'; const int INT_SCALE_FACTOR = 100; /* Scaling factor to convert float to integer for profile scores */ #elif OS_WINDOWS const char default_commandsep = '/'; const char DIRDELIM = '\\'; const int INT_SCALE_FACTOR = 100; /* Scaling factor to convert float to integer for profile 
scores */ #elif OS_UNIX const char default_commandsep = '-'; const char DIRDELIM = '/'; const int INT_SCALE_FACTOR = 1000; /* Scaling factor to convert float to integer for profile scores */ #endif const int NUMRES = 32; /* max size of comparison matrix */ const int INPUT = 0; const int ALIGNED = 1; const int LEFT = 1; const int RIGHT = 2; const int NODE = 0; const int LEAF = 1; const int GAPCOL = 32; /* position of gap open penalty in profile */ const int LENCOL = 33; /* position of gap extension penalty in profile */ typedef struct { /* Holds values for the pairwise scales */ float gapOpenScale; float gapExtendScale; int intScale; }PairScaleValues; typedef struct { float scale; float intScale; }PrfScaleValues; typedef struct node { /* phylogenetic tree structure */ struct node *left; struct node *right; struct node *parent; float dist; int leaf; int order; char name[64]; } stree, *treeptr; typedef struct { char title[30]; char string[30]; } MatMenuEntry; typedef struct { int noptions; MatMenuEntry opt[10]; } MatMenu; const int MAXMAT = 10; typedef struct { int llimit; int ulimit; vector* matptr; vector* AAXref; } SeriesMat; /* * UserMatSeries holds the number of matrices in the series and */ typedef struct { int nmat; SeriesMat mat[MAXMAT]; } UserMatrixSeries; } #endif clustalx-2.1/clustalW/general/VectorUtility.h0000644000175000017500000000273511470725216021237 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef VECTORUTILITY_H #define VECTORUTILITY_H #include namespace vectorutils { /** * The function mergeVectors will add the contents of * the other two vectors to the end of the first vector. 
*/ template void mergeVectors(std::vector* vecToAddTo, std::vector* vector1, std::vector* vector2) { typename std::vector::iterator beginInsert = vecToAddTo->end(); typename std::vector::iterator beginVec1 = vector1->begin(); typename std::vector::iterator endVec1 = vector1->end(); typename std::vector::iterator beginVec2 = vector2->begin(); typename std::vector::iterator endVec2 = vector2->end(); // Add the first vector vecToAddTo->insert(beginInsert, beginVec1, endVec1); beginInsert = vecToAddTo->end(); // Add the second vector vecToAddTo->insert(beginInsert, beginVec2, endVec2); } /** * The function mergeVectors will add the contents of the second vector * to the end of the first vector. */ template void mergeVectors(std::vector* vecToAddTo, std::vector* vector1) { typename std::vector::iterator beginInsert = vecToAddTo->end(); typename std::vector::iterator beginVec1 = vector1->begin(); typename std::vector::iterator endVec1 = vector1->end(); // Add the first vector vecToAddTo->insert(beginInsert, beginVec1, endVec1); } } #endif clustalx-2.1/clustalW/general/VectorOutOfRange.h0000644000175000017500000000116311470725216021577 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #include #include namespace clustalw { class VectorOutOfRange : public std::exception { public: VectorOutOfRange(std::string vectorName, int index, int max) : _name(vectorName), _index(index), _max(max) {} ~VectorOutOfRange() throw(); int index(){return _index;} int max(){return _max;} const char* what() const throw(); const char* what(); private: std::string _name; int _index; int _max; }; } clustalx-2.1/clustalW/general/VectorOutOfRange.cpp0000644000175000017500000000163311470725216022134 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "VectorOutOfRange.h" #include #include #include namespace clustalw { VectorOutOfRange::~VectorOutOfRange() throw() { // Dont need to do anything } const char* VectorOutOfRange::what() const throw() { std::ostringstream message; message << "\nIn Vector "<< _name << ", vector index " << _index << " exceeds bounds 1-" << _max << "\n"; std::string outputMessage = message.str(); return outputMessage.c_str(); } const char* VectorOutOfRange::what() { std::ostringstream message; message << "\nIn Vector "<< _name << ", vector index " << _index << " exceeds bounds 1-" << _max << "\n"; std::string outputMessage = message.str(); return outputMessage.c_str(); } } clustalx-2.1/clustalW/general/Utility.h0000644000175000017500000000432611470725216020052 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef UTILITY_H #define UTILITY_H #include #include #include #include #include #include #include "clustalw.h" class QLabel; using namespace std; namespace clustalw { class Utility { public: Utility(); virtual ~Utility(){}; char* rTrim(char *str); void rTrim(string *str); char* blankToUnderscore(char *str); string blankToUnderscore(string str); void getStr(string instr, string& outstr); char getChoice(string instr); double getReal(const char *instr, double minx, double maxx, double def); int getInt(const char *instr,int minx,int maxx, int def); unsigned long getUniqueSequenceIdentifier(); bool lineType(char *line, const char *code); bool blankLine(char *line); bool blankLineNumericLabel(char *line); void getPath(string str, string *path); virtual char promptForYesNo(char *title, const char *prompt); virtual char promptForYesNo(const char *title, const char *prompt); virtual void error( char *msg,...); virtual void error( const char *msg,...); virtual void warning( char *msg,...); virtual void warning( const char *msg,...); virtual void info( char 
*msg,...); virtual void info( const char *msg,...); virtual void myname( char *myname ); template T MIN(T x, T y){if (x <= y){return x;}return y;} template T MAX(T x, T y){if (x >= y){return x;}return y;} // Note the following function will never be used from this class. It is for the // QT version. I need it here so that I can add the function to QTUtility. virtual void setInfoLabelPtr(QLabel* ptrToLabelObj); bool isNumeric(char ch); double average(std::vector& v); double stdDev(std::vector& v); double median(std::vector v); /* Attributes */ void beQuiet(bool b) {quiet=b;}; private: /* Functions */ /* Attributes */ bool quiet; }; } #endif clustalx-2.1/clustalW/general/Utility.cpp0000644000175000017500000002421511470725216020404 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. * * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * Changes: * * 2007-12-03, Andreas Wilm (UCD): replaced gets with fgets, and * got rid of some compiler warning * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include "Utility.h" #include "general/userparams.h" namespace clustalw { /** constructor */ Utility::Utility() { quiet=false; } /** * Removes trailing blanks from a string */ void Utility::rTrim(string *str) { string::reverse_iterator rit = str->rbegin(); while(rit != str->rend() && isspace(*rit)) { str->erase((++rit).base()); } } /** * Removes trailing blanks from a string * @param str String to remove trailing blanks from. * @return Pointer to the processed string */ char * Utility::rTrim(char *str) { register int p; p = strlen(str) - 1; while (isspace(str[p])) { p--; } str[p + 1] = EOS; return str; } /** * Replace blanks in a string with underscores. Also replaces , ; : ( or ) with _. * @param str String to process. 
* @return Pointer to the processed string */ char * Utility::blankToUnderscore(char *str) { int i, p; p = strlen(str) - 1; for (i = 0; i <= p; i++) if ((str[i] == ' ') || (str[i] == ';') || (str[i] == ',') || (str[i] == '(') || (str[i] == ')') || (str[i] == ':')) { str[i] = '_'; } return str; } /** * Replace blanks in a string with underscores. Also replaces , ; : ( or ) with _. * @param str String to process. * @return Pointer to the processed string */ string Utility::blankToUnderscore(string str) { int i, p; p = str.size() - 1; for (i = 0; i <= p; i++) if ((str[i] == ' ') || (str[i] == ';') || (str[i] == ',') || (str[i] == '(') || (str[i] == ')') || (str[i] == ':')) { str[i] = '_'; } return str; } /** * * @param instr * @return */ char Utility::getChoice(string instr) { cout << instr << ": "; cout.flush(); char choice; cin.get(choice); // We only want one character, so we ignore the rest. if(choice != '\n') { cin.ignore(150, '\n'); } cin.clear(); if(isalpha(choice) || isNumeric(choice)) { return choice; } else if(choice == '\n') { return '\n'; } else { return ' '; } } bool Utility::isNumeric(char ch) { if(ch >= '0' && ch <= '9') { return true; } return false; } /** * * @param instr * @param outstr */ void Utility::getStr(string instr, string& outstr) { cout << instr << ": "; cout.flush(); string temp; getline(cin, temp, '\n'); outstr = temp; cin.clear(); } /** * * @param instr * @param minx * @param maxx * @param def * @return */ double Utility::getReal(const char *instr, double minx, double maxx, double def) { int status; float ret; char line[MAXLINE]; while (true) { fprintf(stdout, "%s (%.1f-%.1f) [%.1f]: ", instr, minx, maxx, def); //gets(line); fgets(line, MAXLINE, stdin); status = sscanf(line, "%f", &ret); if (status == EOF) { return def; } if (ret > maxx) { fprintf(stderr, "ERROR: Max. value=%.1f\n\n", maxx); continue; } if (ret < minx) { fprintf(stderr, "ERROR: Min. 
value=%.1f\n\n", minx); continue; } break; } return (double)ret; } /** * * @param instr * @param minx * @param maxx * @param def * @return */ int Utility::getInt(const char *instr, int minx, int maxx, int def) { int ret, status; char line[MAXLINE]; while (true) { fprintf(stdout, "%s (%d..%d) [%d]: ", instr, minx, maxx, def); //gets(line); fgets(line, MAXLINE, stdin); status = sscanf(line, "%d", &ret); if (status == EOF) { return def; } if (ret > maxx) { fprintf(stderr, "ERROR: Max. value=%d\n\n", maxx); continue; } if (ret < minx) { fprintf(stderr, "ERROR: Min. value=%d\n\n", minx); continue; } break; } return ret; } /** * * @param line * @param code * @return */ bool Utility::lineType(char *line, const char *code) { // AW: introduced sanity check int n; if (strlen(line) 10) return false; else return true; } /** * * @param str * @param path */ void Utility::getPath(string str, string *path) { int i; string _temp; _temp = str; for (i = _temp.length() - 1; i > - 1; --i) { if (str[i] == DIRDELIM) { i = - 1; break; } if (str[i] == '.') { break; } } if (i < 0) { _temp += "."; } else { _temp = _temp.substr(0, i + 1); } *path = _temp; } /** * * @param title * @param prompt * @return * * */ char Utility::promptForYesNo(char *title, const char *prompt) { cout << "\n" << title << "\n"; string promptMessage = string(prompt) + "(y/n) ? [y]"; string answer; getStr(promptMessage, answer); if(!answer.empty()) { if ((answer[0] != 'n') && (answer[0] != 'N')) { return ('y'); } } return ('n'); } /** * * @param title * @param prompt * @return * */ char Utility::promptForYesNo(const char *title, const char *prompt) { cout << "\n" << title << "\n"; string promptMessage = string(prompt) + "(y/n) ? [y]"; string answer; getStr(promptMessage, answer); if(!answer.empty()) { if ((answer[0] != 'n') && (answer[0] != 'N')) { return ('y'); } } return ('n'); } /** * * @param msg */ void Utility::error( char *msg,...) 
{ va_list ap; va_start(ap, msg); fprintf(stderr, "\n\nERROR: "); vfprintf(stderr, msg, ap); fprintf(stderr, "\n\n"); va_end(ap); } /** * * @param msg */ void Utility::warning( char *msg,...) { va_list ap; va_start(ap, msg); fprintf(stderr, "\n\nWARNING: "); vfprintf(stderr, msg, ap); fprintf(stderr, "\n\n"); va_end(ap); } /** * * @param msg */ void Utility::error( const char *msg,...) { va_list ap; va_start(ap, msg); fprintf(stderr, "\n\nERROR: "); vfprintf(stderr, msg, ap); fprintf(stderr, "\n\n"); va_end(ap); } /** * * @param msg */ void Utility::warning( const char *msg,...) { va_list ap; va_start(ap, msg); fprintf(stderr, "\n\nWARNING: "); vfprintf(stderr, msg, ap); fprintf(stderr, "\n\n"); va_end(ap); } /** * * @param msg */ void Utility::info( char *msg,...) { va_list ap; va_start(ap, msg); if(! quiet) { fprintf(stdout, "\n"); vfprintf(stdout, msg, ap); va_end(ap); } } /** * * @param msg */ void Utility::info(const char *msg,...) { va_list ap; if(! quiet) { va_start(ap, msg); fprintf(stdout, "\n"); vfprintf(stdout, msg, ap); va_end(ap); } } /** * */ void Utility::myname( char *myname) { strcpy(myname, "clustalw\0"); } /** * Change: * Mark 25-1-2007. I made this change to get around the problem of having to keep track * of output indexes in the alignment stage. This function returns the next unique id. */ unsigned long Utility::getUniqueSequenceIdentifier() { static unsigned long nextSequenceIdentifier = MinIdentifier; return nextSequenceIdentifier++; } void Utility::setInfoLabelPtr(QLabel* ptrToLabelObj) { // Dont do anything. This is here to allow the function to be added to QTUtility. // Polymorphism wont work with different functions in the classes. 
} double Utility::average(std::vector& v) { double tmp = 0.0; std::vector::iterator i; if (v.size() == 0) return 0.0; for (i=v.begin(); i != v.end(); i++) tmp += *i; return (tmp / v.size()); } double Utility::stdDev(std::vector& v) { std::vector::iterator i; double tmp = 0.0; double avg = average(v); if (v.size() == 0) return 0.0; for(i=v.begin(); i != v.end(); ++i) tmp += (*i - avg) * (*i - avg); return sqrt(tmp / v.size()); } double Utility::median(std::vector v) { // From Moo & Koenig, "Accelerated C++: typedef vector::size_type vec_sz; vec_sz size = v.size(); if (v.size() == 0) return 0.0; std::sort(v.begin(), v.end()); vec_sz mid = size/2; return size % 2 == 0 ? (v[mid] + v[mid-1]) / 2 : v[mid]; } } clustalx-2.1/clustalW/general/UserParameters.h0000644000175000017500000004230711470725216021352 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Mark Larkin 9 Dec 2005. * Note that most of the variables in this class are from param.h. * * Change log: * Jan 18th 2006: Changed all of the c style strings into c++ string objects! * Mark 30-1-2007: Added useLEScoringFunction and access functions. * 31-01-07,Nigel Brown(EMBL): Revision string now obtained from clustalw * specific versionw.h and updated from 1.83 to 2.1. 
*/ #ifndef USERPARAMETERS_H #define USERPARAMETERS_H #include #include #include "utils.h" #include "general.h" #include "clustalw.h" namespace clustalw { class UserParameters { public: /* Functions */ UserParameters(bool log = false); void setParamsToDefault(); void createParameterOutput(); int resIndex(string t,char c); void setDNAMultiGap(); void setProtMultiGap(); void setDNAParams(); void setProtParams(); void setPWProteinParam(); void setPWDNAParam(); void setPWParamToProtein(); void setPWParamToDNA(); string getRevisionLevel(); void setRevisionLevel(string value); bool getInteractive(){return interactive;}; void setInteractive(bool value); bool getGui(){return gui;}; void setGui(bool value); float getGapOpen(){return gapOpen;}; void setGapOpen(float value); float getGapExtend(){return gapExtend;}; void setGapExtend(float value); float getPWGapOpen(){return PWGapOpen;}; void setPWGapOpen(float value); float getPWGapExtend(){return PWGapExtend;}; void setPWGapExtend(float value); float getAAGapOpen(){return AAGapOpen;} void setAAGapOpen(float gap){AAGapOpen = gap;} float getAAGapExtend(){return AAGapExtend;} void setAAGapExtend(float gap){AAGapExtend = gap;} float getAAPWGapOpen(){return AAPWGapOpen;} void setAAPWGapOpen(float gap){AAPWGapOpen = gap;} float getAAPWGapExtend(){return AAPWGapExtend;} void setAAPWGapExtend(float gap){AAPWGapExtend = gap;} int getMaxAA(){return maxAA;}; void setMaxAA(int value); int getGapPos1(){return gapPos1;}; void setGapPos1(int value); int getGapPos2(){return gapPos2;}; void setGapPos2(int value); int getProfileNum(){return profileNum;}; void setProfileNum(int value); bool getMenuFlag(){return menuFlag;}; void setMenuFlag(bool value); bool getDNAFlag(){return DNAFlag;}; void setDNAFlag(bool value); bool getDistanceTree(){return distanceTree;}; void setDistanceTree(bool value); string getSeqName(){return seqName;}; void setSeqName(string value); float getDNAGapOpen(){return DNAGapOpen;}; void setDNAGapOpen(float value); float 
getDNAGapExtend(){return DNAGapExtend;}; void setDNAGapExtend(float value); float getProteinGapOpen(){return AAGapOpen;}; void setProteinGapOpen(float value); float getProteinGapExtend(){return AAGapExtend;}; void setProteinGapExtend(float value); int getGapDist(){return gapDist;}; void setGapDist(int value); int getOutputOrder(){return outputOrder;}; void setOutputOrder(int value); void toggleOutputOrder(); int getDivergenceCutoff(){return divergenceCutoff;}; void setDivergenceCutoff(int value); string getHydResidues(){return hydResidues;}; void setHydResidues(string value); bool getNoWeights(){return noWeights;}; void setNoWeights(bool value); bool getUseNegMatrix(){return negMatrix;}; void setUseNegMatrix(bool value); void toggleUseNegMatrix(); bool getNoHydPenalties(){return noHydPenalties;}; void setNoHydPenalties(bool value); void toggleNoHydPenalties(); bool getNoVarPenalties(){return noVarPenalties;}; void setNoVarPenalties(bool value); bool getNoPrefPenalties(){return noPrefPenalties;}; void setNoPrefPenalties(bool value); void toggleNoPrefPenalties(); bool getUseEndGaps(){return useEndGaps;}; void setUseEndGaps(bool value); void toggleUseEndGaps(); bool getEndGapPenalties(){return endGapPenalties;}; void setEndGapPenalties(bool value); bool getResetAlignmentsNew(){return resetAlignmentsNew;}; void setResetAlignmentsNew(bool value); bool getResetAlignmentsAll(){return resetAlignmentsAll;}; void toggleResetAlignmentsNew(); void setResetAlignmentsAll(bool value); int getOutputStructPenalties(){return outputStructPenalties;}; void setOutputStructPenalties(int value); int getStructPenalties1(){return structPenalties1;}; void setStructPenalties1(int value); int getStructPenalties2(){return structPenalties2;}; void setStructPenalties2(int value); bool getUseSS1(){return useSS1;}; void setUseSS1(bool value); void toggleUseSS1(); bool getUseSS2(){return useSS2;}; void setUseSS2(bool value); void toggleUseSS2(); int getHelixPenalty(){return helixPenalty;}; void 
setHelixPenalty(int value); int getStrandPenalty(){return strandPenalty;}; void setStrandPenalty(int value); int getLoopPenalty(){return loopPenalty;}; void setLoopPenalty(int value); int getHelixEndMinus(){return helixEndMinus;}; void setHelixEndMinus(int value); int getHelixEndPlus(){return helixEndPlus;}; void setHelixEndPlus(int value); int getStrandEndMinus(){return strandEndMinus;}; void setStrandEndMinus(int value); int getStrandEndPlus(){return strandEndPlus;}; void setStrandEndPlus(int value); int getHelixEndPenalty(){return helixEndPenalty;}; void setHelixEndPenalty(int value); int getStrandEndPenalty(){return strandEndPenalty;}; void setStrandEndPenalty(int value); bool getUseAmbiguities(){return useAmbiguities;}; void setUseAmbiguities(bool value); float getDNAPWGapOpenPenalty(){return DNAPWGapOpen;}; void setDNAPWGapOpenPenalty(float value); float getDNAPWGapExtendPenalty(){return DNAPWGapExtend;}; void setDNAPWGapExtendPenalty(float value); float getProteinPWGapOpenPenalty(){return AAPWGapOpen;}; void setProteinPWGapOpenPenalty(float value); float getProteinPWGapExtendPenalty(){return AAPWGapExtend;}; void setProteinPWGapExtendPenalty(float value); bool getQuickPairAlign(){return quickPairAlign;}; void setQuickPairAlign(bool value); void toggleQuickPairAlign(); // Mark new!!! 
float getTransitionWeight(){return transitionWeight;}; void setTransitionWeight(float value); int getDNAKtup(){return DNAKtup;}; void setDNAKtup(int value); int getDNAWindowGap(){return DNAWindowGap;}; void setDNAWindowGap(int value); int getDNASignif(){return DNASignif;}; void setDNASignif(int value); int getDNAWindow(){return DNAWindow;}; void setDNAWindow(int value); int getAAKtup(){return AAKtup;}; void setAAKtup(int value); int getAAWindowGap(){return AAWindowGap;}; void setAAWindowGap(int value); int getAASignif(){return AASignif;}; void setAASignif(int value); int getAAWindow(){return AAWindow;}; void setAAWindow(int value); bool getPercent(){return percent;}; void setPercent(bool value); bool getTossGaps(){return tossgaps;}; void setTossGaps(bool value); void toggleTossGaps(); bool getKimura(){return kimura;}; void setKimura(bool value); void toggleKimura(); int getBootNumTrials(){return bootNumTrials;}; void setBootNumTrials(int value); unsigned int getBootRanSeed(){return bootRanSeed;}; void setBootRanSeed(unsigned int value); int getDebug(){return debug;}; void setDebug(int value); bool getExplicitDNAFlag(){return explicitDNAFlag;}; void setExplicitDNAFlag(bool value); bool getLowercase(){return lowercase;}; void setLowercase(bool value); void toggleLowercase(); bool getClSeqNumbers(){return clSeqNumbers;}; void setClSeqNumbers(bool value); void toggleClSeqNumbers(); bool getSeqRange(){return seqRange;}; void setSeqRange(bool value); void toggleSeqRange(); bool getOutputClustal(){return outputClustal;}; void setOutputClustal(bool value); void toggleOutputClustal(); bool getOutputGCG(){return outputGcg;}; void setOutputGCG(bool value); void toggleOutputGCG(); bool getOutputPhylip(){return outputPhylip;}; void setOutputPhylip(bool value); void toggleOutputPhylip(); bool getOutputNbrf(){return outputNbrf;}; void setOutputNbrf(bool value); void toggleOutputNbrf(); bool getOutputGde(){return outputGde;}; void setOutputGde(bool value); void toggleOutputGde(); 
bool getOutputNexus(){return outputNexus;}; void setOutputNexus(bool value); void toggleOutputNexus(); bool getOutputFasta(){return outputFasta;}; void setOutputFasta(bool value); void toggleOutputFasta(); bool getShowAlign(){return showAlign;}; void setShowAlign(bool value); void toggleShowAlign(); bool getSaveParameters(){return saveParameters;}; void setSaveParameters(bool value); void toggleSaveParameters(); bool getOutputTreeClustal(){return outputTreeClustal;}; void setOutputTreeClustal(bool value); void toggleOutputTreeClustal(); bool getOutputTreePhylip(){return outputTreePhylip;}; void setOutputTreePhylip(bool value); void toggleOutputTreePhylip(); bool getOutputTreeDistances(){return outputTreeDistances;}; void setOutputTreeDistances(bool value); void toggleOutputTreeDistances(); bool getOutputTreeNexus(){return outputTreeNexus;}; void setOutputTreeNexus(bool value); void toggleOutputTreeNexus(); bool getOutputPim(){return outputPim;}; void setOutputPim(bool value); int getBootstrapFormat(){return bootstrapFormat;}; void setBootstrapFormat(int value); void toggleBootstrapFormat(); string getProfile1Name(){return profile1Name;}; void setProfile1Name(string value); string getProfile2Name(){return profile2Name;}; void setProfile2Name(string value); bool getEmpty(){return empty;}; void setEmpty(bool value); bool getProfile1Empty(){return profile1Empty;}; void setProfile1Empty(bool value); bool getProfile2Empty(){return profile2Empty;}; void setProfile2Empty(bool value); string getOutfileName(){return outfileName;}; void setOutfileName(string value); bool getUseTreeFile(){return useTreeFile;}; void setUseTreeFile(bool value); bool getNewTreeFile(){return newTreeFile;}; void setNewTreeFile(bool value); bool getUseTree1File(){return useTree1File;}; void setUseTree1File(bool value); bool getUseTree2File(){return useTree2File;}; void setUseTree2File(bool value); bool getNewTree1File(){return newTree1File;}; void setNewTree1File(bool value); bool 
getNewTree2File(){return newTree2File;}; void setNewTree2File(bool value); string getAminoAcidCodes(){return aminoAcidCodes;}; char getAminoAcidCode(int i){return aminoAcidCodes[i];}; void setAminoAcidCodes(string value); int getKtup(){return ktup;}; void setKtup(int value); int getWindow(){return window;}; void setWindow(int value); int getWindowGap(){return windowGap;}; void setWindowGap(int value); int getSignif(){return signif;}; void setSignif(int value); int getRangeFrom(){return rangeFrom;}; int getRangeTo(){return rangeTo;}; void setRangeFrom(int from); void setRangeTo(int to); bool getRangeFromToSet(){return rangeFromToSet;}; void setRangeFromToSet(bool set){rangeFromToSet = set;}; int getQTScorePlotScale(); void setQTScorePlotScale(int score); int getQTResExceptionCutOff(); void setQTResExceptionCutOff(int cutOff); bool getQTseqWeightCalculated(); void setQTseqWeightCalculated(bool calculated); int getQTminLenLowScoreSegment(); void setQTminLenLowScoreSegment(int minLen); int getQTlowScoreDNAMarkingScale(); void setQTlowScoreDNAMarkingScale(int dnaScale); // Access functions for the iteration variables. 
void setNumIterations(int num){numIterations = num;} int getNumIterations(){return numIterations;} void setDoRemoveFirstIteration(int doIter){doRemoveFirstIteration = doIter;} int getDoRemoveFirstIteration(){return doRemoveFirstIteration;} bool IterationIsEnabled(); void setClusterAlgorithm(int clust){clusterAlgorithm = clust;} int getClusterAlgorithm(){return clusterAlgorithm;} void setDisplayInfo(bool display){displayInfo = display;} bool getDisplayInfo(){return displayInfo;} bool getHelpFlag() {return helpFlag;} void setHelpFlag(bool b) {helpFlag = b;} bool getFullHelpFlag() {return fullHelpFlag;} void setFullHelpFlag(bool b) {fullHelpFlag = b;} void setMaxAllowedSeqLength(int num){maxAllowedSeqLength = num;} int getMaxAllowedSeqLength(){return maxAllowedSeqLength;} bool ResetGapsIsEnabled() {return (resetAlignmentsNew || resetAlignmentsAll);}; /* Attributes */ private: /* Functions */ /* Attributes */ string revisionLevel; bool interactive; bool gui; float gapOpen; float gapExtend; float PWGapOpen; float PWGapExtend; int maxAA; int gapPos1; int gapPos2; int profileNum; bool menuFlag; bool DNAFlag; bool distanceTree; string seqName; float DNAGapOpen; float DNAGapExtend; float AAGapOpen; float AAGapExtend; int gapDist; int outputOrder; int divergenceCutoff; string hydResidues; bool noWeights; bool negMatrix; bool noHydPenalties; bool noVarPenalties; bool noPrefPenalties; bool useEndGaps; bool endGapPenalties; bool resetAlignmentsNew; bool resetAlignmentsAll; int outputStructPenalties; int structPenalties1; int structPenalties2; bool useSS1; bool useSS2; int helixPenalty; int strandPenalty; int loopPenalty; int helixEndMinus; int helixEndPlus; int strandEndMinus; int strandEndPlus; int helixEndPenalty; int strandEndPenalty; bool useAmbiguities; float DNAPWGapOpen; float DNAPWGapExtend; float AAPWGapOpen; float AAPWGapExtend; bool quickPairAlign; float transitionWeight; int DNAKtup; int DNAWindowGap; int DNASignif; int DNAWindow; int AAKtup; int AAWindowGap; int 
AASignif; int AAWindow; bool percent; bool tossgaps; bool kimura; int bootNumTrials; unsigned int bootRanSeed; int debug; bool explicitDNAFlag; bool lowercase; /* Flag for GDE output - set on comm. line*/ bool clSeqNumbers; bool seqRange; bool outputClustal; bool outputGcg; bool outputPhylip; bool outputNbrf; bool outputGde; bool outputNexus; bool outputFasta; bool showAlign; bool saveParameters; bool outputTreeClustal; bool outputTreePhylip; bool outputTreeDistances; bool outputTreeNexus; bool outputPim; int bootstrapFormat; string profile1Name; string profile2Name; bool empty; bool profile1Empty; bool profile2Empty; string outfileName; //int profile1NumSeqs; MARK CHANGE Jan 10 bool useTreeFile; bool newTreeFile; bool useTree1File; bool useTree2File; bool newTree1File; bool newTree2File; string aminoAcidCodes; int ktup; int window; int windowGap; int signif; int rangeFrom; int rangeTo; bool rangeFromToSet; int QTscorePlotScale; int QTresExceptionCutOff; bool QTseqWeightCalculated; int QTminLenLowScoreSegment; int QTlowScoreDNAMarkingScale; // New variables for iteration int numIterations; int doRemoveFirstIteration; int clusterAlgorithm; bool displayInfo; bool helpFlag; bool fullHelpFlag; bool quiet; int maxAllowedSeqLength; }; } #endif clustalx-2.1/clustalW/general/UserParameters.cpp0000644000175000017500000006263411470725216021712 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ /** * This file contains the implementation of the UserParameter functions * Mark Larkin Dec 8 2005 * * Modified: 17 January 2008 Paul McGettigan added in the O aminoacid residue to handle Pyrrolysine */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include #include #include #include #include "UserParameters.h" #include "clustalw_version.h" #ifdef HAVE_CONFIG_H #include "config.h" #endif namespace clustalw { using namespace std; UserParameters::UserParameters(bool log) { // FIXME: huge parts should be merged/replaced with // setParamsToDefault (which is not used at all) gapPos1 = NUMRES - 2; /* code for gaps inserted by clustalw */ gapPos2 = NUMRES - 1; /* code for gaps already in alignment */ //revisionLevel = CLU_SHORT_VERSION_STRING; revisionLevel = CLUSTALW_VERSION; interactive = false; gui = false; seqName = ""; DNAGapOpen = 15.0; DNAGapExtend = 6.66; AAGapOpen = 10.0; AAGapExtend = 0.2; gapDist = 4; outputOrder = ALIGNED; // Note: All macros should be replaced by const divergenceCutoff = 30; hydResidues = "GPSNDQEKR"; noWeights = false; negMatrix = false; noHydPenalties = false; noVarPenalties = true; noPrefPenalties = false; useEndGaps = false; endGapPenalties = false; resetAlignmentsNew = false; resetAlignmentsAll = false; outputStructPenalties = OUTSECST; structPenalties1 = NONE; structPenalties2 = NONE; useSS1 = true; useSS2 = true; helixPenalty = 4; strandPenalty = 4; loopPenalty = 1; helixEndMinus = 3; helixEndPlus = 0; strandEndMinus = 1; strandEndPlus = 1; helixEndPenalty = 2; strandEndPenalty = 2; useAmbiguities = false; DNAPWGapOpen = 15.0; DNAPWGapExtend = 6.66; AAPWGapOpen = 10.0; AAPWGapExtend = 0.1; quickPairAlign = false; transitionWeight = 0.5; DNAKtup = 2; DNAWindowGap = 5; DNASignif = 4; DNAWindow = 4; AAKtup = 1; AAWindowGap = 3; AASignif = 5; AAWindow = 5; percent = true; tossgaps = false; kimura = false; bootNumTrials = 1000; bootRanSeed = 111; debug = 0; explicitDNAFlag = false; 
lowercase = true; clSeqNumbers = false; seqRange = false; outputClustal = true; outputGcg = false; outputPhylip = false; outputNbrf = false; outputGde = false; outputNexus = false; outputFasta = false; showAlign = true; saveParameters = false; outputTreeClustal = false; outputTreePhylip = true; outputTreeDistances = false; outputTreeNexus = false; outputPim = false; bootstrapFormat = BS_BRANCH_LABELS; profile1Name = ""; // Initialise to blank strings profile2Name = ""; empty = true; profile1Empty = true; profile2Empty = true; outfileName = ""; //profile1NumSeqs = 0; // MARK: Set to default before used. useTreeFile = false; newTreeFile = false; useTree1File = false; useTree2File = false; newTree1File = false; newTree2File = false; aminoAcidCodes = "ABCDEFGHIKLMNOPQRSTUVWXYZ-"; maxAA = aminoAcidCodes.length() - 2; // Some variables need the alignment to be read in before they can be set. // I am putting the default as protein. Note: this should not make a difference // as they are not used before they have been set a value again!! gapOpen = AAGapOpen; gapExtend = AAGapExtend; PWGapOpen = AAPWGapOpen; PWGapExtend = AAPWGapExtend; gapPos1 = NUMRES - 2; gapPos2 = NUMRES - 1; profileNum = 0; menuFlag = false; // MARK: I set to default value. DNAFlag = false; // MARK: I set to default value. distanceTree = true; // MARK: I set to default value. ktup = AAKtup; window = AAWindow; windowGap = AAWindowGap; signif = AASignif; rangeFrom = -1; rangeTo = -1; rangeFromToSet = false; QTscorePlotScale = 5; QTresExceptionCutOff = 5; QTseqWeightCalculated = false; QTminLenLowScoreSegment = 1; QTlowScoreDNAMarkingScale = 5; // Set defaults for iteration variables. 
numIterations = 3; doRemoveFirstIteration = NONE; maxAllowedSeqLength = INT_MAX; clusterAlgorithm = NJ; displayInfo = true; helpFlag = false; fullHelpFlag = false; quiet = false; } // FIXME:never used void UserParameters::setParamsToDefault() { DNAGapOpen = 15.0; DNAGapExtend = 6.66; AAGapOpen = 10.0; AAGapExtend = 0.2; gapDist = 4; outputOrder = ALIGNED; divergenceCutoff = 30; hydResidues = "GPSNDQEKR"; noWeights = false; negMatrix = false; noHydPenalties = false; noVarPenalties = true; noPrefPenalties = false; useEndGaps = false; endGapPenalties = false; resetAlignmentsNew = false; resetAlignmentsAll = false; outputStructPenalties = OUTSECST; structPenalties1 = NONE; structPenalties2 = NONE; useSS1 = true; useSS2 = true; helixPenalty = 4; strandPenalty = 4; loopPenalty = 1; helixEndMinus = 3; helixEndPlus = 0; strandEndMinus = 1; strandEndPlus = 1; helixEndPenalty = 2; strandEndPenalty = 2; useAmbiguities = false; DNAPWGapOpen = 15.0; DNAPWGapExtend = 6.66; AAPWGapOpen = 10.0; AAPWGapExtend = 0.1; quickPairAlign = false; transitionWeight = 0.5; DNAKtup = 2; DNAWindowGap = 5; DNASignif = 4; DNAWindow = 4; AAKtup = 1; AAWindowGap = 3; AASignif = 5; AAWindow = 5; percent = true; tossgaps = false; kimura = false; bootNumTrials = 1000; bootRanSeed = 111; debug = 0; lowercase = true; clSeqNumbers = false; seqRange = false; outputClustal = true; outputGcg = false; outputPhylip = false; outputNbrf = false; outputGde = false; outputNexus = false; outputFasta = false; outputTreeClustal = false; outputTreePhylip = true; outputTreeDistances = false; outputTreeNexus = false; outputPim = false; bootstrapFormat = BS_BRANCH_LABELS; useTreeFile = false; newTreeFile = false; useTree1File = false; useTree2File = false; newTree1File = false; newTree2File = false; rangeFrom = -1; rangeTo = -1; rangeFromToSet = false; QTscorePlotScale = 5; QTresExceptionCutOff = 5; QTminLenLowScoreSegment = 1; QTlowScoreDNAMarkingScale = 5; distanceTree = true; // MARK: I set to default value. 
numIterations = 3; if(getDNAFlag()) { setDNAParams(); } else { setProtParams(); } clusterAlgorithm = NJ; displayInfo = true; helpFlag = false; fullHelpFlag = false; quiet = false; doRemoveFirstIteration = NONE; maxAllowedSeqLength = INT_MAX; } /* * The function createParameterOutput is used to put all the user parameters in * a file. It is used for testing and for saving parameters. * * * FIXME: AW: Some parameters are missing here (e.g. the new ones like * clustering, etc) * */ void UserParameters::createParameterOutput(void) { string parname, temp; string path; string message; utilityObject->getPath(seqName, &path); parname = path + "par"; if(menuFlag) { message = "\nEnter a name for the parameter output file [" + parname + "]"; utilityObject->getStr(message, temp); if(temp != "") { parname = temp; } } ofstream outfile; outfile.open(parname.c_str(), ofstream::out); if(!outfile) { return; // Failed to open } outfile << "clustalw \\\n"; if (!empty && profile1Empty) { outfile << "-infile=" << seqName << " \\\n"; } if (!profile1Empty) { outfile << "-profile1=" << profile1Name << "\\\n"; } if (!profile2Empty) { outfile << "-profile2=" << profile2Name << " \\\n"; } if (DNAFlag == true) { outfile << "-type=dna \\\n"; } else { outfile << "-type=protein \\\n"; } if (quickPairAlign) { outfile << "-quicktree \\\n"; outfile << "-ktuple=" << ktup << " \\\n"; outfile << "-window=" << window << " \\\n"; outfile << "-pairgap=" << windowGap << " \\\n"; outfile << "-topdiags=" << signif << " \\\n"; if (percent) { outfile << "-score=percent \\\n"; } else { outfile << "-score=absolute \\\n"; } } else { if (!DNAFlag) { //outfile << "-pwmatrix=" << pwMatrixName << " \\\n"; outfile << "-pwgapopen=" << fixed << setprecision(2) << AAPWGapOpen << " \\\n"; outfile << "-pwgapext=" << AAPWGapExtend << " \\\n"; } else { outfile << "-pwgapopen=" << fixed << setprecision(2) << PWGapOpen << " \\\n"; outfile << "-pwgapext=" << PWGapExtend << " \\\n"; } } if (!DNAFlag) { //outfile << "-matrix=" << 
matrixName << " \\\n"; outfile << "-gapopen=" << fixed << setprecision(2) << AAGapOpen << " \\\n"; outfile << "-gapext=" << AAGapExtend << " \\\n"; } else { outfile << "-gapopen=" << fixed << setprecision(2) << DNAGapOpen << " \\\n"; outfile << "-gapext=" << DNAGapExtend << " \\\n"; } outfile << "-maxdiv=" << divergenceCutoff << " \\\n"; if (!useEndGaps) { outfile << "-endgaps \\\n"; } if (!DNAFlag) { if (negMatrix) { outfile << "-negative \\\n"; } if (noPrefPenalties) { outfile << "-nopgap \\\n"; } if (noHydPenalties) { outfile << "-nohgap \\\n"; } if (noVarPenalties) { outfile << "-novgap \\\n"; } outfile << "-hgapresidues=" << hydResidues << " \\\n"; outfile << "-gapdist=" << gapDist << " \\\n"; } else { outfile << "-transweight=" << transitionWeight << " \\\n"; } if (outputGcg) { outfile << "-output=gcg \\\n"; } else if (outputGde) { outfile << "-output=gde \\\n"; } else if (outputNbrf) { outfile << "-output=pir \\\n"; } else if (outputPhylip) { outfile << "-output=phylip \\\n"; } else if (outputNexus) { outfile << "-output=nexus \\\n"; } if (outfileName[0]!=EOS) { outfile << "-outfile=" << outfileName << " \\\n"; } if (outputOrder==ALIGNED) { outfile << "-outorder=aligned \\\n"; } else { outfile << "-outorder=input \\\n"; } if (outputGde) { if (lowercase) { outfile << "-case=lower \\\n"; } else { outfile << "-case=upper \\\n"; } } outfile << "-interactive\n"; outfile.close(); } /* * The function resIndex returns the index of the character c in the string t. 
* */ int UserParameters::resIndex(string t, char c) { register int i; for (i = 0; t[i] && t[i] != c; i++) ; if (t[i]) { return (i); } else { return -1; } } void UserParameters::setDNAMultiGap() { gapOpen = DNAGapOpen; gapExtend = DNAGapExtend; } void UserParameters::setProtMultiGap() { gapOpen = AAGapOpen; gapExtend = AAGapExtend; } void UserParameters::setDNAParams() { gapOpen = DNAGapOpen; gapExtend = DNAGapExtend; PWGapOpen = DNAPWGapOpen; PWGapExtend = DNAPWGapExtend; ktup = DNAKtup; window = DNAWindow; signif = DNASignif; windowGap = DNAWindowGap; } void UserParameters::setProtParams() { gapOpen = AAGapOpen; gapExtend = AAGapExtend; PWGapOpen = AAPWGapOpen; PWGapExtend = AAPWGapExtend; ktup = AAKtup; window = AAWindow; signif = AASignif; windowGap = AAWindowGap; } void UserParameters::setPWParamToProtein() { PWGapOpen = AAPWGapOpen; PWGapExtend = AAPWGapExtend; ktup = AAKtup; window = AAWindow; signif = AASignif; windowGap = AAWindowGap; } void UserParameters::setPWParamToDNA() { PWGapOpen = DNAPWGapOpen; PWGapExtend = DNAPWGapExtend; ktup = DNAKtup; window = DNAWindow; signif = DNASignif; windowGap = DNAWindowGap; } void UserParameters::setPWProteinParam() { AAPWGapOpen = PWGapOpen; AAPWGapExtend = PWGapExtend; AAKtup = ktup; AAWindow = window; AASignif = signif; AAWindowGap = windowGap; } void UserParameters::setPWDNAParam() { DNAPWGapOpen = PWGapOpen; DNAPWGapExtend = PWGapExtend; DNAKtup = ktup; DNAWindow = window; DNASignif = signif; DNAWindowGap = windowGap; } /* * The rest of the functions are get, set and toggle functions for the variables. 
*/ string UserParameters::getRevisionLevel() { return revisionLevel; } void UserParameters::setRevisionLevel(string value) { revisionLevel = value; } void UserParameters::setInteractive(bool value) { interactive = value; } void UserParameters::setGui(bool value) { gui = value; } void UserParameters::setGapOpen(float value) { gapOpen = value; } void UserParameters::setGapExtend(float value) { gapExtend = value; } void UserParameters::setPWGapOpen(float value) { PWGapOpen = value; } void UserParameters::setPWGapExtend(float value) { PWGapExtend = value; } void UserParameters::setMaxAA(int value) { maxAA = value; } void UserParameters::setGapPos1(int value) { gapPos1 = value; } void UserParameters::setGapPos2(int value) { gapPos2 = value; } void UserParameters::setProfileNum(int value) { profileNum = value; } void UserParameters::setMenuFlag(bool value) { menuFlag = value; } void UserParameters::setDNAFlag(bool value) { if(value == true) { setDNAParams(); } else { setProtParams(); } DNAFlag = value; } void UserParameters::setDistanceTree(bool value) { distanceTree = value; } void UserParameters::setSeqName(string value) { seqName = value; } void UserParameters::setDNAGapOpen(float value) { DNAGapOpen = value; } void UserParameters::setDNAGapExtend(float value) { DNAGapExtend = value; } void UserParameters::setProteinGapOpen(float value) { AAGapOpen = value; } void UserParameters::setProteinGapExtend(float value) { AAGapExtend = value; } void UserParameters::setGapDist(int value) { gapDist = value; } void UserParameters::setOutputOrder(int value) { outputOrder = value; } void UserParameters::toggleOutputOrder() { if (outputOrder == INPUT) { outputOrder = ALIGNED; } else { outputOrder = INPUT; } } void UserParameters::setDivergenceCutoff(int value) { divergenceCutoff = value; } void UserParameters::setHydResidues(string value) { //hydResidues = value; char hydResidue; string tempHydRes = ""; int inputStringLength = value.length(); if(inputStringLength > 0) { // NOTE 
this was causing an error, but I fixed it. Was giving an // out of range error. for (int i = 0; i < MAXHYDRESIDUES && i < inputStringLength; i++) { hydResidue = toupper(value.at(i)); if (isalpha(hydResidue)) { tempHydRes += hydResidue; } else // Not Alphabetic character! { break; } } if(tempHydRes.size() > 0) { hydResidues = tempHydRes; } } } void UserParameters::setNoWeights(bool value) { noWeights = value; } void UserParameters::setUseNegMatrix(bool value) { negMatrix = value; } void UserParameters::toggleUseNegMatrix() { negMatrix ^= true; } void UserParameters::setNoHydPenalties(bool value) { noHydPenalties = value; } void UserParameters::toggleNoHydPenalties() { noHydPenalties ^= true; } void UserParameters::setNoVarPenalties(bool value) { noVarPenalties = value; } void UserParameters::setNoPrefPenalties(bool value) { noPrefPenalties = value; } void UserParameters::toggleNoPrefPenalties() { noPrefPenalties ^= true; } void UserParameters::setUseEndGaps(bool value) { useEndGaps = value; } void UserParameters::toggleUseEndGaps() { useEndGaps ^= true; } void UserParameters::setEndGapPenalties(bool value) { endGapPenalties = value; } void UserParameters::toggleResetAlignmentsNew() { resetAlignmentsNew ^= true; } void UserParameters::setResetAlignmentsNew(bool value) { resetAlignmentsNew = value; } void UserParameters::setResetAlignmentsAll(bool value) { resetAlignmentsAll = value; } void UserParameters::setOutputStructPenalties(int value) { outputStructPenalties = value; } void UserParameters::setStructPenalties1(int value) { structPenalties1 = value; } void UserParameters::setStructPenalties2(int value) { structPenalties2 = value; } void UserParameters::setUseSS1(bool value) { useSS1 = value; } void UserParameters::toggleUseSS1() { useSS1 ^= true; } void UserParameters::setUseSS2(bool value) { useSS2 = value; } void UserParameters::toggleUseSS2() { useSS2 ^= true; } void UserParameters::setHelixPenalty(int value) { helixPenalty = value; } void 
UserParameters::setStrandPenalty(int value) { strandPenalty = value; } void UserParameters::setLoopPenalty(int value) { loopPenalty = value; } void UserParameters::setHelixEndMinus(int value) { helixEndMinus = value; } void UserParameters::setHelixEndPlus(int value) { helixEndPlus = value; } void UserParameters::setStrandEndMinus(int value) { strandEndMinus = value; } void UserParameters::setStrandEndPlus(int value) { strandEndPlus = value; } void UserParameters::setHelixEndPenalty(int value) { helixEndPenalty = value; } void UserParameters::setStrandEndPenalty(int value) { strandEndPenalty = value; } void UserParameters::setUseAmbiguities(bool value) { useAmbiguities = value; } void UserParameters::setDNAPWGapOpenPenalty(float value) { DNAPWGapOpen = value; } void UserParameters::setDNAPWGapExtendPenalty(float value) { DNAPWGapExtend = value; } void UserParameters::setProteinPWGapOpenPenalty(float value) { AAPWGapOpen = value; } void UserParameters::setProteinPWGapExtendPenalty(float value) { AAPWGapExtend = value; } void UserParameters::toggleQuickPairAlign() { quickPairAlign ^= true; } void UserParameters::setQuickPairAlign(bool value) { quickPairAlign = value; } void UserParameters::setTransitionWeight(float value) { transitionWeight = value; } void UserParameters::setDNAKtup(int value) { DNAKtup = value; } void UserParameters::setDNAWindowGap(int value) { DNAWindowGap = value; } void UserParameters::setDNASignif(int value) { DNASignif = value; } void UserParameters::setDNAWindow(int value) { DNAWindow = value; } void UserParameters::setAAKtup(int value) { AAKtup = value; } void UserParameters::setAAWindowGap(int value) { AAWindowGap = value; } void UserParameters::setAASignif(int value) { AASignif = value; } void UserParameters::setAAWindow(int value) { AAWindow = value; } void UserParameters::setPercent(bool value) { percent = value; } void UserParameters::toggleTossGaps() { tossgaps ^= true; } void UserParameters::setTossGaps(bool value) { tossgaps = value; 
} void UserParameters::setKimura(bool value) { kimura = value; } void UserParameters::toggleKimura() { kimura ^= true; } void UserParameters::setBootNumTrials(int value) { bootNumTrials = value; } void UserParameters::setBootRanSeed(unsigned int value) { bootRanSeed = value; } void UserParameters::setDebug(int value) { debug = value; } void UserParameters::setExplicitDNAFlag(bool value) { explicitDNAFlag = value; } void UserParameters::setLowercase(bool value) { lowercase = value; } void UserParameters::toggleLowercase() { lowercase ^= true; } void UserParameters::setClSeqNumbers(bool value) { clSeqNumbers = value; } void UserParameters::toggleClSeqNumbers() { clSeqNumbers ^= true; } void UserParameters::setSeqRange(bool value) { seqRange = value; } void UserParameters::toggleSeqRange() { seqRange ^= true; } void UserParameters::setOutputClustal(bool value) { outputClustal = value; } void UserParameters::toggleOutputClustal() { outputClustal ^= true; } void UserParameters::setOutputGCG(bool value) { outputGcg = value; } void UserParameters::toggleOutputGCG() { outputGcg ^= true; } void UserParameters::setOutputPhylip(bool value) { outputPhylip = value; } void UserParameters::toggleOutputPhylip() { outputPhylip ^= true; } void UserParameters::setOutputNbrf(bool value) { outputNbrf = value; } void UserParameters::toggleOutputNbrf() { outputNbrf ^= true; } void UserParameters::setOutputGde(bool value) { outputGde = value; } void UserParameters::toggleOutputGde() { outputGde ^= true; } void UserParameters::setOutputNexus(bool value) { outputNexus = value; } void UserParameters::toggleOutputNexus() { outputNexus ^= true; } void UserParameters::setOutputFasta(bool value) { outputFasta = value; } void UserParameters::toggleOutputFasta() { outputFasta ^= true; } void UserParameters::setShowAlign(bool value) { showAlign = value; } void UserParameters::toggleShowAlign() { showAlign ^= true; } void UserParameters::setSaveParameters(bool value) { saveParameters = value; } void 
UserParameters::toggleSaveParameters() { saveParameters ^= true; } void UserParameters::setOutputTreeClustal(bool value) { outputTreeClustal = value; } void UserParameters::toggleOutputTreeClustal() { outputTreeClustal ^= true; } void UserParameters::setOutputTreePhylip(bool value) { outputTreePhylip = value; } void UserParameters::toggleOutputTreePhylip() { outputTreePhylip ^= true; } void UserParameters::setOutputTreeDistances(bool value) { outputTreeDistances = value; } void UserParameters::toggleOutputTreeDistances() { outputTreeDistances ^= true; } void UserParameters::setOutputTreeNexus(bool value) { outputTreeNexus = value; } void UserParameters::toggleOutputTreeNexus() { outputTreeNexus ^= true; } void UserParameters::setOutputPim(bool value) { outputPim = value; } void UserParameters::setBootstrapFormat(int value) { bootstrapFormat = value; } void UserParameters::toggleBootstrapFormat() { if (bootstrapFormat == BS_NODE_LABELS) { bootstrapFormat = BS_BRANCH_LABELS; } else { bootstrapFormat = BS_NODE_LABELS; } } void UserParameters::setProfile1Name(string value) { profile1Name = value; } void UserParameters::setProfile2Name(string value) { profile2Name = value; } void UserParameters::setEmpty(bool value) { empty = value; } void UserParameters::setProfile1Empty(bool value) { profile1Empty = value; } void UserParameters::setProfile2Empty(bool value) { profile2Empty = value; } void UserParameters::setOutfileName(string value) { outfileName = value; } /*void UserParameters::setProfile1NumSeqs(int value) { profile1NumSeqs = value; }*/ // MARK CHANGE Jan 10 void UserParameters::setUseTreeFile(bool value) { useTreeFile = value; } void UserParameters::setNewTreeFile(bool value) { newTreeFile = value; } void UserParameters::setUseTree1File(bool value) { useTree1File = value; } void UserParameters::setUseTree2File(bool value) { useTree2File = value; } void UserParameters::setNewTree1File(bool value) { newTree1File = value; } void UserParameters::setNewTree2File(bool 
value) { newTree2File = value; } void UserParameters::setAminoAcidCodes(string value) { aminoAcidCodes = value; } void UserParameters::setKtup(int value) { ktup = value; } void UserParameters::setWindow(int value) { window = value; } void UserParameters::setWindowGap(int value) { windowGap = value; } void UserParameters::setSignif(int value) { signif = value; } void UserParameters::setRangeFrom(int from) { rangeFrom = from; rangeFromToSet = true; } void UserParameters::setRangeTo(int to) { rangeTo = to; rangeFromToSet = true; } int UserParameters::getQTScorePlotScale() { return QTscorePlotScale; } void UserParameters::setQTScorePlotScale(int score) { QTscorePlotScale = score; } int UserParameters::getQTResExceptionCutOff() { return QTresExceptionCutOff; } void UserParameters::setQTResExceptionCutOff(int cutOff) { QTresExceptionCutOff = cutOff; } bool UserParameters::getQTseqWeightCalculated() { return QTseqWeightCalculated; } void UserParameters::setQTseqWeightCalculated(bool calculated) { QTseqWeightCalculated = calculated; } int UserParameters::getQTminLenLowScoreSegment() { return QTminLenLowScoreSegment; } void UserParameters::setQTminLenLowScoreSegment(int minLen) { QTminLenLowScoreSegment = minLen; } int UserParameters::getQTlowScoreDNAMarkingScale() { return QTlowScoreDNAMarkingScale; } void UserParameters::setQTlowScoreDNAMarkingScale(int dnaScale) { QTlowScoreDNAMarkingScale = dnaScale; } bool UserParameters::IterationIsEnabled() { if (doRemoveFirstIteration == clustalw::NONE) return false; else return true; } } clustalx-2.1/clustalW/general/SymMatrix.h0000644000175000017500000002005511470725216020341 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The SymMatrix class is used to store the distance matrix. It stores it in a double * array. * This class throws an out_of_range exception if we try to access an element outside * of the array bounds. 
It will throw a bad_alloc if we cannot allocate enough memory for * the distance matrix. */ #ifndef SYMMATRIX_H #define SYMMATRIX_H #include #include #include #include #include #include #include namespace clustalw { using namespace std; class SymMatrix { public: SymMatrix() : elements(0), numSeqs(0), subElements(0), firstSeq(0), numSeqsInSub(0){;} SymMatrix(int size) : elements(0), numSeqs(0), subElements(0), firstSeq(0), numSeqsInSub(0) { // Size is the numSeqs + 1 numSeqs = size - 1; sizeElements = ((numSeqs + 1) * (numSeqs + 2)) >> 1; try { elements = new double[sizeElements]; setAllArrayToVal(elements, sizeElements, 0.0); } catch(bad_alloc& e) { cout << "Could not allocate a distance matrix for " << numSeqs << " seqs.\n"; throw e; } } ~SymMatrix() { if(elements) { delete [] elements; } if(subElements) { delete [] subElements; } } inline void SetAt(int nRow, int nCol, const double& value) { index = getIndex(nRow, nCol, numSeqs); elements[index] = value; } inline double GetAt(int nRow, int nCol) { index = getIndex(nRow, nCol, numSeqs); return elements[index]; } inline void ResizeRect(int size, double val = 0.0) { numSeqs = size - 1; sizeElements = ((numSeqs + 1) * (numSeqs + 2)) >> 1; if(elements) { delete [] elements; } try { elements = new double[sizeElements]; setAllArrayToVal(elements, sizeElements, val); } catch(bad_alloc& e) { cout << "Could not allocate a distance matrix for " << numSeqs << " seqs. 
Need to terminate program.\n"; throw e; } } inline void setAllArrayToVal(double* array, int size, double val) { for(int i = 0; i < size; i++) { array[i] = val; } } inline int getSize() { return numSeqs + 1; } inline void clearArray() { sizeElements = 0; numSeqs = 0; if(elements) { delete [] elements; elements = 0; } numSeqsInSub = 0; sizeSubElements = 0; if(subElements) { delete [] subElements; subElements = 0; } } void printArray() { printArray(elements, sizeElements); } void printSubArray() { printArray(subElements, sizeSubElements); } void printArray(double* array, int size) { int numThisTime = 1; int numSoFar = 0; for(int i = 0; i < size; i++) { numSoFar++; if(numSoFar == numThisTime) { cout << " " << setw(4) << array[i] << "\n"; numSoFar = 0; numThisTime++; } else { cout << " " << setw(4) << array[i]; } } cout << "\n"; } inline int getIndex(const int &i, const int &j, const int &nSeqs) const { if(i == 0 || j == 0) { return 0; } int _i = i - 1, _j = j - 1; if (_i == _j) { if ((_i >= nSeqs) || (_i < 0)) { throw out_of_range("index out of range\n"); } return (_i * (_i + 3)) >> 1; } if (_i > _j) { if ((_i >= nSeqs) || (_j < 0)) { throw out_of_range("index out of range\n"); } return ((_i * (_i + 1)) >> 1) + _j; } if ((_j >= nSeqs) || (_i < 0)) { throw out_of_range("index out of range\n"); } return ((_j * (_j + 1)) >> 1) + _i; } //inline inline double operator() (unsigned row, unsigned col) const { return elements[getIndex(row, col, numSeqs)]; } inline double& operator() (unsigned row, unsigned col) { return elements[getIndex(row, col, numSeqs)]; } inline double* getElements() { return elements; } inline double* getDistMatrix(int fSeq, int nSeqsInSub) { if(fSeq == 1 && numSeqs == nSeqsInSub) { return elements; // We want the whole array. } else { // Calculate the subMatrix and return it!!! 
try { if(subElements) { delete [] subElements; } sizeSubElements = ((nSeqsInSub + 1) * (nSeqsInSub + 2)) >> 1; numSeqsInSub = nSeqsInSub; subElements = new double[sizeSubElements]; setAllArrayToVal(subElements, sizeSubElements, 0.0); int currIndex = 0; subElements[0] = 0.0; int lSeq = fSeq + numSeqsInSub - 1; // NOTE this is wrong!!!!! Need to fix for(int i = fSeq; i <= lSeq; i++) { for(int j = i + 1; j <= lSeq; j++) { currIndex = getIndex(i - fSeq + 1, j - fSeq + 1, numSeqsInSub); subElements[currIndex] = elements[getIndex(i, j, numSeqs)]; } } return subElements; } catch(bad_alloc& e) { cout << "Out of Memory!\n"; throw e; } } } void clearSubArray() { if(subElements) { delete [] subElements; } subElements = 0; sizeSubElements = 0; numSeqsInSub = 0; } void makeSimilarityMatrix() { double value; for (int i = 0; i < numSeqs; i++) { SetAt(i + 1, i + 1, 0.0); for (int j = 0; j < i; j++) { value = 100.0 - (GetAt(i + 1, j + 1)) * 100.0; SetAt(i + 1, j + 1, value); //distMat->SetAt(j + 1, i + 1, value); } } } private: double* elements; int sizeElements; int numSeqs; int index; double* subElements; // To be used to return a sub matrix. int firstSeq, numSeqsInSub; int sizeSubElements; }; } #endif clustalx-2.1/clustalW/general/SymMatrix.cpp0000644000175000017500000000026411470725216020674 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "SymMatrix.h" clustalx-2.1/clustalW/general/Stats.h0000644000175000017500000000565211470725216017510 0ustar ddineenddineen/** * Author: Andreas Wilm * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
* */ #ifndef STATS_H #define STATS_H namespace clustalw { using namespace std; /** Log simple statistics about input and output * * @section LICENSE * FIXME: please add license text * * @section DESCRIPTION * The hashing functions have to be enabled at compile/configure time * otherwise they will not be used. * * */ class Stats { public: /* FUNCTIONS */ /** constructor */ Stats(); /** destructor */ ~Stats(); /** set the name of the file where stats are logged to * @param f filename */ void setStatsFile(string f) {logfilename=f;}; /** * return name of used stats file * @return file name */ string getStatsFile() {return logfilename;}; /** enable stats logging */ void setEnabled(bool b) {enabled=b;}; /** check if stats logging is enabled * @return true if enabled, false otherwise */ bool isEnabled() {return enabled;}; /** log the command line arguments * @param argc number arguments * @param argv argument array */ void logCmdLine(int argc, char **argv); /** log statistics about sequence input * @param alnObj: input "alignment" (confusing isn't it?) */ void logInputSeqStats(Alignment *alnObj); /** log statistics about sequence input * @param alnObj: input "alignment" (confusing isn't it?) */ void logAlignedSeqStats(Alignment *alnObj); /* ATTRIBUTES */ private: /* FUNCTIONS */ /** Compute pairwise identity of two sequences * * Adopted from Sean Eddy'ssquid:aligneval.c * He defines pairwise identity as fraction: * idents / min(len1, len2) * * Function is case sensitive! 
* * @param alnObj alignment object * @param s1 index of first sequence * @param s2 index of first sequence * @return pairwise identity as float */ float pairwiseIdentity(Alignment *alnObj, int s1, int s2); #ifdef HAVE_MHASH_H /** compute md5 hash from string * @param thread string to hash * @return hash string, or NULL on failure */ char * Md5Hash(const char *thread); /** compute md5 hash for a sequence * @param alnObj alignment object * @param s index of sequence * @return md5 hash string */ string Md5ForSeq(Alignment *alnObj, int s); /** compute md5 hash for all sequences * sorts and concatenates all sequences beforehand * thus sequence order doesn't matter * * @param alnObj alignment object * @param s index of sequence * @return md5 hash string */ string ConcatInputHash(Alignment *alnObj); #endif /* ATTRIBUTES */ /** the filename used for logging statistics */ string logfilename; /** logging of statistics enabled */ bool enabled; }; } #endif clustalx-2.1/clustalW/general/Stats.cpp0000644000175000017500000002302011470725216020030 0ustar ddineenddineen/** * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include #include "../alignment/Alignment.h" #ifdef HAVE_MHASH_H #include "mhash.h" #endif #include "Stats.h" using namespace std; namespace clustalw { Stats::Stats() { enabled = false; } Stats::~Stats() { } /* adopted from Sean Eddy'ssquid:aligneval.c */ /* Function: PairwiseIdentity() * * Purpose: Calculate the pairwise fractional identity between * two aligned sequences s1 and s2. This is simply * (idents / MIN(len1, len2)). * * Note how many ways there are to calculate pairwise identity, * because of the variety of choices for the denominator: * idents/(idents+mismat) has the disadvantage that artifactual * gappy alignments would have high "identities". 
* idents/(AVG|MAX)(len1,len2) both have the disadvantage that * alignments of fragments to longer sequences would have * artifactually low "identities". * * Case sensitive; also, watch out in nucleic acid alignments; * U/T RNA/DNA alignments will be counted as mismatches! */ /* float PairwiseIdentity(char @s1, char @s2) { int idents; /@ total identical positions @/ int len1, len2; /@ lengths of seqs @/ int x; /@ position in aligned seqs @/ idents = len1 = len2 = 0; for (x = 0; s1[x] != '\0' && s2[x] != '\0'; x++) { if (!isgap(s1[x])) { len1++; if (s1[x] == s2[x]) idents++; } if (!isgap(s2[x])) len2++; } if (len2 < len1) len1 = len2; return (len1 == 0 ? 0.0 : (float) idents / (float) len1); } */ /* s1/s2 are unit-offset as usual * */ float Stats::pairwiseIdentity(Alignment *alnObj, int s1, int s2) { int idents; /* total identical positions */ int len1, len2; /* real lengths of seqs */ int x; /* position in aligned seqs */ const vector* seq1 = alnObj->getSequence(s1); const vector* seq2 = alnObj->getSequence(s2); idents = len1 = len2 = 0; // cerr << "comparing " << alnObj->getName(s1).c_str() << ":" << alnObj->getName(s2).c_str() << " " << s1 << ":" << s2 << "\n"; // sequence length should be identical, but be paranoid for (x = 1; x<=alnObj->getSeqLength(s1) && x<=alnObj->getSeqLength(s2); x++) { if (! alnObj->isGap(s1, x)) { len1++; //cerr << " pos " << x << ": " << (*seq1)[x] << ":" << (*seq2)[x] << "\n"; if ((*seq1)[x] == (*seq2)[x]) idents++; } //DEBUG //else { //cerr << " gap at pos " << x << " (" << s1 << ")\n"; //} if (! alnObj->isGap(s2, x)) len2++; //DEBUG //else // cerr << " gap at pos " << x << " (" << s2 << ")\n"; } if (len2 < len1) len1 = len2; return (len1 == 0 ? 
0.0 : (float) idents / (float) len1); } #ifdef HAVE_MHASH_H string Stats::ConcatInputHash(Alignment *alnObj) { vector rawSeqArray; string ret; char *hash; // collect all sequences and sort const clustalw::SeqArray* seqArray = alnObj->getSeqArray(); for(int s = 1; s <= alnObj->getNumSeqs(); s++) { string seq; for(int r = 1; r <= alnObj->getSeqLength(s); r++) { int val = (*seqArray)[s][r]; seq.append(1, clustalw::userParameters->getAminoAcidCode(val)); } rawSeqArray.push_back(seq); } std::sort(rawSeqArray.begin(), rawSeqArray.end()); // concatenate sorted seqs string concatSeq; std::vector::iterator iter; for(iter=rawSeqArray.begin(); iter != rawSeqArray.end(); ++iter) concatSeq.append(*iter); // build hash and return hash = Md5Hash(concatSeq.c_str()); if (hash==NULL) { ret="HASHING_FAILURE"; } else { for (int i=0; igetSeqArray(); char *hash; string ret; for(int l = 1; l <= alnObj->getSeqLength(s); l++) { int val = (*seqArray)[s][l]; // continue if gap if((val < 0) || (val > userParameters->getMaxAA())) continue; seq.append(1, clustalw::userParameters->getAminoAcidCode(val)); } std::transform(seq.begin(), seq.end(), seq.begin(), ::toupper); hash = Md5Hash(seq.c_str()); if (hash==NULL) { ret = "HASHING_FAILURE"; } else { ret = hash; } return ret; } /* create md5 hash from input string * returns NULL on failure * user must free returned string */ char * Stats::Md5Hash(const char *thread) { MHASH td; char *tmpcstr; int i; unsigned char *hash; char *rethash; td = mhash_init(MHASH_MD5); if (td == MHASH_FAILED) return NULL; if (thread==NULL) return NULL; mhash(td, thread, strlen(thread)); //mhash_deinit(td, hash); hash = (unsigned char*) mhash_end(td); rethash = (char*) calloc(mhash_get_block_size(MHASH_MD5)*2+1, sizeof(char)); for (i = 0; i < mhash_get_block_size(MHASH_MD5); i++) { sprintf(&rethash[i*2], "%.2x", hash[i]); } return rethash; } #endif // HAVE_MHASH_H void Stats::logCmdLine(int argc, char **argv) { FILE *fp = fopen(logfilename.c_str(), "a"); if (fp == NULL) { 
cerr << "couldn't open file " << logfilename << " for logging of stats\n"; return; } if (argc > 1) { for (int i=1; i lengths; time_t t = time(NULL); tm s = *localtime(&t); int shortest; string hash; FILE *fp = fopen(logfilename.c_str(), "a"); if (fp == NULL) { cerr << "couldn't open file " << logfilename << " for logging of stats\n"; return; } fprintf(fp, "logging job: %s on %s", userParameters->getSeqName().c_str(), asctime(&s)); fprintf(fp, "clustal version: %s\n", userParameters->getRevisionLevel().c_str()); fprintf(fp, "seq type: "); if (userParameters->getDNAFlag()) fprintf(fp, "DNA"); else fprintf(fp, "protein"); fprintf(fp, "\n"); fprintf(fp, "numseqs: %d\n", alnObj->getNumSeqs()); // create a vector of seq lengths for later // and get shortest seq at the same time shortest=alnObj->getLengthLongestSequence(); for (i = 1; i <= alnObj->getNumSeqs(); i++) { int l = alnObj->getSeqLength(i); lengths.push_back(l); if (lgetLengthLongestSequence()); fprintf(fp, "seqlen shortest: %d\n", shortest); fprintf(fp, "seqlen avg: %.2f\n", utilityObject->average(lengths)); fprintf(fp, "seqlen std-dev: %.2f\n", utilityObject->stdDev(lengths)); fprintf(fp, "seqlen median: %.2f\n", utilityObject->median(lengths)); #ifdef HAVE_MHASH_H //hash = concatInputHash(alnObj); //fprintf(fp, "seq hash: %s\n", hash.c_str()); for (int s = 1; s <= alnObj->getNumSeqs(); s++) { string md5 = Md5ForSeq(alnObj, s); fprintf(fp, "md5 for seq %d: %s\n", s, md5.c_str()); } #else fprintf(fp, "md5: disabled\n"); #endif fclose(fp); } /* log some statistics for aligned sequences, i.e. 
alnObj should hold * the already aligned sequences * */ void Stats::logAlignedSeqStats(Alignment *alnObj) { FILE *fp = fopen(logfilename.c_str(), "a"); if (fp == NULL) { cerr << "couldn't open file " << logfilename << " for logging of stats\n"; return; } // alignment length is the length of any sequence fprintf(fp, "aln len: %d\n", alnObj->getSeqLength(1)); std::vector pwIdents; double lowestPwId = 1.0; double hightestPwId = 0.0; // create vector of pairwise identities for(int s1 = 1; s1 <= alnObj->getNumSeqs(); s1++) { for(int s2 = s1+1; s2 <= alnObj->getNumSeqs(); s2++) { double thisPwId = pairwiseIdentity(alnObj, s1, s2); pwIdents.push_back(thisPwId); if (thisPwId>hightestPwId) hightestPwId=thisPwId; if (thisPwIdgetName(s1).c_str(), alnObj->getName(s2).c_str(), s1, s2, PairwiseIdentity(alnObj, s1, s2)); } } fprintf(fp, "aln pw-id highest: %.2f\n", hightestPwId); fprintf(fp, "aln pw-id lowest: %.2f\n", lowestPwId); fprintf(fp, "aln pw-id avg: %.2f\n", utilityObject->average(pwIdents)); fprintf(fp, "aln pw-id std-dev: %.2f\n", utilityObject->stdDev(pwIdents)); fprintf(fp, "aln pw-id median: %.2f\n", utilityObject->median(pwIdents)); fclose(fp); } } clustalx-2.1/clustalW/general/SquareMat.h0000644000175000017500000000371511470725216020312 0ustar ddineenddineen /** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #include #include #include namespace clustalw { using namespace std; template class SquareMat { public: SquareMat():m_dimRow(0), m_dimCol(0){;} SquareMat(int size) { m_dimRow = size; m_dimCol = size; for (int i=0; i < size; i++) { vector x(size); int y = x.size(); m_2DVector.push_back(x); } } void SetAt(int nRow, int nCol, const T& value) { m_2DVector[nRow][nCol] = value; } T GetAt(int nRow, int nCol) { return m_2DVector[nRow][nCol]; } void GrowRow(int newSize) { if (newSize <= m_dimRow) return; m_dimRow = newSize; for(int i = 0 ; i < newSize - m_dimCol; i++) { vector x(m_dimRow); m_2DVector.push_back(x); } } void GrowCol(int newSize) { if(newSize <= m_dimCol) return; m_dimCol = newSize; for (int i=0; i & operator[](int x) { return m_2DVector[x]; } private: vector< vector > m_2DVector; unsigned int m_dimRow; unsigned int m_dimCol; }; } clustalx-2.1/clustalW/general/SequenceNotFoundException.h0000644000175000017500000000066011470725216023510 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef SEQUENCENOTFOUNDEXCEPTION_H #define SEQUENCENOTFOUNDEXCEPTION_H // standard exceptions #include #include using namespace std; class SequenceNotFoundException: public exception { virtual const char* what() const throw () { return "Could not find sequence with given id\n"; } }; #endif clustalx-2.1/clustalW/general/RandomAccessLList.h0000644000175000017500000001352311470725216021720 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Author: Mark Larkin UCD Conway Institute * * This class is to be used as a linked list type container. * The features are as follows: * 1) It allows efficient deletions anywhere. * 2) It also allows efficient random access. * 3) Its maximum size is fixed at creation. * 4) Items can only be added at the end. * 5) when it is destroyed it will delete the structure, but not the elements. 
* * Limitations: * It returns the ListElements directly, this is not the best way. A better way would be * to return an iterator of some kind, but this would take alot more work. At the moment * it is possible to have 2 LList objects and get an element from one list, and delete * it from another list. But this is ok for the time being. I will fix this later. */ #ifndef RANDOMACCESSLLIST_H #define RANDOMACCESSLLIST_H namespace clustalw { template class ListElement { public: ListElement() { // Do nothing } ListElement(T* e, ListElement* n, ListElement* p, unsigned int _index) { element = e; next = n; prev = p; index = _index; } ~ListElement() { element = 0; next = 0; } unsigned int getIndex(){return index;} T* element; ListElement* next; ListElement* prev; private: unsigned int index; }; template class RandomAccessLList { public: RandomAccessLList(int size) : maxSize(size), numElements(0) { elements = new ListElement*[size]; for(int i = 0; i < size; i++) { elements[i] = 0; } } ~RandomAccessLList() { for(int i = 0; i < maxSize; i++) { if(elements[i] != 0) { delete elements[i]; } } delete [] elements; } unsigned int getNumElements(){return numElements;} void addElementToBack(T* element) { if(numElements < maxSize) { // Add the element elements[numElements] = new ListElement(element, 0, 0, numElements); if(numElements == 0) { // First element elements[numElements]->next = 0; elements[numElements]->prev = 0; firstElementInList = elements[numElements]; numElements++; } else if(numElements > 0) // If it is not the first element { elements[numElements - 1]->next = elements[numElements]; elements[numElements]->prev = elements[numElements - 1]; elements[numElements]->next = 0; numElements++; } } } ListElement* getAt(int index) { if(index >= 0 && index < maxSize) { return elements[index]; } else { cout << "\nCannot get item : " << index << "\n"; return 0; } } ListElement* getFirst() { return firstElementInList; } void removeItem(ListElement* item) { if(item != 0 ) { int index 
= item->getIndex(); if(elements[index] != 0) { if(item->prev == 0 && item->next == 0) // Last item in the list { firstElementInList = 0; delete item; // Not sure if this is good or not. item = 0; numElements--; } else if(item->prev == 0) // remove First in the list { firstElementInList = item->next; firstElementInList->prev = 0; item->next = 0; delete item; // Not sure if this is good or not. item = 0; numElements--; } else if(item->next == 0) // remove last item { ListElement* prevElem = item->prev; prevElem->next = 0; item->prev = 0; delete item; // Not sure if this is good or not. item = 0; numElements--; } else // remove middle item { ListElement* prevElem = item->prev; ListElement* nextElem = item->next; prevElem->next = nextElem; nextElem->prev = prevElem; item->prev = 0; item->next = 0; delete item; // Not sure if this is good or not. item = 0; numElements--; } elements[index] = 0; } } else { cout << "ERROR: trying to remove item outside the bounds\n"; } } private: ListElement** elements; ListElement* firstElementInList; int maxSize; unsigned int numElements; }; } #endif clustalx-2.1/clustalW/general/OutputFile.h0000644000175000017500000000155111470725216020504 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef OUTPUTFILE_H #define OUTPUTFILE_H #include #include namespace clustalw { class OutputFile { public: OutputFile(); ~OutputFile(); bool openFile(std::string* fileName, const std::string msg, const std::string* path, const std::string ext, const std::string fileType); bool isOpen(); //void writeToFile(std::string* info); std::ofstream* getPtrToFile(); private: std::string getOutputFileName(const std::string prompt, std::string path, const std::string fileExtension); std::auto_ptr file; std::string typeOfFileMsg; // used for closing message! 
std::string name; }; } #endif clustalx-2.1/clustalW/general/OutputFile.cpp0000644000175000017500000000455611470725216021047 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "OutputFile.h" #include "utils.h" #include "userparams.h" namespace clustalw { OutputFile::OutputFile() { } OutputFile::~OutputFile() { // If it is open, close it and say that a file has been created!!!!! if(file.get()) { file->close(); utilityObject->info("%s file created: [%s]\n", typeOfFileMsg.c_str(), name.c_str()); } } bool OutputFile::openFile(std::string* fileName, const std::string msg, const std::string* path, const std::string ext, const std::string fileType) { if (fileName->empty()) { *fileName = getOutputFileName(msg, *path, ext); if(fileName->empty()) { return false; } } file.reset(new std::ofstream(fileName->c_str(), std::ofstream::trunc)); if(!file->is_open()) { utilityObject->error("Cannot open output file [%s]\n", fileName->c_str()); return false; } name = *fileName; typeOfFileMsg = fileType; return true; } bool OutputFile::isOpen() { return file->is_open(); } std::ofstream* OutputFile::getPtrToFile() { return file.get(); } std::string OutputFile::getOutputFileName(const std::string prompt, std::string path, const std::string fileExtension) { std::string temp; std::string _fileName; // Will return this name. 
std::string message; _fileName = path + fileExtension; if(_fileName.compare(userParameters->getSeqName()) == 0) { cerr << "WARNING: Output file name is the same as input file.\n"; if (userParameters->getMenuFlag()) { message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]: "; utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } } else if (userParameters->getMenuFlag()) { message = prompt + " [" + _fileName + "]"; utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } return _fileName; } } clustalx-2.1/clustalW/general/InvalidCombination.cpp0000644000175000017500000000215411470725216022510 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This class will be used to report invalid combinations of residue type and * aligntype. Correct values are either 0 or 1. * Note: It will not be possible for a user to cause this exception, only programmer. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include namespace clustalw { class InvalidCombination { public: InvalidCombination(int alignResidueType, int alignType) : _alignResidueType(alignResidueType), _alignType(alignType) {} void whatHappened(std::ostream &os = std::cerr) { os << "Incorrect Combination of alignResidueType and alignType.\n" << "Values should be 0 or 1\n" << "alignResidueType = " << _alignResidueType << "\n" << "alignType = " << _alignType << "\n"; } private: int _alignResidueType; int _alignType; }; } clustalx-2.1/clustalW/general/DebugLog.h0000644000175000017500000000175211470725216020077 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This class is used to log messages to a file that is specified when the object * is created. * The file is closed when the object is destroyed. 
The user simply needs to * create a DebugLog * object, and then call the logMsg function whenever they wish to write something to the * file. */ #ifndef DEBUGLOG_H #define DEBUGLOG_H #include #include namespace clustalw { using namespace std; class DebugLog { public: DebugLog(std::string); ~DebugLog(); void logMsg(std::string); void logScore(float x); void printScoreInfo(); private: /* Attributes */ std::string logFileName; std::ofstream* logFile; int numScores; float sumSoFar; float averageScore; float minScore; float maxScore; /* Functions */ DebugLog(); }; } #endif clustalx-2.1/clustalW/general/DebugLog.cpp0000644000175000017500000000310411470725216020423 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "DebugLog.h" #include #include namespace clustalw { DebugLog::DebugLog(std::string _logFileName) : logFileName(_logFileName), logFile(0), numScores(0), sumSoFar(0.0), averageScore(0.0), minScore(0.0), maxScore(0.0) { logFile = new std::ofstream(); logFile->open(logFileName.c_str(), ios::out); if(logFile->is_open()) { std::cout << "Logging debug info to file: " << logFileName << std::endl; } else { std::cerr << "Could not open log file.\n"; } } DebugLog::~DebugLog() { // Release the file! logFile->close(); delete logFile; } void DebugLog::logMsg(std::string msg) { if(logFile->is_open()) { (*logFile) << msg << "\n"; } }
/**
 * Record one score: update the running minimum, maximum, sum and count that
 * printScoreInfo() later reports.
 *
 * The constructor seeds minScore and maxScore with 0.0. The first recorded
 * score therefore replaces both, so that the reported minimum and maximum are
 * values that were actually logged rather than the 0.0 placeholder.
 *
 * @param x the score to record.
 */
void DebugLog::logScore(float x)
{
    if(numScores == 0)
    {
        // First sample: it is both the smallest and the largest seen so far.
        minScore = x;
        maxScore = x;
    }
    else
    {
        if(x < minScore)
        {
            minScore = x;
        }
        if(x > maxScore)
        {
            maxScore = x;
        }
    }
    sumSoFar += x;
    numScores++;
}
void DebugLog::printScoreInfo() { if(numScores > 0) { averageScore = sumSoFar / static_cast(numScores); ostringstream outs; outs << "SCORE INFO--------------------------------------------------->" << " The score was calculated " << numScores << " times.
The average = " << averageScore << "\n" << "The max score=" << maxScore << " The min score=" << minScore << "\n"; logMsg(outs.str()); } } } clustalx-2.1/clustalW/general/ClustalWResources.h0000644000175000017500000000316611470725216022041 0ustar ddineenddineen/** * Implements a singleton that maintains program resources. * The single instance is (re)instantiated on demand like: * Resources *res = Resources::Instance(); * * 24-05-07,Nigel Brown(EMBL): created. * 3-7-07, Mark Larkin, modified this class for clustalw */ #ifndef RESOURCESCLUSTALW_H #define RESOURCESCLUSTALW_H #include using namespace std; namespace clustalw { enum ClustalWResourcePathType { DefaultPath, InstallPath, ExecutablePath, HomePath }; class ClustalWResources { public: /* return the Resources singleton */ static ClustalWResources *Instance(); /* setters */ void setPathToExecutable(std::string pathToFiles); /* getters */ std::string getDefaultPath() { return defaultPath; } std::string getInstallPath() { return installPath; } std::string getExecutablePath() { return executablePath; } std::string getHomePath() { return homePath; } std::string findFile(const char *file, const ClustalWResourcePathType where = DefaultPath) const; std::string findFile(const std::string file, const ClustalWResourcePathType where = DefaultPath) const; std::string searchPathsForFile(const std::string fileName) const; /* debug */ void dump(); protected: /* hide the constructors */ ClustalWResources(); ClustalWResources(const ClustalWResources&); ClustalWResources& operator= (const ClustalWResources&); std::string dirname(std::string path); private: std::string defaultPath; std::string installPath; std::string executablePath; std::string homePath; }; } #endif //RESOURCES_H clustalx-2.1/clustalW/general/ClustalWResources.cpp0000644000175000017500000000640611470725216022374 0ustar ddineenddineen/** * Implements a singleton that maintains program resources. 
* The single instance is (re)instantiated on demand like: * Resources *res = Resources::Instance(); * * 24-05-07,Nigel Brown(EMBL): created. * 3-7-07, Mark Larkin, modified this class for clustalw */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ClustalWResources.h" #include #include "clustalw.h" #include using namespace std; namespace clustalw { //environment variables static const char *CLUW_INSTALL_DIR = "CLUW_INSTALL_DIR"; //return the sole instance ClustalWResources *ClustalWResources::Instance() { static ClustalWResources instance; return &instance; } ClustalWResources::ClustalWResources() { //defaultPath defaultPath = "."; //executablePath executablePath = "."; //installPath installPath = "."; char *env; if ((env = getenv(CLUW_INSTALL_DIR)) != 0) { installPath = string(env); } homePath = ""; } void ClustalWResources::setPathToExecutable(string path) { executablePath = dirname(path); } string ClustalWResources::dirname(string path) { string tempString; int size = path.size(); tempString = path; for (int i = size - 1; i > 0; i--) { if (tempString[i] == DIRDELIM) // Mark, no standard function in c++ { tempString.erase(i); break; } } return tempString; } void ClustalWResources::dump() { printf("%s => %s [%s]\n%s => %s\n%s => %s\n", "installPath", installPath.c_str(), CLUW_INSTALL_DIR, "executablePath", executablePath.c_str(), "homePath", homePath.c_str() ); } string ClustalWResources::findFile(const char *file, const ClustalWResourcePathType where) const { return findFile(string(file), where); }
/**
 * Look for `file` inside one of the known resource directories.
 *
 * The directory is chosen by `where` (install, executable or home path; any
 * other value selects the default path), joined to `file` with DIRDELIM, and
 * the result is opened for reading to test that it exists.
 *
 * @param file   file name relative to the chosen directory.
 * @param where  which resource directory to search.
 * @return the joined path if it could be opened for reading, otherwise an
 *         empty string.
 */
string ClustalWResources::findFile(const string file, const ClustalWResourcePathType where) const
{
    const string *path;
    ifstream ifs;

    switch (where)
    {
        case InstallPath:
            path = &installPath;
            break;
        case ExecutablePath:
            path = &executablePath;
            break;
        case HomePath:
            path = &homePath;
            break;
        default:
            path = &defaultPath;
            break;
    }

    // Two chars: the delimiter plus its terminating NUL. The buffer used to be
    // one char long, so writing the terminator overran it.
    char delim[2];
    delim[0] = DIRDELIM;
    delim[1] = 0;

    string fileName = *path + string(delim) + file;

    ifs.open(fileName.c_str(), ifstream::in);
    if (ifs.fail())
    {
        return string();
    }

    if (ifs.is_open() && ifs.good())
    {
        ifs.close();
        return fileName;
    }
    return string(); //not found/readable
}
// Search for a (string) file in a succession of likely locations and // return the full path as (string). // string ClustalWResources::searchPathsForFile(const string fileName) const { string file; while (1) { file = findFile(fileName, InstallPath); if (file != "") break; file = findFile(fileName, ExecutablePath); if (file != "") break; file = findFile(fileName, HomePath); if (file != "") break; file = findFile(fileName); if (file != "") break; file = fileName; // give up break; } return file; } } clustalx-2.1/clustalW/general/Array2D.h0000644000175000017500000000534211470725216017652 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef ARRAY2D_H #define ARRAY2D_H #include #include namespace clustalw { using namespace std; template class Array2D { public: Array2D():m_dimRow(0), m_dimCol(0){;} Array2D(int nRow, int nCol) { m_dimRow = nRow; m_dimCol = nCol; for (int i=0; i < nRow; i++) { vector x(nCol); int y = x.size(); m_2DVector.push_back(x); } } int getRowSize() { return m_dimRow; } int getColSize() { return m_dimCol; } void SetAt(int nRow, int nCol, const T& value) { m_2DVector[nRow][nCol] = value; } T GetAt(int nRow, int nCol) { return m_2DVector[nRow][nCol]; } void GrowRow(int newSize) { if (newSize <= (int)m_dimRow) return; m_dimRow = newSize; for(int i = 0 ; i < newSize - (int)m_dimCol; i++) { vector x(m_dimRow); m_2DVector.push_back(x); } } void GrowCol(int newSize) { if(newSize <= (int)m_dimCol) return; m_dimCol = newSize; for (int i=0; i < (int)m_dimRow; i++) m_2DVector[i].resize(newSize); } void ResizeRect(int row, int col) { GrowRow(row); GrowCol(col); } /* This is to get everything initialised to a value */ void GrowRow(int newSize, const T& value) { if (newSize <= m_dimRow) return; m_dimRow = newSize; for(int i = 0 ; i < 
newSize - m_dimCol; i++) { vector x(m_dimRow, value); m_2DVector.push_back(x); } } void GrowCol(int newSize, const T& value) { if(newSize <= m_dimCol) return; m_dimCol = newSize; for (int i=0; i & operator[](int x) { return m_2DVector[x]; } private: vector< vector > m_2DVector; unsigned int m_dimRow; unsigned int m_dimCol; }; } #endif clustalx-2.1/clustalW/general/0000755000175000017500000000000011470725216016231 5ustar ddineenddineenclustalx-2.1/clustalW/fileInput/RSFFileParser.h0000644000175000017500000000200611470725216021331 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef RSFFILEPARSER_H #define RSFFILEPARSER_H #include #include "FileParser.h" namespace clustalw { class RSFFileParser : public FileParser { public: /* Functions */ RSFFileParser(string filePath); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ void getRSFFeature(char* line, vector& secStructMask, int length); bool keyword(char *line, const char *code); /* Attributes */ string fileName; }; } #endif clustalx-2.1/clustalW/fileInput/RSFFileParser.cpp0000644000175000017500000002776211470725216021704 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "RSFFileParser.h" namespace clustalw { /** * Constructor sets up the chartab array. 
* @param filePath */ RSFFileParser::RSFFileParser(string filePath) { fileName = filePath; fillCharTab(); } vector RSFFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // start at the beginning // Need to get the cursor to the begining of the correct sequence. // This will be the case when we get to the seqNum { while (_currentSeqNum != seqNum) { while(*_line != '{') { if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore! { _fileIn->close(); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '{' line _fileIn->getline(_line, MAXLINE + 1); } while (!keyword(_line, "name")) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return Sequence(blank, blank, blank); } } for (i = 5; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname for (i = 0; i <= (int)strlen(_sname); i++) { if (_sname[i] == ' ') { _sname[i] = EOS; break; } } _sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); // replace blanks with '_' name = string(_sname); while (!keyword(_line, "sequence")) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return Sequence(blank, blank, blank); } } while (_fileIn->getline(_line, MAXLINE + 1)) { for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == EOS || c == '}') { break; } // EOL if (c == '.') { characterSeq += '-'; } c = chartab[c]; if (c) { characterSeq += c; } } if (c == '}') { break; } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } 
catch(...) { _fileIn->close(); cerr << "There was an exception in the RSFFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /** * count the number of sequences in a GCG RSF alignment file * @return The number of sequences in the file. */ int RSFFileParser::countSeqs() { char _line[MAXLINE + 1]; int numSeqs; try { numSeqs = 0; _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // start at the beginning if(!_fileIn->is_open()) { return 0; // No sequences found! } // skip the comments while (_fileIn->getline(_line, MAXLINE + 1)) { // NOTE needed to change to -1 and -2 (it was -2 and -3) // This is because getline does not put the \n in! if (_line[strlen(_line) - 1] == '.' && _line[strlen(_line) - 2] == '.') { break; } } while (_fileIn->getline(_line, MAXLINE + 1)) { if (*_line == '{') { numSeqs++; } } _fileIn->close(); return numSeqs; } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function RSFFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * Get the secondary structure information from the file. 
* @param gapPenaltyMask * @param secStructMask * @param secStructName * @param structPenalties * @param length */ void RSFFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { bool guigetss = false; if(userParameters->getProfileNum() == 1 && userParameters->getStructPenalties1()) guigetss = true; if(userParameters->getProfileNum() == 2 && userParameters->getStructPenalties2()) guigetss = true; char _title[MAXLINE + 1]; char _line[MAXLINE + 1]; char _lin2[MAXLINE + 1]; char _sname[MAXNAMES + 1]; int i; _line[0] = EOS; try { secStructMask.clear(); secStructMask.assign(length, '.'); _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // Need to start at begining // skip the comments while (_fileIn->getline(_line, MAXLINE + 1)) { if (_line[strlen(_line) - 1] == '.' && _line[strlen(_line) - 2] == '.') { break; } } // find the start of the sequence entry for (;;) { while (_fileIn->getline(_line, MAXLINE + 1)) if (*_line == '{') { break; } while (!keyword(_line, "name")) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return; } } for (i = 5; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname for (i = 0; i <= (int)strlen(_sname); i++) { if (_sname[i] == ' ') { _sname[i] = EOS; break; } } _sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); // replace blanks with '_' // look for secondary structure feature table / gap penalty mask while (_fileIn->getline(_line, MAXLINE + 1)) { if (keyword(_line, "feature")) { if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(_title, "Found secondary structure in alignment file: "); strcat(_title, _sname); (*_lin2) = utilityObject->promptForYesNo(_title, "Use it to set local gap penalties "); } else { (*_lin2) = 'y'; } if (guigetss || ((*_lin2 != 
'n') && (*_lin2 != 'N'))) { structPenalties = SECST; secStructMask.assign(length, '.'); do { if (keyword(_line, "feature")) { getRSFFeature(&_line[7], secStructMask, length); } _fileIn->getline(_line, MAXLINE + 1); } while (!keyword(_line, "sequence")); } else { do { _fileIn->getline(_line, MAXLINE + 1); } while (!keyword(_line, "sequence")); } secStructName = string(_sname); } else if (keyword(_line, "sequence")) { break; } if (structPenalties != NONE) { break; } } } _fileIn->close(); } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function RSFFileParser::getSecStructure()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * get a feature from the file. Called by getSecStructure * @param line * @param secStructMask * @param length */ void RSFFileParser::getRSFFeature(char* line, vector& secStructMask, int length) { char c, s; char str1[MAXLINE + 1], str2[MAXLINE + 1], feature[MAXLINE + 1]; int i, tmp, startPos, endPos; try { if (sscanf(line, "%d%d%d%s%s%s", &startPos, &endPos, &tmp, str1, str2, feature) != 6) { return; } if (strcmp(feature, "HELIX") == 0) { c = 'A'; s = '$'; } else if (strcmp(feature, "STRAND") == 0) { c = 'B'; s = '%'; } else { return ; } if (startPos >= length || endPos >= length) { return ; } secStructMask[startPos - 1] = s; for (i = startPos; i < endPos - 1; i++) { secStructMask[i] = c; } secStructMask[endPos - 1] = s; } catch(...) { cerr << "An exception has occured in the function RSFFileParser::getRSFFeature()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * keyword checks if code is on the line! 
* @param line * @param code * @return */ bool RSFFileParser::keyword(char *line, const char *code) { int i; char key[MAXLINE]; for (i = 0; !isspace(line[i]) && line[i] != EOS; i++) { key[i] = line[i]; } key[i] = EOS; return (strcmp(key, code) == 0); } } clustalx-2.1/clustalW/fileInput/PearsonFileParser.h0000644000175000017500000000175511470725216022320 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This file is for parsing pearson format files. * CHANGE: */ #ifndef PEARSONFILEPARSER_H #define PEARSONFILEPARSER_H #include #include "FileParser.h" namespace clustalw { using namespace std; class PearsonFileParser : public FileParser { public: /* Functions */ PearsonFileParser(string filePath); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ /* Attributes */ string fileName; }; } #endif clustalx-2.1/clustalW/fileInput/PearsonFileParser.cpp0000644000175000017500000003236711470725216022656 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * Mark 24-1-2007. I am now using the char "delimiter" for the delimiter when using * getline. This is to get around the problem of some files having '\r'. * * 10-02-07,Nigel Brown(EMBL): Changed ifstream to InFileStream to handle * cross-platform end-of-lines and removed delimiter member. 
* * 28-12-07,Paul McGettigan : replaced array processing with string processing this fixes bug #72 * * 9-2-2008, Paul McGettigan : fixed problem where space after '>' but before sequence name was causing * alignment to fail due to no sequence name being read in * 15-2-2008, Paul McGettigan : fixed bug 91 where Pseudo -FASTA format files were not being processed as * previously in v1.83 */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "PearsonFileParser.h" namespace clustalw { /** * Constructor for the Pearson file parser. * @param filePath * @return */ PearsonFileParser::PearsonFileParser(string filePath) { fileName = filePath; fillCharTab(); } /** * reads fasta/pearson file in one go instead of calling getSeq for * each single sequence. * * FIXME AW: only PearsonFileParser::getSeqRange is special, rest is the * same. should be defined in FileParser and then overloaded in special * cases like here */ vector PearsonFileParser::getSeqRange(int firstSeq, int nSeqsToRead, string *offendingSeq) { string characterSeq = ""; string name = ""; string title = ""; string blank = ""; string greater = ">"; //_line[0] = EOS; vector seqRangeVector; string line; //int i, j; int nSeqsRead = 0; unsigned char c; char delim; int _currentSeqNum = 0; // Not at any sequence yet! try { delim=FileParser::getDelimiter(fileName); //cout << "delim = " << delim << endl; ifstream _fileIn; _fileIn.open(fileName.c_str(),ios::in); // Read in lines until we get to the begining of sequence firstSeq. string line=""; do { std::getline(_fileIn,line,delim); if(line.substr(0,1) == greater){ _currentSeqNum++; } } while(_currentSeqNum ' and read up to first ' ' or MAXNAMES // remove the first char i.e. '>' name=line.substr(1,MAXNAMES); //if(name.find(">") != string::npos){ // andreas wilm: exit if angle bracket within header? 
//} while(name.substr(0,1)==" "){ name=name.substr(1,MAXNAMES); } //int i; //i = name.find(" "); if(name.find(" ") != string::npos){ name=name.substr(0,name.find(" ")); } utilityObject->rTrim(&name); // also replaces linef name=utilityObject->blankToUnderscore(name); // replace blanks with '_' // Read in lines until we get to the begining of next sequence. title = ""; // No title information while(std::getline(_fileIn,line,delim) ){ string::const_iterator iterator1 = line.begin(); while(iterator1 != line.end()){ // Andreas Wilm (UCD): exit if angle brackets within sequence if(*iterator1=='>' && iterator1!=line.begin()) { /* error output handled in Clustal.cpp cerr << "\nMultiple angle brackets inside sequence found:" << " invalid format.\n" << "Maybe you forgot a linebreak between sequences?\n"; */ parseExitCode=BADFORMAT; _fileIn.close(); seqRangeVector.clear(); return seqRangeVector; } if(*iterator1 =='\n' || *iterator1 =='\r' || *iterator1 == EOS || *iterator1 =='>'){ break; } c = *iterator1; c = chartab[c]; if(c){ characterSeq.append(1,c); } iterator1++; } if(*iterator1 == '>'){ break; } } // check sequence if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { /* error output handled in Clustal.cpp */ parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); _fileIn.close(); seqRangeVector.clear(); return seqRangeVector; } else if (characterSeq.length() == 0) { parseExitCode=EMPTYSEQUENCE; if (offendingSeq!=NULL) offendingSeq->assign(name); _fileIn.close(); seqRangeVector.clear(); return seqRangeVector; } seqRangeVector.push_back(Sequence(characterSeq, name, title)); characterSeq = ""; nSeqsRead++; } // while (nSeqsRead < nSeqsToRead) _fileIn.close(); return seqRangeVector; } catch(...) { cerr << "There was an exception in the PearsonFileParser::getSeqRange function.\n" << "Need to end program\n"; exit(1); } } /** * The function getSeq is used to get the sequence 'seqNum' in the file. 
It returns a * sequence object containing the sequence. * Deprecated: where possible use faster getSeqRange which reads * sequences in one go * @param seqNum The number of the sequence to get. * @return */ Sequence PearsonFileParser::getSeq(int seqNum, string *offendingSeq) { //char _line[MAXLINE + 1]; //char tseq[MAXLINE + 1]; //char sname[MAXNAMES + 1]; //sname [MAXNAMES] = '\0'; string characterSeq = ""; string name = ""; string title = ""; string blank = ""; string greater = ">"; //_line[0] = EOS; string line; cerr << "Use of PearsonFileParser::getSeq is deprecated!\n"; //int i, j; unsigned char c; char delim; int _currentSeqNum = 0; // Not at any sequence yet! try { /* _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // start at the beginning */ delim=FileParser::getDelimiter(fileName); //cout << "delim = " << delim << endl; ifstream _fileIn; _fileIn.open(fileName.c_str(),ios::in); ////////////////////////////////////////////////// //PMcG replace char array with string processing ////////////////////////////////////////////////// // Read in lines until we get to the begining of sequence seqNum. string line=""; do { std::getline(_fileIn,line,delim); if(line.substr(0,1) == greater){ _currentSeqNum++; } } while(_currentSeqNum ' and read up to first ' ' or MAXNAMES // remove the first char i.e. '>' name=line.substr(1,MAXNAMES); ////////////////////////////////////// // PMcG 9-2-2008 need to handle spaces at start of sequence name to conform to 1.83 handling ////////////////////////////////////// while(name.substr(0,1)==" "){ name=name.substr(1,MAXNAMES); } //int i; //i = name.find(" "); if(name.find(" ") != string::npos){ name=name.substr(0,name.find(" ")); } name=utilityObject->blankToUnderscore(name); // replace blanks with '_' // Read in lines until we get to the begining of sequence seqNum. 
/* PMcG replace char array with string processing while (_currentSeqNum != seqNum) { while(*_line != '>') { if(!_fileIn->getline(_line, MAXLINE + 1)) { freeFileResources(_fileIn); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '>' line _fileIn->getline(_line, MAXLINE + 1); } // line contains the name of the sequence for (i = 1; i <= strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(sname, _line + i, MAXNAMES); // remember entryname for (i = 1; i <= strlen(sname); i++) { if (sname[i] == ' ') { break; } } sname[i] = EOS; utilityObject->rTrim(sname); utilityObject->blankToUnderscore(sname); // replace blanks with '_' name = string(sname); */ title = ""; // No title information string seqLine = ""; while(std::getline(_fileIn,seqLine,delim) ){ string::const_iterator iterator1 = seqLine.begin(); while(iterator1 != seqLine.end()){ if(*iterator1 =='\n' || *iterator1 =='\r' || *iterator1 == EOS || *iterator1 =='>'){ break; } c = *iterator1; c = chartab[c]; // PMcG 15-02-2008 bug 91 // strip out spaces and numbers from pseudo_fasta files // but need to maintain gaps if present in sequence input // to replicate behaviour of v1.83 //if(*iterator1 != ' ' && !isdigit(*iterator1)){ if(c){ characterSeq.append(1,c); } iterator1++; } if(*iterator1 == '>'){ break; } } /* while (_fileIn->getline(_line, MAXLINE + 1)) { for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == '\n' || c == EOS || c == '>') { break; } c = chartab[c]; if (c) { characterSeq += c; } } if (c == '>') { break; } } */ _fileIn.close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; // return empty seq return Sequence(blank, blank, blank); } else if (characterSeq.length() == 0) { parseExitCode=EMPTYSEQUENCE; // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) 
{ cerr << "There was an exception in the PearsonFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /** * The function countSeqs, counts the number of sequences in a file. * @return The number of sequences in the file. */ int PearsonFileParser::countSeqs() { //char line[1000 + 1]; int _nseqs = 0; string line2; char delim; try { //_fileIn = new InFileStream; //nige //_fileIn->open(fileName.c_str()); //nige delim=FileParser::getDelimiter(fileName); ifstream _fileIn; _fileIn.open(fileName.c_str(),ios::in); if(!_fileIn.is_open()) { return 0; // No sequences found! } /* while ((*_fileIn) >> line2/@_fileIn->getline(line, 1000 + 1)@/) { /@if(_nseqs == 50) { cout << "\n\n" << line << "\n\n"; exit(1); }@/ */ while (std::getline(_fileIn,line2,delim)) { if (line2[0] == '>') { _nseqs++; } } _fileIn.close(); return _nseqs; } catch(...) { freeFileResources(_fileIn); cerr << "An exception has occured in the function PearsonFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * There is no secondary structure information in the Pearson file. This is here to * set the structPenalties to NONE. * @param gapPenaltyMask * @param secStructMask * @param secStructName * @param structPenalties * @param length */ void PearsonFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { structPenalties = NONE; } } clustalx-2.1/clustalW/fileInput/PIRFileParser.h0000644000175000017500000000160211470725216021332 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef PIRFILEPARSER_H #define PIRFILEPARSER_H #include #include "FileParser.h" namespace clustalw { class PIRFileParser : public FileParser { public: /* Functions */ PIRFileParser(string filePath); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ /* Attributes */ string fileName; }; } #endif clustalx-2.1/clustalW/fileInput/PIRFileParser.cpp0000644000175000017500000001737011470725216021676 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "PIRFileParser.h" namespace clustalw { /** * PIRFileParser contructor sets up the chartab array. * @param filePath */ PIRFileParser::PIRFileParser(string filePath) { fileName = filePath; fillCharTab(); } /* * get range of sequences */ vector PIRFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // Read in lines until we get to the begining of sequence seqNum. while (_currentSeqNum != seqNum) { while(*_line != '>') { if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore! 
{ _fileIn->close(); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '>' line _fileIn->getline(_line, MAXLINE + 1); } // line contains the name of the sequence for (i = 4; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname _sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); // replace blanks with '_' name = string(_sname); _fileIn->getline(_line, MAXLINE + 1); strncpy(_title, _line, MAXTITLES); _title[MAXTITLES] = EOS; i = strlen(_title); if (_title[i - 1] == '\n') { _title[i - 1] = EOS; } title = string(_title); while (_fileIn->getline(_line, MAXLINE + 1)) { for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == '\n' || c == EOS || c == '*') { break; } c = chartab[c]; if (c) { characterSeq += c; } } if (c == '*') { break; } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) { _fileIn->close(); cerr << "There was an exception in the PIRFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /** * The function countSeqs finds the number of sequences in the file and returns it. * @return The number of sequences in the file. */ int PIRFileParser::countSeqs() { char line[MAXLINE + 1], c; line[0] = EOS; int numSeqs, i; bool seqOk; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { return 0; // No sequences found! } // Get to begining of sequences! 
while (_fileIn->getline(line, MAXLINE + 1)) { if (!utilityObject->blankLine(line)) { break; } } // Now check the 1st sequence to make sure it ends with * seqOk = false; while (_fileIn->getline(line, MAXLINE + 1)) { // Look for end of first seq if (*line == '>') { break; } for (i = 0; seqOk == false; i++) { c = line[i]; if (c == '*') { seqOk = true; // ok - end of sequence found break; } // EOL if (c == '\n' || c == EOS) { break; } // EOL } if (seqOk == true) { break; } } if (seqOk == false) { _fileIn->close(); utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n"); return 0; // funny format } numSeqs = 1; while (_fileIn->getline(line, MAXLINE + 1)) { if (*line == '>') { // Look for start of next seq seqOk = false; while (_fileIn->getline(line, MAXLINE + 1)) { // Look for end of seq if (*line == '>') { _fileIn->close(); utilityObject->error("PIR format sequence end marker '*'\nmissing for one or more sequences.\n"); return 0; // funny format } for (i = 0; seqOk == false; i++) { c = line[i]; if (c == '*') { seqOk = true; // ok - sequence found break; } if (c == '\n' || c == EOS) { break; } } if (seqOk == true) { numSeqs++; break; } } } } _fileIn->close(); return numSeqs; } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function PIRFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * There is no secondary structure information in PIR files! * @param gapPenaltyMask * @param secStructMask * @param secStructName * @param structPenalties * @param length */ void PIRFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { structPenalties = NONE; } } clustalx-2.1/clustalW/fileInput/MSFFileParser.h0000644000175000017500000000160211470725216021325 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
 */
#ifndef MSFFILEPARSER_H
#define MSFFILEPARSER_H

// NOTE(review): the header name of the first include is missing in this copy
// of the file -- restore it from the pristine source.
#include
#include "FileParser.h"

namespace clustalw
{

/**
 * FileParser implementation for MSF alignment files.
 *
 * NOTE(review): the vector parameters and return type below have no element
 * type in this copy (template arguments look stripped) -- confirm against the
 * pristine source.
 */
class MSFFileParser : public FileParser
{
    public:
        /* Functions */
        /** Remembers filePath as the file to parse and fills the character table. */
        MSFFileParser(string filePath);
        /**
         * Reads one sequence from the file.
         * @param seqNum       1-based index of the sequence within each block
         * @param offendingSeq if non-NULL, receives the sequence name when the
         *                     sequence exceeds the maximum allowed length
         */
        virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL);
        /**
         * Reads a run of sequences starting at firstSeq.
         * NOTE(review): presumably num is the count to read -- the
         * implementation is not fully visible here, confirm.
         */
        virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL);
        /** Counts the sequences in the file; 0 when the file cannot be opened. */
        virtual int countSeqs();
        /** MSF carries no secondary structure: sets structPenalties to NONE. */
        virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length);
        /* Attributes */
    private:
        /* Functions */
        /* Attributes */
        string fileName; // path handed to the constructor
};

}
#endif
clustalx-2.1/clustalW/fileInput/MSFFileParser.cpp0000644000175000017500000001501311470725216021661 0ustar ddineenddineen/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 */
/**
 * Changes:
 *
 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
 * cross-platform end-of-lines.
 */
#ifdef HAVE_CONFIG_H
    #include "config.h"
#endif
#include "MSFFileParser.h"

namespace clustalw
{

/**
 * MSFFileParser constructor sets up the chartab array.
* @param filePath * @return */ MSFFileParser::MSFFileParser(string filePath) { fileName = filePath; fillCharTab(); } vector MSFFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); for (i = 0;; i++) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return Sequence(blank, blank, blank); } // read the title if (utilityObject->lineType(_line, "//")) { break; } // lines...ignore } while (_fileIn->getline(_line, MAXLINE + 1)) { if (!utilityObject->blankLine(_line)) { for (i = 1; i < seqNum; i++) { _fileIn->getline(_line, MAXLINE + 1); } for (j = 0; j <= (int)strlen(_line); j++) { if (_line[j] != ' ') { break; } } for (k = j; k <= (int)strlen(_line); k++) { if (_line[k] == ' ') { break; } } // Get the name of the sequence strncpy(_sname, _line + j, utilityObject->MIN(MAXNAMES, k - j)); _sname[utilityObject->MIN(MAXNAMES, k - j)] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); name = string(_sname); for (i = k; i <= MAXLINE; i++) { c = _line[i]; if (c == '.' || c == '~') { c = '-'; } if (c == '*') { c = 'X'; } if (c == '\n' || c == EOS) { break; } // EOL c = chartab[c]; if (c) { characterSeq += c; } } for (i = 0;; i++) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return Sequence(characterSeq, name, title); } if (utilityObject->blankLine(_line)) { break; } } } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title);; } catch(...) 
{ _fileIn->close(); cerr << "An exception has occured in the function MSFFileParser::getSeq()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * The function countSeqs counts the number of sequences in the file. * @return The number of sequences in the file. */ int MSFFileParser::countSeqs() { char _line[MAXLINE + 1]; int _numSeqs; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { return 0; // No sequences found! } while (_fileIn->getline(_line, MAXLINE + 1)) { if (utilityObject->lineType(_line, "//")) { break; } } while (_fileIn->getline(_line, MAXLINE + 1)) { if (!utilityObject->blankLine(_line)) { break; } // Look for next non- blank line } _numSeqs = 1; while (_fileIn->getline(_line, MAXLINE + 1)) { if (utilityObject->blankLineNumericLabel(_line)) { _fileIn->close(); return _numSeqs; } _numSeqs++; } return 0; // if you got to here-funny format/no seqs. } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function MSFFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /** * There is no secondary structure information in MSF files. Set structPenalties to NONE. * @param gapPenaltyMask * @param secStructMask * @param secStructName * @param structPenalties * @param length */ void MSFFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { structPenalties = NONE; } } clustalx-2.1/clustalW/fileInput/InFileStream.h0000644000175000017500000000243711470725216021254 0ustar ddineenddineen/** * Author: Nigel Brown * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * InFileStream subclasses std::ifstream, adding a check for the end-of-line * character convention in the input file. 
 This is then used by the getline()
 * member as the line delimiter, unless the caller supplies an explicit
 * delimiter.
 *
 * Created: 09-02-07,Nigel Brown(EMBL)
 ***************************************************************************/
#ifndef INFILESTREAM_H
#define INFILESTREAM_H

// NOTE(review): the header names of these four includes are missing in this
// copy of the file -- restore them from the pristine source.
#include
#include
#include
#include

/**
 * std::ifstream that remembers which end-of-line character the opened file
 * uses and applies it as the default delimiter of getline().
 */
class InFileStream : public std::ifstream
{
  public:
    /** Creates a closed stream; the delimiter defaults to '\n'. */
    InFileStream();
    /** Opens filename for input and detects its line delimiter. */
    InFileStream(const char *filename);
    //- InFileStream(const InFileStream ©);

    /** Opens filename for input; on success the line delimiter is re-detected. */
    void open(const char *filename);
    /** Forwards to ifstream::close(). */
    void close();

    //int get();
    //bool is_open();

    /** Reads up to n-1 characters into s, stopping at the detected delimiter. */
    std::istream& getline(char *s, std::streamsize n);/*{return ifstream::getline(s, n, delim);}*/
    /** Reads up to n-1 characters into s, stopping at the caller's delimiter. */
    std::istream& getline(char *s, std::streamsize n, char delim); /*{ return ifstream::getline(s, n, delim); }*/

  protected:
    /**
     * Re-opens the named file with a separate stream, inspects its first run
     * of CR/LF characters, stores the resulting delimiter and returns it.
     * Returns the current delimiter unchanged when that open fails.
     */
    char findDelimiter();

  private:
    //disable copy-constructor
    // NOTE(review): the parameter name '©' looks like an extraction artefact
    // of '&copy' -- confirm against the pristine source.
    InFileStream(const InFileStream ©);
    std::string filename; // name passed to the constructor or to open()
    //auto_ptr inFile;
    char delim;           // line delimiter used by the two-argument getline()
};

#endif //INFILESTREAM_H
clustalx-2.1/clustalW/fileInput/InFileStream.cpp0000644000175000017500000000715111470725216021605 0ustar ddineenddineen/**
 * Author: Nigel Brown
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 */
/**
 * InFileStream subclasses std::ifstream, adding a check for the end-of-line
 * character convention in the input file.
***************************************************************************/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include "InFileStream.h" using namespace std; const char LF = 0x0a; //linefeed const char CR = 0x0d; //carriage return InFileStream::InFileStream() : ifstream() { delim = '\n'; // default //cout << "InFileStream() constructor 1" << endl; } InFileStream::InFileStream(const char *filename) : ifstream(filename, ios::in), filename(filename) { //cout << "InFileStream(f) constructor 2" << endl; delim = findDelimiter(); } //- copy-constructor: can't copy superclass private members //- InFileStream::InFileStream(const InFileStream ©) : //- ifstream(static_cast(copy)) //- { //- cout << "InFileStream() constructor 3" << endl; //- delim = copy.delim; //- } void InFileStream::open(const char *filename) { this->filename = filename; ifstream::open(filename, ios::in); if (ifstream::fail()) return; delim = findDelimiter(); } //not necessary, but for symmetry to open() void InFileStream::close() { ifstream::close(); } //getline with stored delimiter std::istream& InFileStream::getline(char *s, streamsize n) { return ifstream::getline(s, n, delim); } //getline with caller supplied delimiter std::istream& InFileStream::getline(char *s, streamsize n, char delim) { return ifstream::getline(s, n, delim); } /** * Mark 24-1-2007. I added the function findDelimiter to determine if '\r' or * '\n' will be used as the line delimiter when parsing the file. * * 25-01-07,Nigel Brown(EMBL): changed body of loop to check successive chars * in case of DOS/Windows * * 09-02-07,Nigel Brown(EMBL): moved member into new InFileStream subclassed * from std::ifstream, so this is called automatically for any file reader * that uses InFileStream in place of std::ifstream. Replaced if/then/else * with switch. 
*/ char InFileStream::findDelimiter() { ifstream in; int type = 0; in.open(filename.c_str(), ios::in); if (in.fail()) return delim; in.seekg(0, ios::beg); //look for CR or LF or CRLF (or LFCR) if (in.is_open()) { char c; while (in.get(c)) { if (c == CR) type |= 1; else if (c == LF) type |= 2; else if (type) break; } } in.close(); switch (type) { case 1: //cout << "file is Mac System 9" << endl; delim = '\r'; break; case 2: //cout << "file is UNIX" << endl; delim = '\n'; break; case 3: //cout << "file is DOS" << endl; delim = '\n'; break; default: //short or empty file //cout << "file is UNIX (default)" << endl; delim = '\n'; } return delim; } clustalx-2.1/clustalW/fileInput/GDEFileParser.h0000644000175000017500000000155711470725216021310 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifndef GDEFILEPARSER_H #define GDEFILEPARSER_H #include #include "FileParser.h" namespace clustalw { class GDEFileParser : public FileParser { public: /* Functions */ GDEFileParser(string filePath); ~GDEFileParser(); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ /* Attributes */ string fileName; }; } #endif clustalx-2.1/clustalW/fileInput/GDEFileParser.cpp0000644000175000017500000003057711470725216021647 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "GDEFileParser.h" namespace clustalw { /* * Constructor sets up the chartab array. 
* */ GDEFileParser::GDEFileParser(string filePath) { fileName = filePath; fillCharTab(); } /* * Nothing to do in destruction of object. */ GDEFileParser::~GDEFileParser() { } vector GDEFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); bool dnaFlagSet = userParameters->getDNAFlag(); while (_currentSeqNum != seqNum) { while((*_line != '#' && dnaFlagSet) || (*_line != '%' && !dnaFlagSet)) { if(!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '#' or '%' line _fileIn->getline(_line, MAXLINE + 1); //nige } for (i = 1; i <= MAXNAMES; i++) { if (_line[i] == '(' || _line[i] == '\n' || _line[i] == '\r') { break; } _sname[i - 1] = _line[i]; } i--; _sname[i] = EOS; for (i--; i > 0; i--) { if (isspace(_sname[i])) { _sname[i] = EOS; } else { break; } } utilityObject->blankToUnderscore(_sname); name = string(_sname); title = ""; while (_fileIn->getline(_line, MAXLINE + 1)) { if (*_line == '%' || *_line == '#' || *_line == '"') { break; } for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == '\n' || c == EOS) { break; } c = chartab[c]; if (c) { characterSeq += c; } } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) { _fileIn->close(); cerr << "There was an exception in the GDEnFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /* * The countSeqs function returns the number of sequences in the file. 
*/ int GDEFileParser::countSeqs() { char line[MAXLINE + 1]; int _nseqs = 0; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { return 0; // No sequences found! } while (_fileIn->getline(line, MAXLINE + 1)) { if ((*line == '%') && (userParameters->getDNAFlag() == false)) { _nseqs++; } else if ((*line == '#') && (userParameters->getDNAFlag() == true)) { _nseqs++; } } _fileIn->close(); return _nseqs; } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function GDEFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * getSecStructure gets the secondary structure from the file. */ void GDEFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { bool guigetss = false; if(userParameters->getProfileNum() == 1 && userParameters->getStructPenalties1()) guigetss = true; if(userParameters->getProfileNum() == 2 && userParameters->getStructPenalties2()) guigetss = true; char _title[MAXLINE + 1]; char _line[MAXLINE + 1]; char _lin2[MAXLINE + 1]; char _sname[MAXNAMES + 1]; int i, len, offset = 0; unsigned char c; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // NOTE I think I should empty the masks before pushing onto them! gapPenaltyMask.clear(); secStructMask.clear(); for (;;) { _line[0] = '\0'; // search for the next comment line while (*_line != '"') { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return; } } // is it a secondary structure entry? if (strncmp(&_line[1], "SS_", 3) == 0) { for (i = 1; i <= MAXNAMES - 3; i++) { if (_line[i + 3] == '(' || _line[i + 3] == '\n' || _line[i + 3] == '\r') { break; } _sname[i - 1] = _line[i + 3]; } i--; _sname[i] = EOS; // NOTE NOTE NOTE // Is it possible for this to be executed???????????????? 
// if _line contains ( then we break and dont put it into _sname // So how can sname have it??????? if (_sname[i - 1] == '(') { sscanf(&_line[i + 3], "%d", &offset); } else { offset = 0; } for (i--; i > 0; i--) { if (isspace(_sname[i])) { _sname[i] = EOS; } else { break; } } utilityObject->blankToUnderscore(_sname); secStructName = string(_sname); if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(_title, "Found secondary structure in alignment file: "); strcat(_title, _sname); (*_lin2) = utilityObject->promptForYesNo(_title, "Use it to set local gap penalties "); } else { (*_lin2) = 'y'; } if (guigetss || ((*_lin2 != 'n') && (*_lin2 != 'N'))) { structPenalties = SECST; for (i = 0; i < length; i++) { secStructMask.push_back('.'); } len = 0; while (_fileIn->getline(_line, MAXLINE + 1)) { if (*_line == '%' || *_line == '#' || *_line == '"') { break; } for (i = offset; i < length; i++) { c = _line[i]; if (c == '\n' || c == EOS) { break; } // EOL secStructMask[len++] = c; } if (len >= length) // NOTE i put in >= { break; } } } } // or is it a gap penalty mask entry? else if (strncmp(&_line[1], "GM_", 3) == 0) { for (i = 1; i <= MAXNAMES - 3; i++) { if (_line[i + 3] == '(' || _line[i + 3] == '\n') { break; } _sname[i - 1] = _line[i + 3]; } i--; _sname[i] = EOS; // NOTE NOTE // Again I dont think it is possible for _sname to have ( !!!! 
if (_sname[i - 1] == '(') { sscanf(&_line[i + 3], "%d", &offset); } else { offset = 0; } for (i--; i > 0; i--) { if (isspace(_sname[i])) { _sname[i] = EOS; } else { break; } } utilityObject->blankToUnderscore(_sname); secStructName = string(_sname); if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(_title, "Found gap penalty mask in alignment file: "); strcat(_title, _sname); (*_lin2) = utilityObject->promptForYesNo(_title, "Use it to set local gap penalties "); } else { (*_lin2) = 'y'; } if (guigetss || ((*_lin2 != 'n') && (*_lin2 != 'N'))) { structPenalties = GMASK; for (i = 0; i < length; i++) { gapPenaltyMask.push_back('1'); } len = 0; while (_fileIn->getline(_line, MAXLINE + 1)) { if (*_line == '%' || *_line == '#' || *_line == '"') { break; } for (i = offset; i < length; i++) { c = _line[i]; if (c == '\n' || c == EOS) { break; } // EOL gapPenaltyMask[len++] = c; } if (len >= length) // NOTE I put in >= { break; } } } } if (structPenalties != NONE) { break; } } _fileIn->close(); } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function GDEFileParser::getSecStructure()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } } clustalx-2.1/clustalW/fileInput/FileReader.h0000644000175000017500000000264311470725216020733 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef FILEREADER_H #define FILEREADER_H #include #include #include #include "../alignment/Alignment.h" #include "../alignment/Sequence.h" #include "../general/userparams.h" #include "../general/utils.h" #include "FileParser.h" #include "ClustalFileParser.h" #include "PearsonFileParser.h" #include "PIRFileParser.h" #include "GDEFileParser.h" #include "MSFFileParser.h" #include "RSFFileParser.h" #include "EMBLFileParser.h" namespace clustalw { class FileReader { public: /* Functions */ FileReader(); ~FileReader(); int seqInput(Alignment* alignPtr, bool append, string *offendingSeq); int readSeqs(Alignment* alignPtr, int firstSeq, string *offendingSeq); int profileInput(Alignment* alignPtr); /* Attributes */ private: /* Functions */ void checkInfile(int* nseqs, auto_ptr& fileParser); /* Attributes */ string sequenceFileName; bool noEmptySequence(vector seqRangeVector, string *offendingSeq); InFileStream* fileIn; int structPenalties; string secStructName; vector secStructMask; // Will need to be cleared out before every reading! vector gapPenaltyMask; vector formatNames; }; } #endif clustalx-2.1/clustalW/fileInput/FileReader.cpp0000644000175000017500000004116411470725216021267 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. * * 23-03-07,Nigel Brown(EMBL): added call to Alignment::testUniqueNames() to * test sequence names prior to appending; moved * Alignment::checkAllNamesDifferent() test into block handling loading of new * sequences. * * 02-04-07,Nigel Brown(EMBL): commented out the "No sequences in file..." * warnings in seqInput() and profileInput()and enabled the * higher-up-the-stack counterpart in MainWindow::errorHandler(), since the * former was causing a crash during the repaint after the user accepts the * message, because some state (what exactly?) 
is unstable at this depth after * a sequence load error, specifically when loading an empty file after * already loading some sequences. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "FileReader.h" namespace clustalw { FileReader::FileReader() { fileIn = new InFileStream; structPenalties = NONE; // I think this should be ok. } FileReader::~FileReader() { delete fileIn; } /* check if we've read sequences without any information, i.e. header * but no sequence at all */ bool FileReader::noEmptySequence(vector seqVector, string *offendingSeq) { vector::iterator si; for (si = seqVector.begin(); si != seqVector.end(); si++) { if (si->isEmpty()) { offendingSeq->assign(si->getName()); return false; } } return true; } /* * The function seqInput is called from the interactive menu only. This is because it * allows us to append seqs. But this would not happen on command line. * It is called twice in the interactive menu, both times with append as false. It calls * the readseqs function to do the work. */ int FileReader::seqInput(Alignment *alignPtr, bool append, string *offendingSeq) { int code; if(userParameters->getMenuFlag()) { cout << "\n\nSequences should all be in 1 file.\n"; cout << "\n7 formats accepted: \n"; cout << "NBRF/PIR, EMBL/SwissProt, Pearson (Fasta), GDE, Clustal, GCG/MSF, \ RSF.\n\n\n"; } if (append) { int numSeqsAlready = alignPtr->getNumSeqs(); code = readSeqs(alignPtr, numSeqsAlready + 1, offendingSeq); } else { code = readSeqs(alignPtr, 1, offendingSeq); // 1 is the first seq to be read } if(code == OK) { userParameters->setStructPenalties1(false); userParameters->setStructPenalties2(false); alignPtr->clearSecStruct1(); alignPtr->clearSecStruct2(); string typeOfAlign = userParameters->getDNAFlag() ? 
"DNA" : "PROTEIN"; cout << "Sequences assumed to be " << typeOfAlign << endl; if (userParameters->getMenuFlag()) { cout << "\n\n"; alignPtr->printSequencesAddedInfo(); } if(userParameters->getDNAFlag()) { userParameters->setDNAMultiGap(); } else { userParameters->setProtMultiGap(); } userParameters->setEmpty(false); } else if(code == NOSEQUENCESINFILE) // no sequences { /* 02-04-07,nige: this causes a fatal repaint: let * MainWindow::errorHandler deal with it. */ //utilityObject->error("No sequences in file! Bad format?\n"); return code; } else { return code; } return code; } /* * The function readSeqs will create the FileParser that is needed and * it will read in all of the sequences. Depending on the filetype, it * will also check for secondary structure information. * * If there is a problem with one of the sequences its name will be * assigned to offendingSeq */ int FileReader::readSeqs(Alignment *alignPtr, int firstSeq, string *offendingSeq) { string line; string fileName; string linuxFilePath = "file://"; // static char *seq1, sname1[MAXNAMES + 1], title[MAXTITLES + 1]; //int i, j; int noSeqs; //static int l1; static bool DNAFlag1; vector seqVector; vector seqRangeVector; auto_ptr fileParser; // Means we dont need to delete it! vector _outputIndex; // Local version of outputIndex. if (userParameters->getMenuFlag()) { utilityObject->getStr(string("Enter the name of the sequence file "), line); } else { line = userParameters->getSeqName(); } if (line.length() == 0) { // We have no fileName, so return -1 return -1; } // If we have file:// in the string, remove it!! string::size_type loc = line.find(linuxFilePath, 0); if(loc != string::npos) { fileName = line.substr(loc + linuxFilePath.length()); line = fileName; } // Now we have the file name, we must open the file. fileIn->open(line.c_str()); //nige if (fileIn->fail()) return CANNOTOPENFILE; sequenceFileName = line; // Check if the file exists! 
if (!fileIn->is_open()) { utilityObject->error("Cannot open input file (%s)\n", line.c_str()); return CANNOTOPENFILE; } userParameters->setSeqName(line); // NOTE I made a change here because the profile2Name was not stored! if(userParameters->getProfileNum() == 2 && userParameters->getProfile2Name().empty()) { userParameters->setProfile2Name(line); } else if(userParameters->getProfileNum() == 1 && userParameters->getProfile1Name().empty()) { userParameters->setProfile1Name(line); } noSeqs = 0; checkInfile(&noSeqs, fileParser); if (noSeqs == 0) { return NOSEQUENCESINFILE; } seqRangeVector = fileParser->getSeqRange(1, noSeqs, offendingSeq); if (seqRangeVector.size()==0) return fileParser->getParseExitCode(); // FIXME Andreas Wilm (UCD): noEmptySequence check should be done // internally by FileParser instances if (noEmptySequence(seqRangeVector, offendingSeq) == false) { return EMPTYSEQUENCE; // Error there are same names. } for (int i=0; i < (int)seqRangeVector.size(); i++) { // Andreas Wilm (UCD): fixed wrong default output order // which is important when no alignment (convert!) is done // _outputIndex.push_back(i); // default output order _outputIndex.push_back(i+1); // default output order Sequence tempSeq = seqRangeVector[i]; if (!userParameters->getExplicitDNAFlag()) { DNAFlag1 = tempSeq.checkDNAFlag(); // check DNA/Prot if (i == 1) { userParameters->setDNAFlag(DNAFlag1); } } // type decided by first seq else { DNAFlag1 = userParameters->getDNAFlag(); } seqVector.push_back(tempSeq); } if(firstSeq == 1) // New mulitple alignment, or else profile1 { int prfNum = userParameters->getProfileNum(); alignPtr->addSequences(&seqVector); //test names after saving if(alignPtr->checkAllNamesDifferent(offendingSeq) == false) { //nige moved return ALLNAMESNOTDIFFERENT; // Error there are same names. } userParameters->setProfileNum(prfNum); if(!alignPtr->addOutputIndex(&_outputIndex)) { return OTHERERROR; } } else // profile2 { //test names before appending if (! 
alignPtr->testUniqueNames(&seqVector, offendingSeq)) { //nige added return ALLNAMESNOTDIFFERENT; } alignPtr->appendSequences(&seqVector); if(!alignPtr->appendOutputIndex(&_outputIndex)) { return OTHERERROR; } } // Then need to pass the length to the parser to get the secondary structure info. int maxAlnLength; // Get values from the Alignment object. maxAlnLength = alignPtr->getMaxAlnLength(); // look for a feature table / gap penalty mask (only if this is a profile) if (userParameters->getProfileNum() > 0) { structPenalties = NONE; secStructMask.clear(); gapPenaltyMask.clear(); secStructName = ""; fileParser->getSecStructure(gapPenaltyMask, secStructMask, secStructName, structPenalties, maxAlnLength); // Andreas Wilm (UCD): bug 114 // after calling getSecStructure gapPenaltyMask needs to be // allocated (it's copied/linked to later) if (gapPenaltyMask.empty()) { gapPenaltyMask.resize(secStructMask.size()); } } if (fileIn->is_open()) { fileIn->close(); } return OK; // return the number of seqs. read in this call } /* * The function profileInput is used to read profiles into the Alignment. If the first * profile is already there it will read in the second profile. It returns the number * of seqs. If it returns -1, couldnt open file. */ int FileReader::profileInput(Alignment *alignPtr) { int code; //int i, totalNumOfSeqs = 0; string offendingSeq; if(userParameters->getProfileNum() == 2 && userParameters->getProfile1Empty()) { utilityObject->error("You must read in profile number 1 first\n"); return MUSTREADINPROFILE1FIRST; } if(userParameters->getProfileNum() == 1) // for the 1st profile { code = readSeqs(alignPtr, 1, &offendingSeq); if (code == OK) { // success; found some seqs. 
userParameters->setStructPenalties1(NONE); alignPtr->clearSecStruct1(); // Clear old info if (structPenalties != NONE) // feature table / mask in alignment { userParameters->setStructPenalties1(structPenalties); if (structPenalties == SECST) { alignPtr->addSecStructMask1(&secStructMask); } alignPtr->addGapPenaltyMask1(&gapPenaltyMask); alignPtr->addSecStructName1(secStructName); } alignPtr->setProfile1NumSeqs(alignPtr->getNumSeqs()); userParameters->setProfile1Empty(false); userParameters->setProfile2Empty(true); cout << "No. of seqs = " << alignPtr->getNumSeqs() << endl; } else { return code; // FIXME and offendingSeq? } } else { // first seq to be read = profile1_nseqs + 1 int profile1NumOfSeqs = alignPtr->getNumSeqs(); code = readSeqs(alignPtr, profile1NumOfSeqs + 1, &offendingSeq); if(code == OK) { userParameters->setStructPenalties2(NONE); alignPtr->clearSecStruct2(); // Clear old info if (structPenalties != NONE) // feature table / mask in alignment { userParameters->setStructPenalties2(structPenalties); if (structPenalties == SECST) { alignPtr->addSecStructMask2(&secStructMask); } alignPtr->addGapPenaltyMask2(&gapPenaltyMask); alignPtr->addSecStructName2(secStructName); } cout << "No. of seqs in profile=" << alignPtr->getNumSeqs() - profile1NumOfSeqs << endl; cout << "Total no. of seqs =" << alignPtr->getNumSeqs() << endl; userParameters->setProfile2Empty(false); userParameters->setEmpty(false); } else { return code; // FIXME and offendingSeq? } } // Clear out the masks used in this class. This is important!!!! secStructMask.clear(); gapPenaltyMask.clear(); secStructName = ""; string typeOfAlign = userParameters->getDNAFlag() ? 
"DNA" : "PROTEIN"; cout << "Sequences assumed to be " << typeOfAlign << endl; if (userParameters->getMenuFlag()) { cout<< "\n\n"; } alignPtr->printSequencesAddedInfo(); if(userParameters->getDNAFlag()) { userParameters->setDNAMultiGap(); } else { userParameters->setProtMultiGap(); } return OK; } /* * The function checkInfile is used to find out which format the file is in, and it * also calls the appropriate function to count the seqences. */ void FileReader::checkInfile(int *nseqs, auto_ptr& fileParser) { char _lineIn[MAXLINE + 1]; int i; int lengthLine = 0; *nseqs = 0; while (fileIn->getline(_lineIn, MAXLINE + 1)) { if (!utilityObject->blankLine(_lineIn)) { break; } } lengthLine = strlen(_lineIn) - 1; // Mark Change 20-6-07 for (i = lengthLine; i >= 0; i--) { if (isgraph(_lineIn[i])) { break; } } _lineIn[i + 1] = EOS; // Put first 7 characters into upper case! for (i = 0; i <= 6 && i <= lengthLine; i++) { _lineIn[i] = toupper(_lineIn[i]); } // Create the parser to read the file. if (utilityObject->lineType(_lineIn, "ID")) { // EMBL/Swiss-Prot format ? 
fileParser.reset(new EMBLFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is EMBL\n"; } else if (utilityObject->lineType(_lineIn, "CLUSTAL")) { fileParser.reset(new ClustalFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is CLUSTAL\n"; } else if (utilityObject->lineType(_lineIn, "PILEUP")) // MSF { fileParser.reset(new MSFFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is MSF\n"; } else if (utilityObject->lineType(_lineIn, "!!AA_MULTIPLE_ALIGNMENT")) // MSF { fileParser.reset(new MSFFileParser(sequenceFileName)); userParameters->setDNAFlag(false); if(userParameters->getDisplayInfo()) cout << "Sequence format is MSF\n"; } else if (utilityObject->lineType(_lineIn, "!!NA_MULTIPLE_ALIGNMENT")) // MSF { fileParser.reset(new MSFFileParser(sequenceFileName)); userParameters->setDNAFlag(true); if(userParameters->getDisplayInfo()) cout << "Sequence format is MSF\n"; } else if (strstr(_lineIn, "MSF") && _lineIn[strlen(_lineIn) - 1] == '.' 
&& _lineIn[strlen(_lineIn) - 2] == '.') // MSF { fileParser.reset(new MSFFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is MSF\n"; } else if (utilityObject->lineType(_lineIn, "!!RICH_SEQUENCE")) // RSF { fileParser.reset(new RSFFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is RSF\n"; } else if (utilityObject->lineType(_lineIn, "#NEXUS")) { //utilityObject->error("Cannot read NEXUS format\n"); return; } else if (*_lineIn == '>') { if ((lengthLine>=3) && _lineIn[3] == ';') { //if(_lineIn[3] == ';') // distinguish PIR and Pearson { // PIR fileParser.reset(new PIRFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is PIR\n"; } } else { // PEARSON fileParser.reset(new PearsonFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is Pearson\n"; } } else if ((*_lineIn == '"') || (*_lineIn == '%') || (*_lineIn == '#')) { // GDE format fileParser.reset(new GDEFileParser(sequenceFileName)); if(userParameters->getDisplayInfo()) cout << "Sequence format is GDE\n"; if (*_lineIn == '%') { userParameters->setDNAFlag(false); } else if (*_lineIn == '#') { userParameters->setDNAFlag(true); } } else { return ; } try { // Get the number of sequences. This is passed back as a pointer! *nseqs = fileParser->countSeqs(); // no output in 1.83: cout << "number of seqs is: " << *nseqs << "\n"; } catch(exception ex) { *nseqs = 0; } } } clustalx-2.1/clustalW/fileInput/FileParser.h0000644000175000017500000000354711470725216020771 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** The aim of this class is to return one sequence at a time. * Note that the file must be open when it is passed to the FileParser. * The parser does not know the name of the file to open. Only the filereader knows. * * Changes: * * Mark 24-1-2007. 
I added the function findDelimiter to determine if '\r' or '\n' * will be used as the line delimiter when parsing the file. * * 10-02-07,Nigel Brown(EMBL): Removed delimiter and findDelimiter() * members, as functionality now handled by the stream class. */ #ifndef FILEPARSER_H #define FILEPARSER_H #include #include "../alignment/Sequence.h" #include "../general/userparams.h" #include #include "InFileStream.h" namespace clustalw { class FileParser { public: /* Functions */ FileParser(); virtual ~FileParser(); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq) = 0; virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL) = 0; virtual int countSeqs() = 0; // VIRTUAL virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) = 0; void fillCharTab(void); char getDelimiter(string filename); /* Attributes */ char chartab[128]; int getParseExitCode() { return parseExitCode; }; protected: void freeFileResources(InFileStream* filePtr); InFileStream* _fileIn; int parseExitCode; // reason for returning empty sequence // vector; same as used in FileReader private: /* Functions */ /* Attributes */ }; } #endif clustalx-2.1/clustalW/fileInput/FileParser.cpp0000644000175000017500000000367511470725216021326 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * 10-02-07,Nigel Brown(EMBL): Removed delimiter and findDelimiter() * members, as functionality now handled by the stream class. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "FileParser.h" const char LF = 0x0a; //linefeed const char CR = 0x0d; //carriage return namespace clustalw { FileParser::FileParser() { parseExitCode = OK; } FileParser::~FileParser() { // Dont do anything!!!! 
} void FileParser::fillCharTab(void) { register int i; register char c; for (i = 0; i < 128; chartab[i++] = 0) ; for (i = 0; i <= userParameters->getMaxAA() + 1; i++) { c = userParameters->getAminoAcidCode(i); chartab[(int)c] = chartab[tolower(c)] = c; } } void FileParser::freeFileResources(InFileStream* filePtr) { if(filePtr != 0) { filePtr->close(); delete filePtr; filePtr = 0; } } char FileParser::getDelimiter(string filename) { ifstream in; int type = 0; char delim; in.open(filename.c_str(), ios::in); in.seekg(0, ios::beg); //look for CR or LF or CRLF (or LFCR) if (in.is_open()) { char c; while (in.get(c)) { if (c == CR) type |= 1; else if (c == LF) type |= 2; else if (type) break; } } in.close(); switch (type) { case 1: //cout << "file is Mac System 9" << endl; delim = '\r'; break; case 2: //cout << "file is UNIX" << endl; delim = '\n'; break; case 3: //cout << "file is DOS" << endl; delim = '\n'; break; default: //short or empty file //cout << "file is UNIX (default)" << endl; delim = '\n'; } return delim; } } clustalx-2.1/clustalW/fileInput/EMBLFileParser.h0000644000175000017500000000202711470725216021421 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef EMBLFILEPARSER_H #define EMBLFILEPARSER_H #include #include "FileParser.h" namespace clustalw { class EMBLFileParser : public FileParser { public: /* Functions */ EMBLFileParser(string filePath); ~EMBLFileParser(); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ void getSwissFeature(char* line, vector& secStructMask, int length); void getSwissMask(char* line, vector& gapPenaltyMask, int length); /* Attributes */ string fileName; }; } #endif clustalx-2.1/clustalW/fileInput/EMBLFileParser.cpp0000644000175000017500000003305211470725216021756 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "EMBLFileParser.h" namespace clustalw { /* * constructor sets up chartab array. */ EMBLFileParser::EMBLFileParser(string filePath) { fileName = filePath; fillCharTab(); } /* * dont need to destruct anything. */ EMBLFileParser::~EMBLFileParser() { } /* * get range of sequences */ vector EMBLFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // start at the beginning // Read in lines until we get to the begining of sequence seqNum. while (_currentSeqNum != seqNum) { while(!utilityObject->lineType(_line, "ID")) { if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore! 
{ _fileIn->close(); return Sequence(blank, blank, blank); } } ++_currentSeqNum; if(_currentSeqNum == seqNum) // Found the sequence { break; } // Get next line so that we are past the '>' line _fileIn->getline(_line, MAXLINE + 1); } for (i = 5; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname for (i = 0; i <= (int)strlen(_sname); i++) { if (_sname[i] == ' ') { _sname[i] = EOS; break; } } _sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); name = string(_sname); // Andreas Wilm (UCD): why cout here? cout << name << "\n"; while (!utilityObject->lineType(_line, "SQ")) { if(!_fileIn->getline(_line, MAXLINE + 1)) // If we cannot get anymore! { _fileIn->close(); // FIXME AW: why return with a name but otherwise empty seq? return Sequence(blank, name, blank); } } while (_fileIn->getline(_line, MAXLINE + 1)) { if (gotSeq && utilityObject->blankLine(_line)) { break; } // NOTE I changed this to -1 and -2 because the getline doesnt return the \n if (strlen(_line) > 2 && _line[strlen(_line) - 1] == '.' && _line[strlen(_line) - 2] == '.') { continue; } for (i = 0; i <= MAXLINE; i++) { c = _line[i]; if (c == '\n' || c == EOS || c == '/') { break; } // EOL c = chartab[c]; if (c) { gotSeq = true; characterSeq += c; } } if (c == '/') { break; } } _fileIn->close(); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) 
{ _fileIn->close(); cerr << "There was an exception in the EMBLFileParser::getSeq function.\n" << "Need to end program\n"; exit(1); } } /* * count the number of sequences in the file and return the number */ int EMBLFileParser::countSeqs() { char line[MAXLINE + 1]; // char c; line[0] = EOS; int numSeqs; // int i; //bool seqOk; numSeqs = 0; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { return 0; // No sequences found! } while (_fileIn->getline(line, MAXLINE + 1)) { if (utilityObject->lineType(line, "ID")) { numSeqs++; } } _fileIn->close(); return numSeqs; } catch(...) { _fileIn->close(); cerr << "An exception has occured in the function EMBLFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * get secondary structure information from the file. */ void EMBLFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { bool guigetss = false; if(userParameters->getProfileNum() == 1 && userParameters->getStructPenalties1()) guigetss = true; if(userParameters->getProfileNum() == 2 && userParameters->getStructPenalties2()) guigetss = true; char _title[MAXLINE + 1]; char _line[MAXLINE + 1]; char _lin2[MAXLINE + 1]; char _sname[MAXNAMES + 1]; char _feature[MAXLINE + 1]; int i; _line[0] = '\0'; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // clear out the masks gapPenaltyMask.clear(); secStructMask.clear(); // find the start of the sequence entry for (;;) { while (!utilityObject->lineType(_line, "ID")) { if (!_fileIn->getline(_line, MAXLINE + 1)) { _fileIn->close(); return; } } for (i = 5; i <= (int)strlen(_line); i++) { if (_line[i] != ' ') { break; } } strncpy(_sname, _line + i, MAXNAMES); // remember entryname for (i = 0; i <= (int)strlen(_sname); i++) { if (_sname[i] == ' ') { _sname[i] = EOS; break; } } 
_sname[MAXNAMES] = EOS; utilityObject->rTrim(_sname); utilityObject->blankToUnderscore(_sname); // look for secondary structure feature table / gap penalty mask while (_fileIn->getline(_line, MAXLINE + 1)) { if (utilityObject->lineType(_line, "FT")) { sscanf(_line + 2, "%s", _feature); if (strcmp(_feature, "HELIX") == 0 || strcmp(_feature, "STRAND") == 0) { if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(_title, "Found secondary structure in alignment file: "); strcat(_title, _sname); (*_lin2) = utilityObject->promptForYesNo(_title, "Use it to set local gap penalties "); } else { (*_lin2) = 'y'; } if (guigetss || ((*_lin2 != 'n') && (*_lin2 != 'N'))) { structPenalties = SECST; for (i = 0; i < length; i++) { secStructMask.push_back('.'); } do { getSwissFeature(&_line[2], secStructMask, length); _fileIn->getline(_line, MAXLINE + 1); } while (utilityObject->lineType(_line, "FT")); } else { do { _fileIn->getline(_line, MAXLINE + 1); } while (utilityObject->lineType(_line, "FT")); } secStructName = string(_sname); } } else if (utilityObject->lineType(_line, "GM")) { if (userParameters->getInteractive()) { strcpy(_title, "Found gap penalty mask in alignment file: "); strcat(_title, _sname); (*_lin2) = utilityObject->promptForYesNo(_title, "Use it to set local gap penalties "); } else { (*_lin2) = 'y'; } if (guigetss || ((*_lin2 != 'n') && (*_lin2 != 'N'))) { structPenalties = GMASK; for (i = 0; i < length; i++) { gapPenaltyMask.push_back('1'); } do { getSwissMask(&_line[2], gapPenaltyMask, length); _fileIn->getline(_line, MAXLINE + 1); } while (utilityObject->lineType(_line, "GM")); } else { do { _fileIn->getline(_line, MAXLINE + 1); } while (utilityObject->lineType(_line, "GM")); } secStructName = string(_sname); } if (utilityObject->lineType(_line, "SQ")) { break; } if (structPenalties != NONE) { break; } } } _fileIn->close(); } catch(...) 
{ _fileIn->close(); cerr << "An exception has occured in the function EMBLFileParser::getSecStructure()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * get the sec structure mask */ void EMBLFileParser::getSwissFeature(char* line, vector& secStructMask, int length) { char c, s, feature[MAXLINE + 1]; int i, startPos, endPos; try { if (sscanf(line, "%s%d%d", feature, &startPos, &endPos) != 3) { return ; } if (strcmp(feature, "HELIX") == 0) { c = 'A'; s = '$'; } else if (strcmp(feature, "STRAND") == 0) { c = 'B'; s = '%'; } else { return ; } if (startPos >= length || endPos >= length) { return ; } secStructMask[startPos - 1] = s; for (i = startPos; i < endPos - 1; i++) { secStructMask[i] = c; } secStructMask[endPos - 1] = s; } catch(...) { cerr << "An exception has occured in the function EMBLFileParser::getSwissFeature()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * get the gap penalty mask. */ void EMBLFileParser::getSwissMask(char* line, vector& gapPenaltyMask, int length) { int i, value, startPos, endPos; try { if (sscanf(line, "%d%d%d", &value, &startPos, &endPos) != 3) { return; } if (value < 1 || value > 9) { return ; } if (startPos >= length || endPos >= length) { return ; } for (i = startPos - 1; i < endPos; i++) { gapPenaltyMask[i] = value + '0'; } } catch(...) { cerr << "An exception has occured in the function EMBLFileParser::getSwissMask()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } } clustalx-2.1/clustalW/fileInput/ClustalFileParser.h0000644000175000017500000000172111470725216022311 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 
*/ #ifndef CLUSTALFILEPARSER_H #define CLUSTALFILEPARSER_H #include #include "FileParser.h" namespace clustalw { class ClustalFileParser : public FileParser { public: /* Functions */ ClustalFileParser(string filePath); ~ClustalFileParser(); virtual Sequence getSeq(int seqNum, string *offendingSeq=NULL); virtual vector getSeqRange(int firstSeq, int num, string *offendingSeq=NULL); virtual int countSeqs(); virtual void getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length); /* Attributes */ private: /* Functions */ bool clustalBlankline(char* line); // Only used in this class! string fileName; /* Attributes */ }; } #endif clustalx-2.1/clustalW/fileInput/ClustalFileParser.cpp0000644000175000017500000004332411470725216022651 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines. * * 27-4-2007, Mark Larkin (UCD): Made 2 small changes to getSecStructure function. There * was a problem with the secondary structure info in windows. 
*/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "ClustalFileParser.h" namespace clustalw { ClustalFileParser::ClustalFileParser(string filePath) { fileName = filePath; fillCharTab(); _fileIn = 0; } ClustalFileParser::~ClustalFileParser() { } vector ClustalFileParser::getSeqRange(int firstSeq, int no, string *offendingSeq) { vector seqRangeVector; int i; for (i=0; iopen(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // start at the beginning _fileIn->getline(line, MAXLINE + 1); // read the title line...ignore it while (_fileIn->getline(line, MAXLINE + 1)) //nige { if (!clustalBlankline(line)) { for (i = 1; i < seqNum; i++) { _fileIn->getline(line, MAXLINE + 1); //nige } for (j = 0; j <= (int)strlen(line); j++) if (line[j] != ' ') { break; } string _tempStr = string(line); sscanf(_tempStr.c_str(), "%s%s", sname, tseq); for (j = 0; j < MAXNAMES; j++) { if (sname[j] == ' ') { break; } } sname[j] = EOS; utilityObject->rTrim(sname); utilityObject->blankToUnderscore(sname); // replace blanks with '_' name = string(sname); for (i = 0; i <= MAXLINE; i++) { c = tseq[i]; if (isspace(c) || c == EOS) { break; } // EOL c = chartab[c]; if (c) { characterSeq += c; // Add the character to the sequence } } for (i = 0;; i++) { if (!_fileIn->getline(line, MAXLINE + 1)) // If we cant get another line! 
{ freeFileResources(_fileIn); if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } if (clustalBlankline(line)) { break; } } } } freeFileResources(_fileIn); // getSecStructure(vector& gapPenaltyMask, // vector& secStructMask, string& secStructName, int &structPenalties, int length) if ((int)characterSeq.length() > userParameters->getMaxAllowedSeqLength()) { parseExitCode=SEQUENCETOOBIG; if (offendingSeq!=NULL) offendingSeq->assign(name); // return empty seq return Sequence(blank, blank, blank); } return Sequence(characterSeq, name, title); } catch(...) { freeFileResources(_fileIn); cerr << "An exception has occured in the function ClustalFileParser::getSeq()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * The function countSeqs tells us how many sequences are in a clustal format file. * Need to check if the file is open! */ int ClustalFileParser::countSeqs() { char line[MAXLINE + 1]; int _nseqs; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige if(!_fileIn->is_open()) { freeFileResources(_fileIn); return 0; // No sequences found! } while (_fileIn->getline(line, MAXLINE + 1)) { if (!utilityObject->blankLine(line)) { break; } } // This gets us to the begining of the sequence lines! while (_fileIn->getline(line, MAXLINE + 1)) { if (!clustalBlankline(line)) { break; } } _nseqs = 1; while (_fileIn->getline(line, MAXLINE + 1)) { if (clustalBlankline(line)) { freeFileResources(_fileIn); return _nseqs; } _nseqs++; } freeFileResources(_fileIn); return (int)0; // if you got to here-funny format/no seqs. } catch(...) 
{ freeFileResources(_fileIn); cerr << "An exception has occured in the function ClustalFileParser::countSeqs()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } /* * This function is to get the secondary structure for a Clustal format file. * I am aware that I am using some C and some C++ here, and this may seem like * bad style, but I think it is better to use the C functions for processing the * strings as they are already working. */ void ClustalFileParser::getSecStructure(vector& gapPenaltyMask, vector& secStructMask, string& secStructName, int &structPenalties, int length) { bool guigetss = false; if(userParameters->getProfileNum() == 1 && userParameters->getStructPenalties1()) guigetss = true; if(userParameters->getProfileNum() == 2 && userParameters->getStructPenalties2()) guigetss = true; char title[MAXLINE + 1]; title[0] = '\0'; char line[MAXLINE + 1]; line[0] = '\0'; char lin2[MAXLINE + 1]; lin2[0] = '\0'; char tseq[MAXLINE + 1]; tseq[0] = '\0'; char sname[MAXNAMES + 1]; sname[0] = '\0'; for(int i = 1; i < MAXNAMES + 1; i++) { title[i] = line[i] = lin2[i] = tseq[i] = sname[i] ='0'; } int i, j, len, ix, struct_index = 0; char c; try { _fileIn = new InFileStream; //nige _fileIn->open(fileName.c_str()); //nige _fileIn->seekg(0, std::ios::beg); // NOTE clear out the masks gapPenaltyMask.clear(); secStructMask.clear(); len = 0; // initialise length to zero if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } // read the title line...ignore it if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } // read the next line... 
// skip any blank lines for (;;) { if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } if (!utilityObject->blankLine(line)) { break; } } // look for structure table lines ix = -1; for (;;) { if (line[0] != '!') { break; } if (strncmp(line, "!SS", 3) == 0) { ix++; sscanf(line + 4, "%s%s", sname, tseq); for (j = 0; j < MAXNAMES; j++) { if (sname[j] == ' ') { break; } } sname[j] = EOS; utilityObject->rTrim(sname); utilityObject->blankToUnderscore(sname); if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(title, "Found secondary structure in alignment file: "); strcat(title, sname); (*lin2) = utilityObject->promptForYesNo(title, "Use it to set local gap penalties "); } else { (*lin2) = 'y'; } if (guigetss || ((*lin2 != 'n') && (*lin2 != 'N'))) { structPenalties = SECST; struct_index = ix; for (i = 0; i < length; i++) { secStructMask.push_back('.'); gapPenaltyMask.push_back('.'); } secStructName = string(sname); for (i = 0; len < length; i++) { c = tseq[i]; if (c == '\n' || c == EOS) { break; } // EOL if (!isspace(c)) { if(len < (int)secStructMask.size()) { secStructMask[len++] = c; // NOTE array notation = BAD } } } } } else if (strncmp(line, "!GM", 3) == 0) { ix++; sscanf(line + 4, "%s%s", sname, tseq); for (j = 0; j < MAXNAMES; j++) { if (sname[j] == ' ') { break; } } sname[j] = EOS; utilityObject->rTrim(sname); utilityObject->blankToUnderscore(sname); if (userParameters->getInteractive() && !userParameters->getGui()) { strcpy(title, "Found gap penalty mask in alignment file: "); strcat(title, sname); (*lin2) = utilityObject->promptForYesNo(title, "Use it to set local gap penalties "); } else { (*lin2) = 'y'; } if (guigetss || ((*lin2 != 'n') && (*lin2 != 'N'))) { structPenalties = GMASK; struct_index = ix; for (i = 0; i < length; i++) { gapPenaltyMask.push_back('1'); } secStructName = string(sname); for (i = 0; len < length; i++) { c = tseq[i]; if (c == '\n' || c == EOS) { break; } // EOL if (!isspace(c)) { 
if(len < (int)gapPenaltyMask.size()) { gapPenaltyMask[len++] = c; } } } } } if (structPenalties != NONE) { break; } if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } if (structPenalties == NONE) { freeFileResources(_fileIn); return ; } // skip any more comment lines while (line[0] == '!') { if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } // skip the sequence lines and any comments after the alignment for (;;) { if (isspace(line[0]) || line[0] == '\0') // Mark change 27-4-2007 { break; } if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } // read the rest of the alignment for (;;) { // skip any blank lines for (;;) { if (!utilityObject->blankLine(line)) { break; } if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } // get structure table line for (ix = 0; ix < struct_index; ix++) { if (line[0] != '!') { if (structPenalties == SECST) { utilityObject->error("bad secondary structure format\n"); } else { utilityObject->error("bad gap penalty mask format\n"); } structPenalties = NONE; freeFileResources(_fileIn); return ; } if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } if (structPenalties == SECST) { if (strncmp(line, "!SS", 3) != 0) { utilityObject->error("bad secondary structure format\n"); structPenalties = NONE; freeFileResources(_fileIn); return ; } sscanf(line + 4, "%s%s", sname, tseq); for (i = 0; len < length; i++) { c = tseq[i]; if (c == '\n' || c == EOS) { break; } // EOL if (!isspace(c)) { secStructMask[len++] = c; } } } else if (structPenalties == GMASK) { if (strncmp(line, "!GM", 3) != 0) { utilityObject->error("bad gap penalty mask format\n"); structPenalties = NONE; freeFileResources(_fileIn); return ; } sscanf(line + 4, "%s%s", sname, tseq); for (i = 0; len < length; i++) { c = tseq[i]; if (c == '\n' || c == EOS) { break; } // EOL if (!isspace(c)) { gapPenaltyMask[len++] = c; } } } 
// skip any more comment lines while (line[0] == '!') { if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } // skip the sequence lines for (;;) { if (isspace(line[0]) || line[0] == '\0') // Mark change 27-4-2007 { break; } if (!_fileIn->getline(line, MAXLINE + 1)) { freeFileResources(_fileIn); return ; } } } freeFileResources(_fileIn); } catch(...) { freeFileResources(_fileIn); cerr << "An exception has occured in the function ClustalFileParser::getSecStructure()\n" << "Program needs to terminate.\nPlease contact the Clustal developers\n"; exit(1); } } bool ClustalFileParser::clustalBlankline(char* line) { int i; if (line[0] == '!') { return true; } for (i = 0; line[i] != '\n' && line[i] != EOS; i++) { if (isdigit(line[i]) || isspace(line[i]) || (line[i] == '*') || (line[i] == ':') || (line[i] == '.')) ; else { return false; } } return true; } } clustalx-2.1/clustalW/fileInput/0000755000175000017500000000000011470725216016553 5ustar ddineenddineenclustalx-2.1/clustalW/clustalw_version.h0000644000175000017500000000020111470725216020361 0ustar ddineenddineen#ifndef CLUSTALW_VERSION_H #define CLUSTALW_VERSION_H #define CLUSTALW_NAME "ClustalW" #define CLUSTALW_VERSION "2.1" #endif clustalx-2.1/clustalW/alignment/Sequence.h0000644000175000017500000000314111470725216020512 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This class contains the sequence information. It should also contain * the sequence name, and the encoded version. * A vector of Sequences is passed to the Alignment object to set it up. * CHANGES: * Mark 22-1-2007: Added a unique sequence identifier to help with correct output * order of sequences. 
*/ #ifndef SEQUENCE_H #define SEQUENCE_H #include #include #include "../general/userparams.h" #include "../general/utils.h" namespace clustalw { class Sequence { public: /* Functions */ Sequence(std::string& seq, std::string& name, std::string& title); Sequence(std::string& seq, std::string& name, std::string& title, unsigned long id); Sequence(std::vector* encodedSequence, std::string& name, std::string& title, unsigned long id); void encodeSequence(); void printSequence(); std::vector* getSequence(); bool isEmpty(); std::string getName(); std::string getTitle(); bool checkDNAFlag(); unsigned long getIdentifier(){return identifier;} /* Attributes */ private: /* Functions */ void checkIntegrity(); void copyStringIntoVector(std::vector* _vectorTo, std::string* _stringFrom); /* Attributes */ std::vector _sequence; std::vector _encodedSequence; std::string _name; std::string _title; unsigned long identifier; }; } #endif clustalx-2.1/clustalW/alignment/Sequence.cpp0000644000175000017500000001104411470725216021046 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "Sequence.h" using namespace std; namespace clustalw { /** * * @param seq * @param name * @param title * @return */ Sequence::Sequence(string& seq, string& name, string& title) { copyStringIntoVector(&_sequence, &seq); encodeSequence(); _name = name; _title = title; identifier = utilityObject->getUniqueSequenceIdentifier(); } Sequence::Sequence(std::string& seq, std::string& name, std::string& title, unsigned long id) { copyStringIntoVector(&_sequence, &seq); encodeSequence(); _name = name; _title = title; identifier = id; } /** * This is an overloaded contructor that is used to construct a seq object from an * encoded sequenced instead of a string. * @param encodedSequence * @param name * @param title * @param id The unique identifier from the previous sequence!!! 
* @return */ Sequence::Sequence(std::vector* encodedSequence, std::string& name, std::string& title, unsigned long id) { _encodedSequence = *encodedSequence; _name = name; _title = title; identifier = id; } /** * */ void Sequence::encodeSequence() { /* code seq as ints .. use gapPos2 for gap */ std::vector::iterator it; _encodedSequence.push_back(0); for(it = _sequence.begin(); it != _sequence.end(); ++it) { if (*it == '-') { _encodedSequence.push_back(userParameters->getGapPos2()); } else { _encodedSequence.push_back(userParameters->resIndex( userParameters->getAminoAcidCodes(), *it)); } } } /** * * @param _vectorTo * @param _stringFrom */ void Sequence::copyStringIntoVector(vector* _vectorTo, string* _stringFrom) { _vectorTo->clear(); for(int i = 0; i < (int)_stringFrom->size(); i++) { _vectorTo->push_back(_stringFrom->at(i)); } if(_vectorTo->size() != _stringFrom->size()) { std::cerr << "Error: In function copyStringIntoVector. Strings different length!\n"; exit(1); } } /** * */ void Sequence::printSequence() { std::cout << "This is the sequence and the encoded sequence " << _name << std::endl; std::vector::iterator itChar; for(itChar = _sequence.begin(); itChar != _sequence.end(); ++itChar) { cout << *itChar; } cout << std::endl; std::vector::iterator itInt; for(itInt = _encodedSequence.begin(); itInt != _encodedSequence.end(); ++itInt) { cout << " " << *itInt; } cout << std::endl; } /** * */ void Sequence::checkIntegrity() { // The sequences should be the same length. if(_sequence.size() != _encodedSequence.size()) { std::cerr << "Error: _sequence is not same size as _encodedSequence\n"; exit(1); } } /** * * @return the encoded sequence, this is what is used in the pairwise! 
*/ std::vector* Sequence::getSequence() { return &_encodedSequence; } /** * * @return */ std::string Sequence::getName() { return _name; } /** * * @return */ std::string Sequence::getTitle() { return _title; } /** * * @return */ bool Sequence::isEmpty() { if(_sequence.size() == 0) { return true; } else { return false; } } /** * * @return */ bool Sequence::checkDNAFlag() // check if DNA or Protein // The decision is based on counting all A,C,G,T,U or N. // If >= 85% of all characters (except -) are as above => DNA { int c, numResidues, numBases; float ratio; string dna_codes = "ACGTUN"; numResidues = numBases = 0; vector::iterator seqIterator = _sequence.begin(); while (seqIterator != _sequence.end()) { if (*seqIterator != '-') { numResidues++; if (*seqIterator == 'N') { numBases++; } else { c = userParameters->resIndex(dna_codes, *seqIterator); if (c >= 0) { numBases++; } } } seqIterator++; } if ((numBases == 0) || (numResidues == 0)) { return false; } ratio = (float)numBases / (float)numResidues; if (ratio >= 0.85) { return true; } else { return false; } } } clustalx-2.1/clustalW/alignment/ObjectiveScore.h0000644000175000017500000000233311470725216021652 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This ObjectiveScore class is used to provide an objective function to score * an alignment. * It is used with iteration to improve an alignment. 
*/ #ifndef OBJECTIVESCORE_H #define OBJECTIVESCORE_H #include "../general/clustalw.h" #include "../substitutionMatrix/globalmatrix.h" namespace clustalw { class Alignment; typedef struct { int first; int second; } Pair; class ObjectiveScore { public: ObjectiveScore(); long getScore(const Alignment* alnToScore); private: float scoreLetters(int seq1, int seq2); float scoreGaps(int seq1, int seq2); void calcNormalisedSeqWeights(const vector* seqWeight, vector* normSeqWeight); long score; int matrix[NUMRES][NUMRES]; const Alignment* alignToScore; long scale; int weightScale; int sagaGapEx, sagaGapOp; int gapPos1, gapPos2; static const int BOTHGAPS = 0; static const int NOGAPS = -1; static const int GAPINSEQB = 1; static const int GAPINSEQA = 2; }; } #endif clustalx-2.1/clustalW/alignment/ObjectiveScore.cpp0000644000175000017500000001665011470725216022214 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "ObjectiveScore.h" #include "Alignment.h" #include "../general/userparams.h" #include "../general/debuglogObject.h" namespace clustalw { ObjectiveScore::ObjectiveScore() : score(0), alignToScore(0), scale(100000), sagaGapEx(12), sagaGapOp(8), gapPos1(userParameters->getGapPos1()), gapPos2(userParameters->getGapPos2()) { } long ObjectiveScore::getScore(const Alignment* alnToScore) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("In getScore function"); } #endif alignToScore = alnToScore; // If it doesnt point to any object return 0; if(!alignToScore) { return 0; } int maxRes = subMatrix->getAlnScoreMatrix(matrix); if (maxRes == 0) { utilityObject->error("Matrix for alignment scoring not found\n"); return 0; } const vector* seqWeight = alignToScore->getSeqWeights(); vector normalisedSeqWeights; calcNormalisedSeqWeights(seqWeight, &normalisedSeqWeights); int seq1, seq2; float w1, w2 = 1.0; float weight = 1.0; score = 0; float 
pwScoreLetters = 0, pwScoreGaps = 0, pwScore = 0; float scoreTotal = 0.0; int numSeqs = alignToScore->getNumSeqs(); int sizeNormalSeqWeight = normalisedSeqWeights.size(); for (seq1 = 1; seq1 <= numSeqs && seq1 <= sizeNormalSeqWeight; seq1++) { w1 = normalisedSeqWeights[seq1 - 1]; for (seq2 = seq1 + 1; seq2 <= numSeqs && seq2 <= sizeNormalSeqWeight; seq2++) { w2 = normalisedSeqWeights[seq2 - 1]; weight = w1 * w2; pwScoreLetters = scoreLetters(seq1, seq2); pwScoreGaps = scoreGaps(seq1, seq2); pwScore = pwScoreLetters + pwScoreGaps; scoreTotal += weight * pwScore; #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " weight = " << weight << " scoreLetters = " << pwScoreLetters << " scoreGaps = " << pwScoreGaps << " scorePair = " << pwScore << "\n" << "scoreTotal = " << scoreTotal << "\n"; logObject->logMsg(outs.str()); } #endif } } score = static_cast(scoreTotal); #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " score = " << score; logObject->logMsg(outs.str()); } #endif utilityObject->info("Alignment Score %d\n", score); return score; } float ObjectiveScore::scoreLetters(int seq1, int seq2) { if(!alignToScore) { return 0; } const unsigned numColsSeq1 = alignToScore->getSeqLength(seq1); const unsigned numColsSeq2 = alignToScore->getSeqLength(seq2); if(numColsSeq1 != numColsSeq2) { return 0; // The sequences should be the same length after alignment. 
} float scoreLetters = 0; unsigned colStart = 1; bool gap1, gap2; for(unsigned col = 1; col < numColsSeq1; col++) { gap1 = alignToScore->isGap(seq1, col); gap2 = alignToScore->isGap(seq2, col); if(!gap1 || !gap2) { colStart = col; break; } } unsigned colEnd = numColsSeq1; for(unsigned col = numColsSeq1; col >= 1; --col) { gap1 = alignToScore->isGap(seq1, col); gap2 = alignToScore->isGap(seq2, col); if(!gap1 || !gap2) { colEnd = col; break; } } const SeqArray* seqArray = alignToScore->getSeqArray(); int scoreMatch = 0; for(unsigned col = colStart; col <= colEnd; col++) { int res1 = (*seqArray)[seq1][col]; int res2 = (*seqArray)[seq2][col]; scoreMatch = matrix[res1][res2]; scoreLetters += scoreMatch; } return scoreLetters; } float ObjectiveScore::scoreGaps(int seq1, int seq2) { if(!alignToScore) { return 0; } const unsigned numColsSeq1 = alignToScore->getSeqLength(seq1); const unsigned numColsSeq2 = alignToScore->getSeqLength(seq2); if(numColsSeq1 != numColsSeq2) { return 0; // The sequences should be the same length after alignment. } unsigned colStart = 1; bool gap1, gap2; for(unsigned col = 1; col < numColsSeq1; col++) { gap1 = alignToScore->isGap(seq1, col); gap2 = alignToScore->isGap(seq2, col); if(!gap1 || !gap2) { colStart = col; break; } } unsigned colEnd = numColsSeq1; for(unsigned col = numColsSeq1; col >= 1; --col) { gap1 = alignToScore->isGap(seq1, col); gap2 = alignToScore->isGap(seq2, col); if(!gap1 || !gap2) { colEnd = col; break; } } bool inGap1 = false; bool inGap2 = false; float gapOpen = userParameters->getGapOpen(); float gapExtend = userParameters->getGapExtend(); float scoreGaps = 0; for(unsigned col = colStart; col <= colEnd; col++) { gap1 = alignToScore->isGap(seq1, col); gap2 = alignToScore->isGap(seq2, col); if(gap1 && gap2) { continue; } if(gap1) { if(!inGap1) { // NOTE I left out the option of having different end gaps stuff. 
scoreGaps += gapOpen; inGap1 = true; // Opening a gap in seq1 } else { // Already in a gap scoreGaps += gapExtend; } continue; } else if(gap2) { if(!inGap2) { // NOTE I left out the option of having different end gaps stuff. scoreGaps += gapOpen; inGap2 = true; // Opening a gap in seq2 } else { // Already in a gap scoreGaps += gapExtend; } continue; } inGap1 = inGap2 = false; } return scoreGaps; } void ObjectiveScore::calcNormalisedSeqWeights(const vector* seqWeight, vector* normSeqWeight) { if(!seqWeight || !normSeqWeight) { return; } int sumWeights = 0; for(int i = 0; i < (int)seqWeight->size() - 1; i++) { sumWeights += (*seqWeight)[i]; } normSeqWeight->resize(seqWeight->size()); for(int i = 0; i < (int)seqWeight->size() - 1; i++) { (*normSeqWeight)[i] = static_cast((*seqWeight)[i]) / static_cast(sumWeights); #if DEBUGFULL if(logObject && DEBUGLOG) { ostringstream outs; outs << " normSeqWeight[i] = " << (*normSeqWeight)[i]; logObject->logMsg(outs.str()); } #endif } } } clustalx-2.1/clustalW/alignment/AlignmentOutput.h0000644000175000017500000000577411470725216022117 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /* * The class AlignmentOutput is used to output the Alignment in all the different * formats that have been selected. It will output all the different file types if * these have been selected from the menu or the commandline. * To use this class we must call openAlignmentOutput first. Then we call the function * createAlignmentOutput with an Alignment to be output and the first and last sequence * to be output as well. 
*/ #ifndef ALIGNMENTOUTPUT_H #define ALIGNMENTOUTPUT_H #include #include #include #include #include #include #include #include "Alignment.h" namespace clustalw { typedef struct rangeNum { int start; int end; } rangeNum; typedef struct outputRegion { int _firstSeq; int _lastSeq; int _firstRes; int _lastRes; } outputRegion; class AlignmentOutput { public: /* Functions */ AlignmentOutput(); bool openAlignmentOutput(string path); bool QTOpenFilesForOutput(AlignmentFileNames fileNames); void createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq); void printSecStructMask(int prfLength, vector* mask, vector* structMask); /* Attributes */ private: /* Functions */ void fastaOut(Alignment* alignPtr, outputRegion partToOutput); void clustalOut(Alignment* alignPtr, outputRegion partToOutput); void gcgOut(Alignment* alignPtr, outputRegion partToOutput); void nexusOut(Alignment* alignPtr, outputRegion partToOutput); void phylipOut(Alignment* alignPtr, outputRegion partToOutput); void nbrfOut(Alignment* alignPtr, outputRegion partToOutput); void gdeOut(Alignment* alignPtr, outputRegion partToOutput); string nameonly(string s); void findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes, int lastRes, int firstSeq); bool openExplicitFile(auto_ptr& outFile, string fileName); string openOutputFile(auto_ptr& outFile, string prompt, string path, string fileExtension); int SeqGCGCheckSum(vector* sequence, int length); void showAlign(); /* Attributes */ auto_ptr clustalOutFile; auto_ptr gcgOutFile; auto_ptr nbrfOutFile; auto_ptr phylipOutFile; auto_ptr gdeOutFile; auto_ptr nexusOutFile; auto_ptr fastaOutFile; string clustalOutName; string gcgOutName; string phylipOutName; string nbrfOutName; string gdeOutName; string nexusOutName; string fastaOutName; vector strongGroup; vector weakGroup; int clusSecStructOffset; int clusSequenceOffset; }; } #endif clustalx-2.1/clustalW/alignment/AlignmentOutput.cpp0000644000175000017500000024572711470725216022456 0ustar 
ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Output routines ported from clustalx C code in 'interface.c'. * * Changes: * * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention * to define output columns, while the port uses (firsRes, lastRes). Fixed * problems in all 7 output routines where the conventions were mixed giving * over-long output blocks. * * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut * * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83 * added const NAMESWIDTH which is calculated based on the * length of the longest sequence name and the upper and lower * limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "AlignmentOutput.h" namespace clustalw { AlignmentOutput::AlignmentOutput() : clusSecStructOffset(9), clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs { // NOTE in the old clustal these arrays ended with NULL as a value. 
try { strongGroup.resize(9); strongGroup[0] = string("STA"); strongGroup[1] = string("NEQK"); strongGroup[2] = string("NHQK"); strongGroup[3] = string("NDEQ"); strongGroup[4] = string("QHRK"); strongGroup[5] = string("MILV"); strongGroup[6] = string("MILF"); strongGroup[7] = string("HY"); strongGroup[8] = string("FYW"); weakGroup.resize(11); weakGroup[0] = string("CSA"); weakGroup[1] = string("ATV"); weakGroup[2] = string("SAG"); weakGroup[3] = string("STNK"); weakGroup[4] = string("STPA"); weakGroup[5] = string("SGND"); weakGroup[6] = string("SNDEQK"); weakGroup[7] = string("NDEQHK"); weakGroup[8] = string("NEQHRK"); weakGroup[9] = string("FVLIM"); weakGroup[10] = string("HFY"); } catch(const exception& e) { cerr << "An exception has occured in the contructor of AlignmentOutput.\n" << e.what() << "\n"; exit(1); } } /* * The function createAlignmentOutput is used to call all the individual functions * for the different file types. It is possible to output the alignment in all file * types at the same time. */ void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq) { int length; int firstRes; // starting sequence range - Ramu int lastRes; // ending sequence range bool rangeOK; if((firstSeq <= 0) || (lastSeq < firstSeq)) { utilityObject->error("Cannot produce alignment output." " Incorrect call to createAlignmentOutput." " firstSeq = %d" " lastSeq = %d\n", firstSeq, lastSeq); return; } length = 0; firstRes = 1; lastRes = 0; rangeOK = false; try { length = alignPtr->getLengthLongestSequence(); lastRes = length; if (userParameters->getRangeFromToSet()) { firstRes = userParameters->getRangeFrom(); lastRes = userParameters->getRangeTo(); // Check if the numbers are ok. 
if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1)) { cerr << "seqrange numbers are not set properly, using default....\n"; firstRes = 1; lastRes = length; } else { rangeOK = true; } } if (rangeOK && (lastRes > length)) { lastRes = length; cout << "Seqrange " << lastRes << " is more than the " << length << " setting it to " << length << " \n"; } if (userParameters->getMenuFlag()) { cout << "Consensus length = " << lastRes << " \n"; } outputRegion partToOutput; partToOutput._firstSeq = firstSeq; partToOutput._lastSeq = lastSeq; partToOutput._firstRes = firstRes; partToOutput._lastRes = lastRes; if(userParameters->getOutputClustal()) { if(clustalOutFile.get() != 0 && clustalOutFile->is_open()) { clustalOut(alignPtr, partToOutput); if(clustalOutFile->is_open()) { clustalOutFile->close(); } utilityObject->info("CLUSTAL-Alignment file created [%s]\n", clustalOutName.c_str()); } } if(userParameters->getOutputNbrf()) { if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open()) { nbrfOut(alignPtr, partToOutput); if(nbrfOutFile->is_open()) { nbrfOutFile->close(); } utilityObject->info("NBRF/PIR-Alignment file created [%s]\n", nbrfOutName.c_str()); } } if(userParameters->getOutputGCG()) { if(gcgOutFile.get() != 0 && gcgOutFile->is_open()) { gcgOut(alignPtr, partToOutput); if(gcgOutFile->is_open()) { gcgOutFile->close(); } utilityObject->info("GCG-Alignment file created [%s]\n", gcgOutName.c_str()); } } if(userParameters->getOutputPhylip()) { if(phylipOutFile.get() != 0 && phylipOutFile->is_open()) { phylipOut(alignPtr, partToOutput); if(phylipOutFile->is_open()) { phylipOutFile->close(); } utilityObject->info("PHYLIP-Alignment file created [%s]\n", phylipOutName.c_str()); } } if(userParameters->getOutputGde()) { if(gdeOutFile.get() != 0 && gdeOutFile->is_open()) { gdeOut(alignPtr, partToOutput); if(gdeOutFile->is_open()) { gdeOutFile->close(); } utilityObject->info("GDE-Alignment file created [%s]\n", gdeOutName.c_str()); } } if(userParameters->getOutputNexus()) { 
if(nexusOutFile.get() != 0 && nexusOutFile->is_open()) { nexusOut(alignPtr, partToOutput); if(nexusOutFile->is_open()) { nexusOutFile->close(); } utilityObject->info("NEXUS-Alignment file created [%s]\n", nexusOutName.c_str()); } } if(userParameters->getOutputFasta()) { if(fastaOutFile.get() != 0 && fastaOutFile->is_open()) { fastaOut(alignPtr, partToOutput); if(fastaOutFile->is_open()) { fastaOutFile->close(); } utilityObject->info("Fasta-Alignment file created [%s]\n", fastaOutName.c_str()); } } // Output the alignment to the screen if it is required. if (userParameters->getShowAlign() && userParameters->getMenuFlag()) { showAlign(); } } catch(const exception& e) { cerr << "An exception has occured in the createAlignmentOutput function.\n" << e.what() << "\n"; exit(1); } } bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames) { if(!userParameters->getOutputClustal() && !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() && !userParameters->getOutputPhylip() && !userParameters->getOutputGde() && !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) { utilityObject->error("You must select an alignment output format\n"); return false; } if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == "" && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == "" && fileNames.phylipFile == "") { utilityObject->error("No names for output files. 
Cannot output alignment.\n"); return false; } if(fileNames.clustalFile != "") { clustalOutName = fileNames.clustalFile; if(!openExplicitFile(clustalOutFile, clustalOutName)) { return false; } } if(fileNames.fastaFile != "") { fastaOutName = fileNames.fastaFile; if(!openExplicitFile(fastaOutFile, fastaOutName)) { return false; } } if(fileNames.gcgFile != "") { gcgOutName = fileNames.gcgFile; if(!openExplicitFile(gcgOutFile, gcgOutName)) { return false; } } if(fileNames.gdeFile != "") { gdeOutName = fileNames.gdeFile; if(!openExplicitFile(gdeOutFile, gdeOutName)) { return false; } } if(fileNames.nexusFile != "") { nexusOutName = fileNames.nexusFile; if(!openExplicitFile(nexusOutFile, nexusOutName)) { return false; } } if(fileNames.nrbfFile != "") { nbrfOutName = fileNames.nrbfFile; if(!openExplicitFile(nbrfOutFile, nbrfOutName)) { return false; } } if(fileNames.phylipFile != "") { phylipOutName = fileNames.phylipFile; if(!openExplicitFile(phylipOutFile, phylipOutName)) { return false; } } return true; } /* * The function openAlignmentOutput opens a file for output. 
It returns true if it * has been successful and false if it has not been successful */ bool AlignmentOutput::openAlignmentOutput(string path) { if(!userParameters->getOutputClustal() && !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() && !userParameters->getOutputPhylip() && !userParameters->getOutputGde() && !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) { utilityObject->error("You must select an alignment output format\n"); return false; } string _fileNameToOutput = path; if(_fileNameToOutput == "") { /*_fileNameToOutput = userParameters->getSeqName();*/ /* BUG 166 , file extension, FS, 2009-01-26 */ utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput); } if(userParameters->getOutputClustal()) { if (userParameters->getOutfileName() != "") { clustalOutName = userParameters->getOutfileName(); if(!openExplicitFile(clustalOutFile, clustalOutName)) { return false; } } else { clustalOutName = openOutputFile(clustalOutFile, "\nEnter a name for the CLUSTAL output file ", _fileNameToOutput, "aln"); if(clustalOutName == "") { return false; // We have not been successful. } } } if(userParameters->getOutputNbrf()) { if (userParameters->getOutfileName() != "") { nbrfOutName = userParameters->getOutfileName(); if(!openExplicitFile(nbrfOutFile, nbrfOutName)) { return false; } } else { nbrfOutName = openOutputFile(nbrfOutFile, "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput, "pir"); if(nbrfOutName == "") { return false; // We have not been successful. } } } if(userParameters->getOutputGCG()) { if (userParameters->getOutfileName() != "") { gcgOutName = userParameters->getOutfileName(); if(!openExplicitFile(gcgOutFile, gcgOutName)) { return false; } } else { gcgOutName = openOutputFile(gcgOutFile, "\nEnter a name for the GCG output file", _fileNameToOutput, "msf"); if(gcgOutName == "") { return false; // We have not been successful. 
} } } if(userParameters->getOutputPhylip()) { if (userParameters->getOutfileName() != "") { phylipOutName = userParameters->getOutfileName(); if(!openExplicitFile(phylipOutFile, phylipOutName)) { return false; } } else { phylipOutName = openOutputFile(phylipOutFile, "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy"); if(phylipOutName == "") { return false; // We have not been successful. } } } if(userParameters->getOutputGde()) { if (userParameters->getOutfileName() != "") { gdeOutName = userParameters->getOutfileName(); if(!openExplicitFile(gdeOutFile, gdeOutName)) { return false; } } else { gdeOutName = openOutputFile(gdeOutFile, "\nEnter a name for the GDE output file ", _fileNameToOutput, "gde"); if(gdeOutName == "") { return false; // We have not been successful. } } } if(userParameters->getOutputNexus()) { if (userParameters->getOutfileName() != "") { nexusOutName = userParameters->getOutfileName(); if(!openExplicitFile(nexusOutFile, nexusOutName)) { return false; } } else { nexusOutName = openOutputFile(nexusOutFile, "\nEnter a name for the NEXUS output file ", _fileNameToOutput, "nxs"); if(nexusOutName == "") { return false; // We have not been successful. } } } if(userParameters->getOutputFasta()) { if (userParameters->getOutfileName() != "") { fastaOutName = userParameters->getOutfileName(); if(!openExplicitFile(fastaOutFile, fastaOutName)) { return false; } } else { fastaOutName = openOutputFile(fastaOutFile, "\nEnter a name for the Fasta output file ", _fileNameToOutput, "fasta"); if(fastaOutName == "") { return false; // We have not been successful. } } } return true; } /* * The function openExplicitFile is used to open a file when we have the name already. * It returns true if the file has been opened correctly, false otherwise. 
*/ bool AlignmentOutput::openExplicitFile(auto_ptr& outFile, string fileName) { if (fileName == "") { cerr << "Bad output file [" << fileName << "]\n"; utilityObject->error("Bad output file [%s]\n", fileName.c_str()); return false; } outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc)); if(!outFile->is_open()) { utilityObject->error("Cannot open output file [%s]\n", fileName.c_str()); return false; } return true; } /* * The function openOutputFile is used to open a file when we dont have the name yet. * This function returns the name if it has been successful. If it has not been * successful it returns a blank string "" */ string AlignmentOutput::openOutputFile(auto_ptr& outFile, string prompt, string path, string fileExtension) { string temp; string _fileName; // Will return this name. string message; _fileName = path + fileExtension; if(_fileName.compare(userParameters->getSeqName()) == 0) { cout << "Output file name is the same as input file.\n"; if (userParameters->getMenuFlag()) { message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]"; utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } } else if (userParameters->getMenuFlag()) { message = prompt + " [" + _fileName + "]"; utilityObject->getStr(message, temp); if(temp != "") { _fileName = temp; } } outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc)); if(!outFile->is_open()) { utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str()); return ""; } return _fileName; } /* * fastaOut: output the alignment in FASTA format */ void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput) { char residue; int val; int i, ii; int j, slen; int lineLength; int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; rangeNum rnum; cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n"; try { const SeqArray* alignment = 
alignPtr->getSeqArray(); //NOTE june29 vector sequence; sequence.assign(lastRes + 1, '0'); lineLength = PAGEWIDTH - alignPtr->getMaxNames(); lineLength = lineLength - lineLength % 10; // round to a multiple of 10 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07 { lineLength = LINELENGTH; } for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); slen = 0; //for(j = firstRes; j < firstRes + lastRes; j++) //- nige for(j = firstRes; j <= lastRes; j++) //- nige { if (j <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '-'; } else { residue = userParameters->getAminoAcidCode(val); } if (userParameters->getLowercase()) { sequence[j - firstRes] = residue; } else { sequence[j - firstRes] = residue; } slen++; } (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i)); if(userParameters->getSeqRange()) { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end; } (*fastaOutFile) << "\n"; for(j = 1; j <= slen; j++) { (*fastaOutFile) << sequence[j-1]; if((j % lineLength == 0) || (j == slen)) { (*fastaOutFile) << "\n"; } } } fastaOutFile->close(); cout << "FASTA file created!\n"; } catch(const bad_alloc& e) { fastaOutFile->close(); cerr << "A bad_alloc exception has occured in the fastaOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { fastaOutFile->close(); cerr << "An exception has occured in the fastaOut function.\n" << e.what() << "\n"; exit(1); } catch(...) 
{ fastaOutFile->close(); cerr << "An exception has occured in the fastaOut function.\n"; exit(1); } } /* * gcgOut: output the alignment in gcg file format */ void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput) { char residue; int val; int i, ii, chunks, block; int j, k, pos1, pos2; long grandChecksum; try { int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; rangeNum rnum; vector sequence; vector allChecks; int _maxLength = alignPtr->getMaxAlnLength(); sequence.assign(_maxLength + 1, '0'); allChecks.assign(lastSeq + 1, 0); const SeqArray* alignment = alignPtr->getSeqArray(); for(i = firstSeq; i <= lastSeq; i++) { //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige for(j = firstRes; j <= lastRes; j++) //- nige { if (j <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '.'; } else { residue = userParameters->getAminoAcidCode(val); } sequence[j - firstRes + 1] = residue; } // pad any short sequences with gaps, to make all sequences the same length for(; j <= firstRes + lastRes - 1; j++) { sequence[j - firstRes + 1] = '.'; } allChecks[i] = SeqGCGCheckSum(&sequence, lastRes); } int _index; grandChecksum = 0; for(i = 1; i <= alignPtr->getNumSeqs(); i++) { _index = alignPtr->getOutputIndex(i - 1); grandChecksum += allChecks[_index]; } grandChecksum = grandChecksum % 10000; (*gcgOutFile) << "PileUp\n\n"; (*gcgOutFile) << "\n\n MSF:" << setw(5) << lastRes << " Type: "; if(userParameters->getDNAFlag()) { (*gcgOutFile) << "N"; } else { (*gcgOutFile) << "P"; } (*gcgOutFile) << " Check:" << setw(6) << grandChecksum << " .. 
\n\n"; float _seqWeight; int length = lastRes - firstRes + 1; //- nige for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR; (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo Len:" //<< setw(5) << lastRes << " Check:" << setw(6) << allChecks[i] //- nige << setw(5) << length << " Check:" << setw(6) << allChecks[i] << " Weight: " << fixed << setprecision(1) << _seqWeight << "\n"; } (*gcgOutFile) << "\n//\n"; chunks = length / GCG_LINELENGTH; //- nige if(length % GCG_LINELENGTH != 0) //- nige { ++chunks; } for(block = 1; block <= chunks; block++) { (*gcgOutFile) << "\n\n"; pos1 = ((block - 1) * GCG_LINELENGTH) + 1; pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); if (!userParameters->getSeqRange()) { (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left << alignPtr->getName(i) << " "; } else { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); std::stringstream ss; std::stringstream ss2; string rangeStart; string rangeEnd; ss << rnum.start; ss >> rangeStart; ss2 << rnum.end; ss2 >> rangeEnd; string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart; nameAndRange += "-" + rangeEnd; (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left << nameAndRange; } for(j = pos1, k = 1; j <= pos2; j++, k++) { //JULIE - //check for int sequences - pad out with '.' 
characters to end if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j + firstRes - 1); val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { residue = '.'; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '.'; } else { residue = userParameters->getAminoAcidCode(val); } (*gcgOutFile) << residue; if(j % 10 == 0) { (*gcgOutFile) << " "; } } } } (*gcgOutFile) << "\n\n"; gcgOutFile->close(); } catch(const bad_alloc& e) { gcgOutFile->close(); cerr << "A bad_alloc exception has occured in the gcgOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { gcgOutFile->close(); cerr << "An exception has occured in the gcgOut function.\n" << e.what() << "\n"; exit(1); } catch(...) { gcgOutFile->close(); cerr << "An exception has occured in the gcgOut function.\n"; exit(1); } } void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput) { int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; try { char residue; int val; int i, ii, chunks, block; int j, k, pos1, pos2; int nameLen; bool warn; vector _seqNames; const SeqArray* alignment = alignPtr->getSeqArray(); rangeNum rnum; // Push on a blank string. This is because the index starts at 1 for the seqs etc.. 
_seqNames.push_back(""); nameLen = 0; for(i = firstSeq; i <= lastSeq; i++) { _seqNames.push_back(alignPtr->getName(i)); ii = _seqNames.size(); if(nameLen < ii) { nameLen = ii; } } if(nameLen > 10) { warn = false; for(i = 0; i < (int)_seqNames.size() - 1; i++) { for(j = i + 1; j < (int)_seqNames.size(); j++) { if (_seqNames[i].compare(_seqNames[j]) == 0) { warn = true; } } } if(warn) { utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous."); } else { utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output."); } } int length = lastRes - firstRes + 1; //- nige chunks = length / GCG_LINELENGTH; //- nige if(length % GCG_LINELENGTH != 0) //- nige { ++chunks; } (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " " << setw(6) << length; //-nige for(block = 1; block <= chunks; block++) { pos1 = ((block - 1) * GCG_LINELENGTH) + 1; //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige pos2 = (length < pos1 + GCG_LINELENGTH - 1)? 
length : pos1 + GCG_LINELENGTH - 1; for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); if(block == 1) { if(!userParameters->getSeqRange()) { string name = alignPtr->getName(i); (*phylipOutFile) << "\n" << setw(10) << left << name.substr(0,10) << " "; } else { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); std::stringstream ss; std::stringstream ss2; string rangeStart; string rangeEnd; ss << rnum.start; ss >> rangeStart; ss2 << rnum.end; ss2 >> rangeEnd; string nameAndRange = nameonly(alignPtr->getName(i)) + "/"; nameAndRange += rangeStart + "-" + rangeEnd; (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left << nameAndRange; } } else { (*phylipOutFile) << "\n "; } for(j = pos1, k = 1; j <= pos2; j++, k++) { if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j + firstRes - 1); val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '-'; } else { residue = userParameters->getAminoAcidCode(val); } (*phylipOutFile) << residue; if(j % 10 == 0) { (*phylipOutFile) << " "; } } } (*phylipOutFile) << "\n"; } } catch(const bad_alloc& e) { phylipOutFile->close(); cerr << "A bad_alloc exception has occured in the phylipOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { phylipOutFile->close(); cerr << "An exception has occured in the phylipOut function.\n" << e.what() << "\n"; exit(1); } catch(...) 
{ phylipOutFile->close(); cerr << "An exception has occured in the phylipOut function.\n"; exit(1); } } void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput) { int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; try { char residue; int val; int i, ii, chunks, block; int j, k, pos1, pos2; const SeqArray* alignment = alignPtr->getSeqArray(); rangeNum rnum; int length = lastRes - firstRes + 1; //- nige chunks = length / GCG_LINELENGTH; //- nige if(length % GCG_LINELENGTH != 0) //- nige { ++chunks; } (*nexusOutFile) << "#NEXUS\n"; (*nexusOutFile) << "BEGIN DATA;\n"; (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs() << " nchar=" << length << ";\n"; //- nige (*nexusOutFile) << "format missing=?\n"; // removed - bugzilla bug 204 /* (*nexusOutFile) << "symbols=\""; for(i = 0; i <= userParameters->getMaxAA(); i++) { (*nexusOutFile) << userParameters->getAminoAcidCode(i); } (*nexusOutFile) << "\"\n"; */ (*nexusOutFile) << "interleave datatype="; bool _dnaFlag = userParameters->getDNAFlag(); string _type = _dnaFlag ? "DNA " : "PROTEIN "; (*nexusOutFile) << _type; (*nexusOutFile) << "gap= -;\n"; (*nexusOutFile) << "\nmatrix"; for(block = 1; block <= chunks; block++) { pos1 = ((block - 1) * GCG_LINELENGTH) + 1; pos2 = (length < pos1 + GCG_LINELENGTH - 1)? 
length : pos1 + GCG_LINELENGTH - 1; //- nige for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); if (!userParameters->getSeqRange()) { (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left << alignPtr->getName(i) << " "; } else { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); std::stringstream ss; std::stringstream ss2; string rangeStart; string rangeEnd; ss << rnum.start; ss >> rangeStart; ss2 << rnum.end; ss2 >> rangeEnd; string nameAndRange = nameonly(alignPtr->getName(i)) + "/"; nameAndRange += rangeStart + "-" + rangeEnd; (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left << nameAndRange; } for(j = pos1, k = 1; j <= pos2; j++, k++) { if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j + firstRes - 1); val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '-'; } else { residue = userParameters->getAminoAcidCode(val); } (*nexusOutFile) << residue; } } (*nexusOutFile) << "\n"; } (*nexusOutFile) << ";\nend;\n"; } catch(const bad_alloc& e) { nexusOutFile->close(); cerr << "A bad_alloc exception has occured in the nexusOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { nexusOutFile->close(); cerr << "An exception has occured in the nexusOut function.\n" << e.what() << "\n"; exit(1); } catch(...) { nexusOutFile->close(); cerr << "An exception has occured in the nexusOut function.\n"; exit(1); } } /* * gdeOut: print out the alignment in gde file format. 
*/ void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput) { int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; int length = lastRes - firstRes + 1; //- nige try { char residue; int val; vector _ssMask1, _ssMask2, seq; int i, ii; int j, slen; int lineLength; const SeqArray* alignment = alignPtr->getSeqArray(); rangeNum rnum; seq.assign(alignPtr->getMaxAlnLength() + 1, '0'); // decide the line length for this alignment - maximum is LINELENGTH lineLength = PAGEWIDTH - alignPtr->getMaxNames(); lineLength = lineLength - lineLength % 10; // round to a multiple of 10 if (lineLength > LINELENGTH || lineLength <= 0) { lineLength = LINELENGTH; } // If we are using the secondary structure info from profile 1, set it up if (userParameters->getStructPenalties1() == SECST && userParameters->getUseSS1() == true) { int _lengthSeq1 = alignPtr->getSeqLength(1); vector* _secStructMask1 = alignPtr->getSecStructMask1(); _ssMask1.assign(_lengthSeq1 + 10, 0); for (i = 0;i < _lengthSeq1;i++) { _ssMask1[i] = _secStructMask1->at(i); } printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1); } // If we are using the secondary structure info from profile 2, set it up if (userParameters->getStructPenalties2() == SECST && userParameters->getUseSS2() == true) { int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1; int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq); _ssMask2.assign(lengthSeqProfile2 + 10, 0); vector* _secStructMask2 = alignPtr->getSecStructMask2(); for (i=0;i < lengthSeqProfile2;i++) { _ssMask2[i] = _secStructMask2->at(i); } printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2); } bool _dnaFlag = userParameters->getDNAFlag(); string _prefix = _dnaFlag ? 
"#" : "%"; for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); (*gdeOutFile) << _prefix; if(!userParameters->getSeqRange()) { (*gdeOutFile) << alignPtr->getName(i) << "\n"; } else { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start << "-" << rnum.end << "\n"; } slen = 0; //for(j = firstRes; j < firstRes + lastRes; j++) //- nige for(j = firstRes; j <= lastRes; j++) //- nige { if (j <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '-'; } else { residue = userParameters->getAminoAcidCode(val); } if (userParameters->getLowercase()) { seq[j - firstRes] = (char)tolower((int)residue); } else { seq[j - firstRes] = residue; } slen++; } for(j = 1; j <= slen; j++) { (*gdeOutFile) << seq[j-1]; if((j % lineLength == 0) || (j == slen)) { (*gdeOutFile) << "\n"; } } } if (userParameters->getOutputStructPenalties() == 0 || userParameters->getOutputStructPenalties() == 2) { if (userParameters->getStructPenalties1() == SECST && userParameters->getUseSS1() == true) { (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left << alignPtr->getSecStructName1() << "\n"; //for(i = firstRes; i < firstRes + lastRes; i++) //- nige for(i = firstRes; i <= lastRes; i++) //- nige { val = _ssMask1[i - 1]; if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { seq[i - firstRes] = '-'; } else { seq[i - firstRes] = val; } } for(i = 1; i <= lastRes; i++) { (*gdeOutFile) << seq[i-1]; if((i % lineLength == 0) || (i == lastRes)) { (*gdeOutFile) << "\n"; } } } if (userParameters->getStructPenalties2() == SECST && userParameters->getUseSS2() == true) { (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left << alignPtr->getSecStructName2() << "\n"; //for(i = firstRes; i < 
firstRes + lastRes; i++) //- nige for(i = firstRes; i <= lastRes; i++) //- nige { val = _ssMask2[i - 1]; if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { seq[i - firstRes] = '-'; } else { seq[i - firstRes] = val; } } //for(i = 1; i <= lastRes; i++) //- nige for(i = 1; i <= length; i++) //- nige { (*gdeOutFile) << seq[i - 1]; //if((i % lineLength == 0) || (i == lastRes)) //- nige if((i % lineLength == 0) || (i == length)) //- nige { (*gdeOutFile) << "\n"; } } } } if (userParameters->getOutputStructPenalties() == 1 || userParameters->getOutputStructPenalties() == 2) { if (userParameters->getStructPenalties1() != NONE && userParameters->getUseSS1() == true) { (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left << alignPtr->getSecStructName1() << "\n"; vector* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1(); //for(i = firstRes; i < firstRes + lastRes; i++) //- nige for(i = firstRes; i <= lastRes; i++) //- nige { val = _gapPenaltyMask1->at(i - 1); if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { seq[i - firstRes] = '-'; } else { seq[i - firstRes] = val; } } //for(i = 1; i <= lastRes; i++) //- nige for(i = 1; i <= length; i++) //- nige { (*gdeOutFile) << seq[i - 1]; //if((i % lineLength == 0) || (i == lastRes)) //- nige if((i % lineLength == 0) || (i == length)) //- nige { (*gdeOutFile) << "\n"; } } } if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true) { (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left << alignPtr->getSecStructName2() << "\n"; vector* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2(); //for(i = firstRes; i < firstRes + lastRes; i++) //- nige for(i = firstRes; i < length; i++) //- nige { val = _gapPenaltyMask2->at(i-1); if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { seq[i - firstRes] = '-'; } else { seq[i - firstRes] = val; } } //for(i = 1; i <= lastRes; i++) //- nige for(i = 1; i <= 
length; i++) //- nige { (*gdeOutFile) << seq[i - 1]; //if((i % lineLength == 0) || (i == lastRes)) //- nige if((i % lineLength == 0) || (i == length)) //- nige { (*gdeOutFile) << "\n"; } } } } gdeOutFile->close(); } catch(const bad_alloc& e) { gdeOutFile->close(); cerr << "A bad_alloc exception has occured in the gdeOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { gdeOutFile->close(); cerr << "An exception has occured in the gdeOut function.\n" << e.what() << "\n"; exit(1); } catch(...) { gdeOutFile->close(); cerr << "An exception has occured in the gdeOut function.\n"; exit(1); } } void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput) { char residue; int val; int i, ii; int j, slen; int lineLength; rangeNum rnum; int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; try { int _maxLength = alignPtr->getMaxAlnLength(); vector sequence; sequence.assign(_maxLength + 1, '0'); const SeqArray* alignment = alignPtr->getSeqArray(); // decide the line length for this alignment - maximum is LINELENGTH lineLength = PAGEWIDTH - alignPtr->getMaxNames(); lineLength = lineLength - lineLength % 10; // round to a multiple of 10 if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07 { lineLength = LINELENGTH; } // Get name prefix. DL if DNA, P1 if protein string namePrefix; bool _dnaFlag = userParameters->getDNAFlag(); namePrefix = _dnaFlag ? 
">DL;" : ">P1;"; //int length = lastRes - firstRes + 1; //- nige for(ii = firstSeq; ii <= lastSeq; ii++) { i = alignPtr->getOutputIndex(ii - 1); (*nbrfOutFile) << namePrefix; if (!userParameters->getSeqRange()) { (*nbrfOutFile) << alignPtr->getName(i) << "\n" << alignPtr->getTitle(i) << "\n"; } else { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start << "-"<< rnum.end << "\n" << alignPtr->getTitle(i) << "\n"; } slen = 0; //for(j = firstRes; j < firstRes + lastRes; j++) //- nige for(j = firstRes; j <= lastRes; j++) //- nige { // NOTE I changed this here!!!!! if (j <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { residue = '-'; } else { residue = userParameters->getAminoAcidCode(val); } sequence[j - firstRes] = residue; slen++; } for(j = 1; j <= slen; j++) { (*nbrfOutFile) << sequence[j - 1]; if((j % lineLength == 0) || (j == slen)) { (*nbrfOutFile) << "\n"; } } (*nbrfOutFile) << "*\n"; } nbrfOutFile->close(); } catch(const bad_alloc& e) { nbrfOutFile->close(); cerr << "A bad_alloc exception has occured in the nbrfOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { nbrfOutFile->close(); cerr << "An exception has occured in the nbrfOut function.\n" << e.what() << "\n"; exit(1); } catch(...) { nbrfOutFile->close(); cerr << "An exception has occured in the nbrfOut function.\n"; exit(1); } } /* * The function clustalOut is used to ouput the Alignment into a clustal format * file. 
*/ void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput) { int firstRes = partToOutput._firstRes; int lastRes = partToOutput._lastRes; int firstSeq = partToOutput._firstSeq; int lastSeq = partToOutput._lastSeq; try { vector seq1; vector seqNo; vector printSeqNo; vector ss_mask1, ss_mask2; const SeqArray* alignment = alignPtr->getSeqArray(); char temp[MAXLINE]; char c; int val; int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks; int i, j, k, l; int pos, ptr; int lineLength; rangeNum rnum; string tmpStr = ""; string sequenceLine = ""; int _numSequences = alignPtr->getNumSeqs(); // PMcG revert to Clustalw1.83 output format for name width const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY); seqNo.assign(_numSequences + 1, 0); printSeqNo.assign(_numSequences + 1, 0); // check that lastSeq <= _numSequences if(lastSeq > _numSequences) { lastSeq = _numSequences; } for (i = firstSeq; i <= lastSeq; i++) { printSeqNo[i] = seqNo[i] = 0; for(j = 1; j < firstRes; j++) { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 if((val >= 0) || (val <= userParameters->getMaxAA())) { seqNo[i]++; } } } seq1.assign(alignPtr->getMaxAlnLength() + 1, 0); // Check if we have secondary structure in file 1 and if we want to output it. if (userParameters->getStructPenalties1() == SECST && userParameters->getUseSS1() == true) { int lengthSeq1 = alignPtr->getSeqLength(1); ss_mask1.assign(lengthSeq1 + 10, 0); vector* _secStructMask1 = alignPtr->getSecStructMask1(); for (i = 0; i < lengthSeq1; i++) { ss_mask1[i] = _secStructMask1->at(i); } printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1); } // Check if we have secondary structure in file 2 and if we want to output it. 
if (userParameters->getStructPenalties2() == SECST && userParameters->getUseSS2() == true && userParameters->getProfile2Empty() == false) { // AW added test getProfile2Empty was meant to fix bug // 141, but only prevents segfault, output still faulty. // Has to be fixed somewhere else int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1; int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq); ss_mask2.assign(lengthSeqProfile2 + 10, 0); vector* _secStructMask2 = alignPtr->getSecStructMask2(); for (i = 0; i < lengthSeqProfile2; i++) { ss_mask2[i] = _secStructMask2->at(i); } printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2); } (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel() << " multiple sequence alignment\n\n"; // decide the line length for this alignment - maximum is LINELENGTH //PMcG 9-2-2008 make line output same as 1.83 //lineLength = PAGEWIDTH - alignPtr->getMaxNames(); lineLength = PAGEWIDTH - NAMESWIDTH; lineLength = lineLength - lineLength % 10; // round to a multiple of 10 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07 { lineLength = LINELENGTH; } int length = lastRes - firstRes + 1; //- nige chunks = length / lineLength; //- nige if(length % lineLength != 0) //- nige { ++chunks; } //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n", // firstRes, lastRes, length, chunks); // This will loop through each of the blocks. for(lv1 = 1; lv1 <= chunks; ++lv1) { // pos is begining of chunk, ptr is the end of the chunk to be displayed. pos = ((lv1 - 1) * lineLength) + 1; ptr = (length < pos + lineLength - 1) ? 
length : pos + lineLength - 1; //- nige (*clustalOutFile) << "\n"; int _outStructPenalty = userParameters->getOutputStructPenalties(); string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07 if((int)secStructName1.size() > MAXNAMESTODISPLAY) { //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY); secStructName1 = secStructName1.substr(0, NAMESWIDTH); } if (_outStructPenalty == 0 || _outStructPenalty == 2) { if (userParameters->getStructPenalties1() == SECST && userParameters->getUseSS1() == true) { for(i = pos; i <= ptr; ++i) { // Check if we can access mask position first if((int)ss_mask1.size() > i + firstRes - 2) { val = ss_mask1[i + firstRes - 2]; if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { temp[i - pos] = '-'; } else { temp[i - pos] = val; } } else { break; } } temp[i - pos] = EOS; if(userParameters->getSeqRange()) { //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH + clusSecStructOffset) << left << secStructName1 << " " << temp << "\n"; } else { //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY) (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH) << left << secStructName1 << " " << temp << "\n"; } } } if (_outStructPenalty == 1 || _outStructPenalty == 2) { if (userParameters->getStructPenalties1() != NONE && userParameters->getUseSS1() == true) { vector* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1(); for(i = pos; i <= ptr; ++i) { if((int)_gapPenaltyMask1->size() > i + firstRes - 2) { val = _gapPenaltyMask1->at(i + firstRes - 2); if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { temp[i - pos] = '-'; } else { temp[i - pos] = val; } } else { break; } } temp[i - pos] = EOS; //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left << secStructName1 << " " << temp << "\n"; } } string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07 
//if((int)secStructName2.size() > MAXNAMESTODISPLAY) if((int)secStructName2.size() > NAMESWIDTH) { //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY); secStructName2 = secStructName2.substr(0, NAMESWIDTH); } if (_outStructPenalty == 0 || _outStructPenalty == 2) { if (userParameters->getStructPenalties2() == SECST && userParameters->getUseSS2() == true) { for(i = pos; i <= ptr; ++i) { if((int)ss_mask2.size() > i + firstRes - 2) { val = ss_mask2[i + firstRes - 2]; if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { temp[i - pos] = '-'; } else { temp[i - pos] = val; } } else { break; } } temp[i - pos] = EOS; if (userParameters->getSeqRange()) { //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH + clusSecStructOffset) << left << secStructName2; (*clustalOutFile) << " " << temp << "\n"; } else { //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY) (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH) << left << secStructName2 << " " << temp << "\n"; } } } if (_outStructPenalty == 1 || _outStructPenalty == 2) { if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true) { vector* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2(); for(i = pos; i <= ptr; ++i) { if((int)_gapPenaltyMask2->size() > i + firstRes - 2) { val = _gapPenaltyMask2->at(i + firstRes - 2); if (val == userParameters->getGapPos1() || val == userParameters->getGapPos2()) { temp[i - pos] = '-'; } else { temp[i - pos] = val; } } else { break; } } temp[i - pos] = EOS; //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left << secStructName2 << " " << temp << "\n"; } } for(ii = firstSeq; ii <= lastSeq; ++ii) { i = alignPtr->getOutputIndex(ii - 1); printSeqNo[i] = 0; for(j = pos; j <= ptr; ++j) { if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) { //val = alignPtr.getResidue(i, j + firstRes - 1); val = (*alignment)[i][j + 
firstRes - 1]; // NOTE june29 } else { val = -3; } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { seq1[j] = '-'; } else { seq1[j] = userParameters->getAminoAcidCode(val); seqNo[i]++; printSeqNo[i] = 1; } } for(; j <= ptr; ++j) { seq1[j]='-'; } sequenceLine = ""; for(int index = pos; index < ptr + 1; index++) { sequenceLine += seq1[index]; } string seqName = alignPtr->getName(i); //if((int)seqName.size() > MAXNAMESTODISPLAY) if((int)seqName.size() > NAMESWIDTH) { //seqName = seqName.substr(0, MAXNAMESTODISPLAY); seqName = seqName.substr(0, NAMESWIDTH); } if (!userParameters->getSeqRange()) { // NOTE I made a change here from + 5, to + 6. //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left // << alignPtr->getName(i); //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left // << seqName; // PMcG changed this to revert to behaviour of clustalw1.83 // //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6) (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName; } else // Put the sequence range information in! { findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); string nameOnly = nameonly(seqName); std::stringstream ss; std::stringstream ss2; string rangeStart; string rangeEnd; ss << rnum.start; ss >> rangeStart; ss2 << rnum.end; ss2 >> rangeEnd; tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd; //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset) (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset) << left << tmpStr; } (*clustalOutFile) << sequenceLine; if (userParameters->getClSeqNumbers() && printSeqNo[i]) { (*clustalOutFile) << " " << seqNo[i]; } (*clustalOutFile) << "\n"; } // Now print out the conservation information! 
for(i = pos; i <= ptr; ++i) { seq1[i] = ' '; ident = 0; for(j = 1; j <= (int)strongGroup.size(); j++) { catident1[j - 1] = 0; } for(j = 1; j <= (int)weakGroup.size(); j++) { catident2[j - 1] = 0; } for(j = firstSeq; j <= lastSeq; ++j) { if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) && (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq))) {// NOTE june29 if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) && ((*alignment)[firstSeq][i + firstRes - 1] <= userParameters->getMaxAA())) { // Count how many are identical to the first sequence if((*alignment)[firstSeq][i + firstRes - 1] == (*alignment)[j][i + firstRes - 1]) { ++ident; } // Count how many are in the same category. for(k = 1; k <= (int)strongGroup.size(); k++) { for(l = 0; (c = strongGroup[k - 1][l]); l++) { int resCode = (*alignment)[j][i + firstRes - 1]; if(resCode <= userParameters->getMaxAA() + 1) { if (userParameters->getAminoAcidCode(resCode) == c) { catident1[k - 1]++; break; } } } } for(k = 1; k <= (int)weakGroup.size(); k++) { for(l = 0; (c = weakGroup[k - 1][l]); l++) {//NOTE june29 int resCode = (*alignment)[j][i + firstRes - 1]; if(resCode <= userParameters->getMaxAA() + 1) { if (userParameters->getAminoAcidCode(resCode) == c) { catident2[k - 1]++; break; } } } } } } } // Now do the conservation part for each block. if(ident == lastSeq - firstSeq + 1) { seq1[i] = '*'; // All residues the same! 
} else if (!userParameters->getDNAFlag()) { for(k = 1; k <= (int)strongGroup.size(); k++) { if (catident1[k - 1] == lastSeq - firstSeq + 1) { seq1[i] = ':'; // All residues member of the same category break; } } if(seq1[i] == ' ') { for(k = 1; k <= (int)weakGroup.size(); k++) { if (catident2[k - 1] == lastSeq - firstSeq + 1) { seq1[i] = '.'; // All residues member of the same category break; } } } } } sequenceLine = ""; for(int index = pos; index < ptr + 1; index++) { sequenceLine += seq1[index]; } //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++) for(k = 0; k < NAMESWIDTH + 6; k++) { (*clustalOutFile) << " "; } if(userParameters->getSeqRange()) { (*clustalOutFile) << " "; } (*clustalOutFile) << sequenceLine << "\n"; } clustalOutFile->close(); } catch(const bad_alloc& e) { clustalOutFile->close(); cerr << "A bad_alloc exception has occured in the clustalOut function.\n" << e.what() << "\n"; exit(1); } catch(VectorOutOfRange e) { clustalOutFile->close(); cerr << "An exception has occured in the clustalOut function.\n" << e.what() << "\n"; exit(1); } catch(exception e) { clustalOutFile->close(); cerr << "An exception has occured in the clustalOut function.\n" << e.what() << "\n"; exit(1); } catch(...) { clustalOutFile->close(); cerr << "An exception has occured in the clustalOut function.\n"; exit(1); } } /* * The function printSecStructMask takes in the 2 mask vectors. It makes changes to * mask and puts the result in structMask. This is the one that will be printed out. * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no * way to do random access setting in vectors otherwise. 
*/ void AlignmentOutput::printSecStructMask(int prfLength, vector* mask, vector* structMask) { int i, j; // calculate the gap penalty mask from the secondary structures i = 0; try { while (i < prfLength) { if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') { for (j = 0; j < userParameters->getHelixEndMinus(); j++) { if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a' && mask->at(i+j) != '$')) { break; } (*structMask)[i+j] = 'a'; } i += j; while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') { if (i >= prfLength) break; if (mask->at(i) == '$') { (*structMask)[i] = 'A'; i++; break; } else (*structMask)[i] = (*mask)[i]; i++; } for (j = 0; j < userParameters->getHelixEndMinus(); j++) { if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a' || mask->at(i-j-1) == '$')) { (*structMask)[i - j - 1] = 'a'; } } } else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') { for (j = 0; j < userParameters->getStrandEndMinus(); j++) { if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b' && mask->at(i+j) != '%')) { break; } (*structMask)[i+j] = 'b'; } i += j; while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') { if (i >= prfLength) break; if (mask->at(i) == '%') { (*structMask)[i] = 'B'; i++; break; } else { (*structMask)[i] = mask->at(i); } i++; } for (j = 0; j < userParameters->getStrandEndMinus(); j++) { if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' || mask->at(i-j-1) == '%')) { (*structMask)[i-j-1] = 'b'; } } } else { i++; } } } catch(const exception& e) { // Catch all the exceptions cerr << "There has been an exception in the function printSecStructMask\n" << e.what() << "\n"; exit(1); } } /* * The function findRangeValues is used to find the range to be printed out. 
* * */ void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes, int len, int firstSeq) { assert(rnum); assert(firstRes > 0); assert(len > 0); assert(firstSeq > 0); try { int val; int i, ii; int j, slen; char tmpName[FILENAMELEN + 15]; int iStart = 0; int iEnd = 0; // to print sequence start-end with names int found = 0; int ngaps = 0; int tmpStart = 0; int tmpEnd = 0; int ntermgaps = 0; int pregaps = 0; int tmpk = 0; int isRange = 0; int formula = 0; const SeqArray* alignment = alignPtr->getSeqArray(); tmpName[0] = '\0'; slen = 0; ii = firstSeq ; i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere! string name = alignPtr->getName(i); if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3)) { isRange = 1; } for(tmpk = 1; tmpk < firstRes; tmpk++) { // do this irrespective of above sscanf //val = alignPtr.getResidue(i, tmpk); val = (*alignment)[i][tmpk]; // NOTE june29 if ((val < 0) || (val > userParameters->getMaxAA())) { //it is gap pregaps++; } } for(j = firstRes; j < firstRes + len; j++) { if(j > alignPtr->getSeqLength(i)) { val = -3; // Cant get it so break out. } else { //val = alignPtr.getResidue(i, j); val = (*alignment)[i][j]; // NOTE june29 } if((val == -3) || (val == 253)) { break; } else if((val < 0) || (val > userParameters->getMaxAA())) { ngaps++; } else { found = j; } if ( found && (iStart == 0) ) { iStart = found; ntermgaps = ngaps; } slen++; } if(userParameters->getSeqRange()) { cout << "Name : " << alignPtr->getName(i) << " " << "\n firstRes = "<< firstRes << " " << " len = " << len << " " << "\n iStart = " << iStart << " " << "\n tmpStart = " << tmpStart << " " << "\n ngaps = " << ngaps << " " << "\n pregaps = " << pregaps << " "; if (!isRange) { formula = iStart - pregaps; } else { formula = iStart - pregaps + ( tmpStart == 1 ? 
0: tmpStart-1) ; } cout << "\n\nsuggestion iStart - pregaps + tmpStart - ntermgaps = " << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps << " formula " << formula << " "; } else { cerr << "\n no range found .... strange, iStart = " << iStart; formula = 1; } if (pregaps == firstRes - 1) // all gaps - now the conditions........ { formula = tmpStart ; // keep the previous start... } formula = (formula <= 0) ? 1: formula; if (pregaps == 0 && tmpStart == 0) { formula = firstRes; } iEnd = formula + len - ngaps -1; rnum->start = formula; rnum->end = iEnd; cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start << " - " << rnum->end; cout << " Done checking........."; } catch(VectorOutOfRange e) { cerr << "An exception has occured in the findRangeValues function.\n" << e.what() << "\n"; exit(1); } catch(...) { cerr << "An exception has occured in findRangeValues function\n"; exit(1); } } string AlignmentOutput::nameonly(string s) { string tmp; try { int i = 0; while (i < (int)s.size()) { if(s.at(i) != '/') { tmp += s.at(i); i++; } else { break; } } } catch(const exception& e) { cerr << "An exception has occured in the function nameonly\n" << e.what(); exit(1); } return tmp; } int AlignmentOutput::SeqGCGCheckSum(vector* seq, int length) { int i; long check; int seqResIndex = 1; //int seqLength = seq->size(); for (i = 0, check = 0; i < length; i++, seqResIndex++) { char _toUpperCase = (*seq)[seqResIndex]; check += ((i % 57) + 1) * toupper(_toUpperCase); } return (check % 10000); } void AlignmentOutput::showAlign() { //FILE *file; int numLines; char temp[MAXLINE + 1]; temp[0] = '\0'; string fileName; string answer; if(userParameters->getOutputClustal()) { fileName = clustalOutName; } else if(userParameters->getOutputNbrf()) { fileName = nbrfOutName; } else if(userParameters->getOutputGCG()) { fileName = gcgOutName; } else if(userParameters->getOutputPhylip()) { fileName = phylipOutName; } else if(userParameters->getOutputGde()) { fileName = 
gdeOutName; } else if(userParameters->getOutputNexus()) { fileName = nexusOutName; } else if(userParameters->getOutputFasta()) { fileName = fastaOutName; } else { return; // No file output type! } ifstream _fileIn; _fileIn.open(fileName.c_str(), ios::in); _fileIn.seekg(0, std::ios::beg); // start at the beginning cout << "\n\n"; numLines = 0; while(_fileIn.getline(temp, MAXLINE + 1, '\n')) { //fputs(temp,stdout); cout << temp << "\n"; ++numLines; if(numLines >= PAGE_LEN) { cout << "\n"; utilityObject->getStr(string("Press [RETURN] to continue or X to stop"), answer); if(toupper(answer[0]) == 'X') { _fileIn.close(); return; } else { numLines = 0; } } } _fileIn.close(); cout << "\n"; utilityObject->getStr(string("Press [RETURN] to continue"), answer); } } clustalx-2.1/clustalW/alignment/Alignment.h0000644000175000017500000002051511470725216020664 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The Alignment class is used to store the alignment that is being constructed. * It also contains other information such as gap penalty masks etc. * An object of this type will be passed by reference to the FileReader. This FileReader * and the FileParsers will then set it up properly from the information given in the file. * I have decided to put everything into vectors, string etc. No more array*'s, gets rid * of the memory allocation problem. * * CHANGE: * Mark Jan 16th 2007. I have changed the pasteSequencesIntoPosition function to allow * explicit pastes into profile2. * Mark 25-1-2007. I have changed the class so that each of the sequences have a unique * identifier. Several functions were changed to allow this. * * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to * process the name vector. * * 23-03-07,Nigel Brown(EMBL): added testUniqueNames() predicate, which * compares new sequence names with those in the alignment vector BEFORE * appending them. 
*/
// NOTE NOTE NOTE Very important! The list of sequences begins from 1 to numSeqs.
// This is because of the fact that the code was written in Fortran where arrays begin at
// 1. It has become difficult to change this. Ramu has tried before and had problems
// so we decided to leave it this way.
#ifndef ALIGNMENT_H
#define ALIGNMENT_H

// NOTE(review): in this copy of the file the targets of the bare #include
// lines and every template argument list (after `vector`, in the SeqArray
// typedef, ...) are missing. They look stripped by whatever produced this
// copy and must be restored from a pristine source before this will compile.
#include
#include
#include
#include
#include
#include "Sequence.h"
#include "../substitutionMatrix/globalmatrix.h"
#include "../general/userparams.h"
#include "../general/VectorOutOfRange.h"
#include "../general/SequenceNotFoundException.h"

// FIXME because this object is used for aligned and unaligned
// sequences it would be nice to have a isAligned flag here (AW)

using namespace std;

namespace clustalw
{

// Container type of the member seqArray (one row per sequence).
typedef std::vector > SeqArray;

class Alignment
{
    public:
        /* Functions */
        Alignment();

        // --- adding / pasting sequences ---
        void addSequences(vector* seqVector);
        void addSequences(SeqArray* seqVector);
        void appendSequences(vector* seqVector);
        vector cutSelectedSequencesFromAlignment(vector* selected);
        void pasteSequencesIntoPosition(vector* seqVector, int pos,
                                        bool explicitPasteToProfile2 = false);
        // Resizes seqArray, names and titles to `size` and outputIndex to
        // `size - 1`; numSeqs becomes `size - 1`.
        void resizeSeqArray(int size){seqArray.resize(size); numSeqs = size - 1;
        outputIndex.resize(size - 1); names.resize(size); titles.resize(size);};

        // --- output order, masks and weights ---
        bool addOutputIndex(vector* outputIndexToAdd);
        bool appendOutputIndex(vector* outputIndexToAppend);
        void addSecStructMask1(vector* secStructMaskToAdd);
        void addSecStructMask2(vector* secStructMaskToAdd);
        void addSeqWeight(vector* _seqWeight);
        void addGapPenaltyMask1(vector* gapPenaltyMaskToAdd);
        void addGapPenaltyMask2(vector* gapPenaltyMaskToAdd);
        vector* getSecStructMask1();
        vector* getSecStructMask2();
        const vector* getOutputIndex();
        vector* getGapPenaltyMask1();
        vector* getGapPenaltyMask2();
        void addSecStructName1(string nameToAdd);
        void addSecStructName2(string nameToAdd);

        // --- scoring and gap handling ---
        int alignScore(void);
        int countGaps(int s1, int s2, int l);
        void resetAlign();
        void fixGaps();
        float countid(int s1, int s2);

        // --- sequence access (all return pointers into seqArray) ---
        const vector* getSequence(int index){return &seqArray[index];}; // For Pairwise!
        const vector* getSequence(int index) const {return &seqArray[index];};
        const vector* getSequenceFromUniqueId(unsigned long id); // For iteration
        const SeqArray* getSeqArray() const {return &seqArray;}; // For multiple align!
        SeqArray* getSeqArrayForRealloc(){return &seqArray;};
        void updateSequence(int index, const vector* seq);

        bool checkAllNamesDifferent(string *offendingSeq);
        bool testUniqueNames(vector* seqVector, string *offendingSeq);
        void clearAlignment();
        void clearSecStruct1();
        void clearSecStruct2();
        void printSequencesAddedInfo();

        // --- simple accessors ---
        string getSecStructName1();
        string getSecStructName2();
        int getNumSeqs() const {return numSeqs;};
        int getMaxNames();
        int getMaxAlnLength(){return maxAlignmentLength;};
        void setMaxAlnLength(int len){maxAlignmentLength = len;};
        int getLengthLongestSequence();
        int getLengthLongestSequence(int firstSeq, int lastSeq);
        // Stored size of row `index` minus one (no range check on `index`).
        int getSeqLength(int index) const {return seqArray[index].size() - 1;};
        int getSecStructMask1Element(int index);
        int getSecStructMask2Element(int index);
        int getGapPenaltyMask1Element(int index);
        int getGapPenaltyMask2Element(int index);
        int getOutputIndex(int index);
        int getSeqWeight(int index) const;
        const vector* getSeqWeights() const{return &seqWeight;}
        string getName(int index);
        string getTitle(int index);
        vector* QTcalcHistColumnHeights(int firstSeq, int nSeqs,
                                       Array2D* exceptionalRes); // NOTE July 13, for Qt

        // NOTE the following functions are to be used when we are doing a profile
        // alignment. It resets the gaps from fixed.
        void resetProfile1();
        void resetProfile2();

        void resetAllSeqWeights();
        int searchForString(bool* found, int seq, int beginRes, string search);
        void removeGapsFromSelectedSeqs(vector* selected);
        void removeGapOnlyColsFromSelectedSeqs(vector* selected);
        void removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum);
        void setDefaultOutputIndex();
        bool removeAllOutsideRange(int beginPos, int endPos);
        bool updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos);
        bool reloadAlignment();

        int getProfile1NumSeqs(){return profile1NumSeqs;};
        void setProfile1NumSeqs(int value){profile1NumSeqs = value;}
        bool isGap(int seq, int col) const;
        void calculateMaxLengths();

        /**
         * The following functions are for the iteration output order.
         */
        unsigned long getUniqueId(int seq);

        // --- debugging helpers ---
        void debugPrintArray(){debugPrintSeqArray(&seqArray);}
        void debugPrintSeqArray(SeqArray* arrayToPrint);
        void debugPrintProfile1();
        void debugPrintProfile2();
        void debugPrintOutAlignInfo();
        void debugPrintAllNames();
        void debugPrintSequences();

        /* Attributes */

        /* Friends */
        // Lets a caller step through the names vector of an Alignment.
        class NameIterator;
        friend class NameIterator;
        class NameIterator
        {
            private:
                Alignment *alignment;
                vector::iterator i;
            public:
                void begin(Alignment *alignment);
                const string next();
                bool end();
        };

    private:
        /* Functions */
        void addSequencesToVector(vector* seqVector);
        int getSequenceLength(int index);
        void sortScores(vector* scores, int f, int l);
        void swap(vector* scores, int s1, int s2);
        bool keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex);
        void clearSeqArray();

        /* Attributes */
        int maxNames;
        int maxAlignmentLength;
        int lengthLongestSequence;
        int numSeqs;
        vector outputIndex;
        vector sequenceIds; // Mark change: To help with output order
        vector seqWeight;
        SeqArray seqArray;
        vector names;
        vector titles;
        vector gapPenaltyMask1;
        vector gapPenaltyMask2;
        vector secStructMask1;
        vector secStructMask2;
        string secStructName1;
        string secStructName2;
        vector histogramColumnHeights; // NOTE July 13, for Qt
        int profile1NumSeqs;
        // Gap codes, copied from userParameters by the constructor.
        int gapPos1, gapPos2;
};

}
#endif
clustalx-2.1/clustalW/alignment/Alignment.cpp0000644000175000017500000015325511470725216021227 0ustar ddineenddineen/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 */
/**
 * Changes:
 *
 * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
 * process the name vector.
 *
 * 05-03-07,Nigel Brown(EMBL): modified searchForString() to skip over gaps in
 * sequence while matching.
 *
 * 26-03-07,Nigel Brown(EMBL): suppressed error message box for name clashes;
 * added testUniqueNames() predicate, which compares new sequence names with
 * those in the alignment vector BEFORE appending them.
 */
#ifdef HAVE_CONFIG_H
    #include "config.h"
#endif
// NOTE(review): the targets of the next three #include lines are missing in
// this copy of the file.
#include
#include
#include
#include "Alignment.h"

using namespace std;

namespace clustalw
{

// Creates an empty alignment: both gap codes are read from userParameters and
// all counters/lengths start at zero.
Alignment::Alignment()
 : gapPos1(userParameters->getGapPos1()),
   gapPos2(userParameters->getGapPos2())
{
    maxNames = 0;
    numSeqs = 0;
    maxAlignmentLength = 0;
    lengthLongestSequence = 0;
    profile1NumSeqs = 0;
}

// Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
/** remove gaps from older alignments (code = gap_pos1) */ void Alignment::resetProfile1() { register int sl; /* which have code = gap_pos2 */ int i,j; int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); bool _resetAlignNew = userParameters->getResetAlignmentsNew(); bool _resetAlignAll = userParameters->getResetAlignmentsAll(); if (userParameters->getStructPenalties1() != NONE) { sl = 0; for (j = 0; j < (int)gapPenaltyMask1.size(); ++j) { if (gapPenaltyMask1[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if (gapPenaltyMask1[j] == _gapPos2 && (_resetAlignAll)) { continue; } gapPenaltyMask1[sl] = gapPenaltyMask1[j]; ++sl; } } if (userParameters->getStructPenalties1() == SECST) { sl = 0; for (j = 0; j < (int)secStructMask1.size(); ++j) { if (secStructMask1[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if (secStructMask1[j] == _gapPos2 && (_resetAlignAll)) { continue; } secStructMask1[sl] = secStructMask1[j]; ++sl; } } for(i = 1; i <= profile1NumSeqs; ++i) { sl = 0; for(j = 1; j <= getSeqLength(i); ++j) { if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if(seqArray[i][j] == _gapPos2 && (_resetAlignAll)) { continue; } ++sl; seqArray[i][sl] = seqArray[i][j]; } // Andreas Wilm (UCD): added 2008-03-07: // Remove excess bit at end of sequence int numExtraElements = seqArray[i].size() - 1 - sl; for(int k = 0; k < numExtraElements; k++) { seqArray[i].pop_back(); } } } // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged? 
void Alignment::resetProfile2() { register int sl; /* which have code = gap_pos2 */ int i, j; int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); bool _resetAlignNew = userParameters->getResetAlignmentsNew(); bool _resetAlignAll = userParameters->getResetAlignmentsAll(); int _profile1NumSeqs = profile1NumSeqs; if (userParameters->getStructPenalties2() != NONE) { sl = 0; for (j = 0; j < (int)gapPenaltyMask2.size(); ++j) { if (gapPenaltyMask2[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if (gapPenaltyMask2[j] == _gapPos2 && (_resetAlignAll)) { continue; } gapPenaltyMask2[sl] = gapPenaltyMask2[j]; ++sl; } } if (userParameters->getStructPenalties2() == SECST) { sl = 0; for (j = 0; j < (int)secStructMask2.size(); ++j) { if (secStructMask2[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if (secStructMask2[j] == _gapPos2 && (_resetAlignAll)) { continue; } secStructMask2[sl] = secStructMask2[j]; ++sl; } } for(i = _profile1NumSeqs + 1; i <= numSeqs; ++i) { sl = 0; for(j = 1; j <= getSeqLength(i); ++j) { if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll)) { continue; } if(seqArray[i][j] == _gapPos2 && (_resetAlignAll)) { continue; } ++sl; seqArray[i][sl] = seqArray[i][j]; } // Andreas Wilm (UCD) added 2008-03-07: // Remove excess bit at end of sequence int numExtraElements = seqArray[i].size() - 1 - sl; for(int k = 0; k < numExtraElements; k++) { seqArray[i].pop_back(); } } } void Alignment::resetAllSeqWeights() { seqWeight.clear(); seqWeight.resize(numSeqs + 1, 100); } /* * The function addOutputIndex must check that the number of outputindex * Not sure what to do with this one. 
*/ bool Alignment::addOutputIndex(vector* outputIndexToAdd) { // First need to clear the old outputIndex outputIndex.clear(); // Check if the size is correct if((int)outputIndexToAdd->size() == numSeqs) { // Add the output index outputIndex = *outputIndexToAdd; return true; } else { clearAlignment(); return false; // Could not add them } } /* * The function appendOutputIndex is used when we are appending the outputIndex * to the current one. We do this when we are adding a profile2. */ bool Alignment::appendOutputIndex(vector* outputIndexToAppend) { // Check if the size is correct if((int)(outputIndexToAppend->size() + outputIndex.size()) == numSeqs) { // Append the outputIndex vector::iterator outIndexIter = outputIndexToAppend->begin(); while(outIndexIter != outputIndexToAppend->end()) { outputIndex.push_back(*outIndexIter); outIndexIter++; } if((int)outputIndex.size() == numSeqs) { return true; } else { clearAlignment(); cerr << "There is a problem with adding the sequences\n"; return false; } } else { clearAlignment(); return false; } } /* * The function addSequences takes a vector of sequences and adds it to the Alignment. * This is used if there is nothing in memory that we need. * */ void Alignment::addSequences(vector* seqVector) { clearAlignment(); numSeqs = seqVector->size(); vector emptyVec; // NOTE push dummy sequences on first! /******************************************************** * It was decided to stay with the seqs and seqArray * * index begining at 1. This is because it is difficult * * to change in the code, so we push dummy seqs on * ********************************************************/ seqArray.push_back(emptyVec); // EMPTY sequence names.push_back(string("")); titles.push_back(string("")); sequenceIds.push_back(0); addSequencesToVector(seqVector); calculateMaxLengths(); seqWeight.resize(numSeqs + 1, 100); } /* * The function appendSequences is used when we have a profile2. 
* */ void Alignment::appendSequences(vector* seqVector) { numSeqs += seqVector->size(); // Add to the number we already have! addSequencesToVector(seqVector); seqWeight.clear(); seqWeight.resize(numSeqs + 1, 100); calculateMaxLengths(); } void Alignment::pasteSequencesIntoPosition(vector* seqVector, int pos, bool explicitPasteToProfile2) { SeqArray::iterator seqArrayIterator; vector::iterator namesIterator; vector::iterator titlesIterator; vector::iterator sequenceIdsIterator; int profNum = userParameters->getProfileNum(); int numSeqsToInsert = seqVector->size(); if(numSeqsToInsert == 0 || pos < 0) { return; } if(pos == numSeqs) { seqArrayIterator = seqArray.end(); namesIterator = names.end(); titlesIterator = titles.end(); sequenceIdsIterator = sequenceIds.end(); } else { seqArrayIterator = seqArray.begin() + pos + 1; namesIterator = names.begin() + pos + 1; titlesIterator = titles.begin() + pos + 1; sequenceIdsIterator = sequenceIds.begin() + pos + 1; } int prof1NumSeqs = profile1NumSeqs; for(int i = numSeqsToInsert - 1; i >= 0; i--) { seqArray.insert(seqArrayIterator, *(*seqVector)[i].getSequence()); names.insert(namesIterator, (*seqVector)[i].getName()); titles.insert(titlesIterator, (*seqVector)[i].getTitle()); sequenceIds.insert(sequenceIdsIterator, (*seqVector)[i].getIdentifier()); numSeqs++; if(profNum != 0 && !explicitPasteToProfile2 && pos <= prof1NumSeqs) { prof1NumSeqs++; } } if(profNum != 0 && pos <= prof1NumSeqs) { profile1NumSeqs = prof1NumSeqs; } resetAllSeqWeights(); setDefaultOutputIndex(); } void Alignment::debugPrintAllNames() { vector::iterator nameIter = names.begin(); while(nameIter != names.end()) { cout << *nameIter << endl; nameIter++; } } void Alignment::NameIterator::begin(Alignment *a) { //cout << "begin()\n"; if (a) { alignment = a; i = alignment->names.begin(); } } const string Alignment::NameIterator::next() { //cout << "next()\n"; if (!alignment) return string(); if (i == alignment->names.end()) return string(); return *i++; } bool 
Alignment::NameIterator::end() { //cout << "end()\n"; if (!alignment) return true; if (i == alignment->names.end()) return true; return false; } // 26-03-03,nige: test before appending seqVector bool Alignment::testUniqueNames(vector* seqVector, string *offendingSeq) { vector::iterator oldName; vector::iterator newName; bool unique = true; //iterate over new candidate names for (newName = seqVector->begin(); unique && newName != seqVector->end(); newName++) { //iterate over old stored names for (oldName = names.begin() + 1; unique && oldName != names.end(); oldName++) { if (*oldName == newName->getName()) { offendingSeq->assign(*oldName); unique = false; } } } return unique; } /* * The function addSequencesToVector is used to add sequences to our seqVector * It is used by both addSequences and appendSequences. This is to reduce code * duplication. */ void Alignment::addSequencesToVector(vector* seqVector) { std::vector::iterator itSeq; for(itSeq = seqVector->begin(); itSeq != seqVector->end(); ++itSeq) { seqArray.push_back(*(*itSeq).getSequence()); names.push_back((*itSeq).getName()); titles.push_back((*itSeq).getTitle()); sequenceIds.push_back((*itSeq).getIdentifier()); } if(!(((int)seqArray.size() == numSeqs + 1) && ((int)names.size() == numSeqs + 1) && ((int)titles.size() == numSeqs + 1) && ((int)sequenceIds.size() == numSeqs + 1))) { cerr << "There has been an error adding the sequences to Alignment.\n" << "Must terminate the program. EaddSequencesrror occured in addSequences.\n"; exit(1); } } void Alignment::clearAlignment() { // Erase all the elements from the vector! 
clearSeqArray(); names.clear(); titles.clear(); outputIndex.clear(); sequenceIds.clear(); clearSecStruct1(); clearSecStruct2(); seqWeight.clear(); maxNames = 0; numSeqs = 0; maxAlignmentLength = 0; lengthLongestSequence = 0; userParameters->setProfileNum(0); userParameters->setProfile1Empty(true); userParameters->setProfile2Empty(true); } void Alignment::clearSeqArray() { for(int i = 0; i < (int)seqArray.size(); i++) { seqArray[i].clear(); } seqArray.clear(); } void Alignment::clearSecStruct1() { gapPenaltyMask1.clear(); secStructMask1.clear(); secStructName1 = ""; } void Alignment::clearSecStruct2() { gapPenaltyMask2.clear(); secStructMask2.clear(); secStructName2 = ""; } /* * The function alignScore is used to score the alignment! * This is used by other classes to see what score the alignment has gotten. */ int Alignment::alignScore(void) { int maxRes; int seq1, seq2, res1, res2; int ngaps; int i, len1, len2; int score; int _maxAA = userParameters->getMaxAA(); float _gapOpen = userParameters->getGapOpen(); int matrix[NUMRES][NUMRES]; // // calculate an overall score for the alignment by summing the // scores for each pairwise alignment // maxRes = subMatrix->getAlnScoreMatrix(matrix); if (maxRes == 0) { utilityObject->error("Matrix for alignment scoring not found\n"); return 0; } score = 0; for (seq1 = 1; seq1 <= numSeqs; seq1++) { for (seq2 = 1; seq2 < seq1; seq2++) { len1 = seqArray[seq1].size() - 1; len2 = seqArray[seq2].size() - 1; for (i = 1; i < len1 && i < len2; i++) { res1 = seqArray[seq1][i]; res2 = seqArray[seq2][i]; if ((res1 >= 0) && (res1 <= _maxAA) && (res2 >= 0) && (res2 <= _maxAA)) { score += matrix[res1][res2]; } } ngaps = countGaps(seq1, seq2, len1); score = static_cast(score - (100 * _gapOpen * ngaps)); // Mark change 17-5-07 } } score /= 100; utilityObject->info("Alignment Score %d\n", score); return score; } int Alignment::countGaps(int seq1, int seq2, int len) { int i, g; int q, r;//, *Q, *R; vector Q, R; Q.resize(len + 2, 0); R.resize(len 
+ 2, 0); int _maxAA = userParameters->getMaxAA(); try { Q[0] = R[0] = g = 0; for (i = 1; i < len; i++) { if (seqArray[seq1][i] > _maxAA) { q = 1; } else { q = 0; } if (seqArray[seq2][i] > _maxAA) { r = 1; } else { r = 0; } // NOTE I havent a clue what this does!!!! if (((Q[i - 1] <= R[i - 1]) && (q != 0) && (1-r != 0)) || ((Q[i - 1] >= R[i - 1]) && (1-q != 0) && (r != 0))) { g += 1; } if (q != 0) { Q[i] = Q[i - 1] + 1; } else { Q[i] = 0; } if (r != 0) { R[i] = R[i - 1] + 1; } else { R[i] = 0; } } } catch(const exception &ex) { cerr << ex.what() << endl; cerr << "Terminating program. Cannot continue. Function = countGaps\n"; exit(1); } return (g); } void Alignment::resetAlign() { /* remove gaps from older alignments (code = gap_pos1) EXCEPT for gaps that were INPUT with the seqs. which have code = gap_pos2 */ register int sl; int i, j; int _gapPos1 = userParameters->getGapPos1(); int _gapPos2 = userParameters->getGapPos2(); bool _resetAlignNew = userParameters->getResetAlignmentsNew(); bool _resetAlignAll = userParameters->getResetAlignmentsAll(); for(i = 1; i <= numSeqs;++i) { sl = 0; for(j = 1; j <= getSeqLength(i); ++j) { if(seqArray[i][j] == _gapPos1 && ( _resetAlignNew || _resetAlignAll)) { continue; } if(seqArray[i][j] == _gapPos2 && (_resetAlignAll)) { continue; } ++sl; seqArray[i][sl] = seqArray[i][j]; } // Andreas Wilm (UCD) added 2008-03-07: // Remove excess bit at end of sequence int numExtraElements = seqArray[i].size() - 1 - sl; for(int k = 0; k < numExtraElements; k++) { seqArray[i].pop_back(); } } } void Alignment::fixGaps() { int i,j; if (userParameters->getStructPenalties1() != NONE) { for (j = 0; j < getSeqLength(1); ++j) { if (gapPenaltyMask1[j] == userParameters->getGapPos1()) { gapPenaltyMask1[j] = userParameters->getGapPos2(); } } } if (userParameters->getStructPenalties1() == SECST) { for (j = 0; j < getSeqLength(1); ++j) { if (secStructMask1[j] == userParameters->getGapPos1()) { secStructMask1[j] = userParameters->getGapPos2(); } } } for(i = 
1; i <= numSeqs; ++i) { for(j = 1; j <= getSeqLength(i); ++j) { if(seqArray[i][j] == userParameters->getGapPos1()) { seqArray[i][j] = userParameters->getGapPos2(); } } } } float Alignment::countid(int s1, int s2) { int c1, c2; int i; int count, total; float score; int shorterSeqLength = (getSeqLength(s1) < getSeqLength(s2)) ? getSeqLength(s1) : getSeqLength(s2); count = total = 0; for (i = 1; i <= shorterSeqLength; i++) // NOTE june29 { c1 = seqArray[s1][i]; c2 = seqArray[s2][i]; if ((c1 >= 0) && (c1 < userParameters->getMaxAA())) { total++; if (c1 == c2) { count++; } } } if (total == 0) { score = 0; } else { score = 100.0 *(float)count / (float)total; } return (score); } void Alignment::debugPrintSequences() { cout << std::endl; for(int i = 0; i < (int)seqArray.size(); i++) { for(int j = 0; j < (int)seqArray[i].size(); j++) { if(seqArray[i][j] > 9) cout << " " << seqArray[i][j]; else cout << " " << seqArray[i][j]; } cout << std::endl; } } /* * Note the max_aln_length is now called maxAlignmentLength, and it will be stored * and calculated in this class. Mostly it is used for allocating arrays. But not always. */ void Alignment::calculateMaxLengths() { maxAlignmentLength = 0; lengthLongestSequence = 0; if(seqArray.size() > 0) { SeqArray::iterator seqArrayIter = seqArray.begin(); while(seqArrayIter != seqArray.end()) { // NOTE I needed to change this to >= for a bug I had!!!!!!! if((int)(*seqArrayIter).size() - 1 >= lengthLongestSequence) { lengthLongestSequence = (*seqArrayIter).size(); } ++seqArrayIter; } if(lengthLongestSequence > 0) { maxAlignmentLength = (lengthLongestSequence * 2) - 2; lengthLongestSequence -= 1; // MADE A CHANGE HERE AS WELL!! 
} else { lengthLongestSequence = 0; maxAlignmentLength = 0; } } else { maxAlignmentLength = 0; } maxNames = 0; if(names.size() > 0) { vector::iterator nameVecIter = names.begin(); while(nameVecIter != names.end()) { if((int)(*nameVecIter).size() > maxNames) { maxNames = (*nameVecIter).size(); } ++nameVecIter; } if(maxNames < 10) { maxNames = 10; } } else { maxNames = 0; } } /* * This function checks to see if all names are different. It returns true if they * are all different, and false if there are 2 the same. * currently quadratic - would be nice to speed up */ bool Alignment::checkAllNamesDifferent(string *offendingSeq) { bool different = true; // NOTE I added the + 1 here because, if we had a sequence with a name as blank // this would be the same as the first one! vector::iterator namesIter1 = names.begin() + 1; vector::iterator namesIter2; int counter1 = 1; int counter2 = 2; while(namesIter1 != names.end()) { namesIter2 = namesIter1 + 1; while(namesIter2 != names.end()) { if((*namesIter1).compare(*namesIter2) == 0) // If we have 2 strings the same. { different = false; /* 23-03-2007,nige: let someone up the stack deal with this - GUI is too deeply entangled. * utilityObject->error("Multiple sequences found with same name '%s' (first %d chars are significant)\n", namesIter1->c_str(), MAXNAMES); */ offendingSeq->assign((*namesIter1)); clearAlignment(); return different; // Not all different! 
} namesIter2++; counter2++; } namesIter1++; counter1++; counter2 = counter1 + 1; } return different; } void Alignment::addSecStructMask1(vector* secStructMaskToAdd) { secStructMask1 = *secStructMaskToAdd; } void Alignment::addSecStructMask2(vector* secStructMaskToAdd) { secStructMask2 = *secStructMaskToAdd; } void Alignment::addGapPenaltyMask1(vector* gapPenaltyMaskToAdd) { gapPenaltyMask1 = *gapPenaltyMaskToAdd; } void Alignment::addGapPenaltyMask2(vector* gapPenaltyMaskToAdd) { gapPenaltyMask2 = *gapPenaltyMaskToAdd; } void Alignment::addSecStructName1(string nameToAdd) { secStructName1 = nameToAdd; } void Alignment::addSecStructName2(string nameToAdd) { secStructName2 = nameToAdd; } void Alignment::addSeqWeight(vector* _seqWeight) { if(seqWeight.size() == _seqWeight->size()) { int size = seqWeight.size(); for(int i = 0; i < size; i++) { seqWeight[i] = (*_seqWeight)[i]; } } } void Alignment::printSequencesAddedInfo() { if(userParameters->getDisplayInfo()) { int startSeq = userParameters->getProfile2Empty() ? 1: profile1NumSeqs + 1; string dnaFlag = userParameters->getDNAFlag() ? "bp" : "aa"; for(int i = startSeq; i <= numSeqs; i++) { cout << "Sequence " << i << ": " << std::left << setw(maxNames) << names.at(i) << std::right << setw(6) << getSequenceLength(i) << " " << dnaFlag << std::endl; } } } void Alignment::debugPrintOutAlignInfo() { for(int i = 1; i <= numSeqs; i++) { cout << "seq-no=" << i << ": name=" << std::left << setw(maxNames) << names.at(i) << " length=" << std::right << setw(6) << getSequenceLength(i) << std::endl; } } int Alignment::getSequenceLength(int index) { return seqArray.at(index).size() - 1; } int Alignment::getLengthLongestSequence() { int _lengthLongestSequence = 0; for(int i = 1; i <= numSeqs; i++) { if(getSeqLength(i) > _lengthLongestSequence) { _lengthLongestSequence = getSeqLength(i); } } return _lengthLongestSequence; } /** * This function is used to find the length of the longest sequence in the range. 
*/ int Alignment::getLengthLongestSequence(int firstSeq, int lastSeq) { int _lengthLongestSequence = 0; if(firstSeq >= 1 && lastSeq <= numSeqs) { for(int i = firstSeq; i <= lastSeq; i++) { if(getSeqLength(i) > _lengthLongestSequence) { _lengthLongestSequence = getSeqLength(i); } } } return _lengthLongestSequence; // Will return 0 if cant check seqs } int Alignment::getMaxNames() { return maxNames; } string Alignment::getSecStructName1() { return secStructName1; } string Alignment::getSecStructName2() { return secStructName2; } vector* Alignment::getGapPenaltyMask1() { return &gapPenaltyMask1; } vector* Alignment::getGapPenaltyMask2() { return &gapPenaltyMask2; } vector* Alignment::getSecStructMask1() { return &secStructMask1; } vector* Alignment::getSecStructMask2() { return &secStructMask2; } int Alignment::getSecStructMask1Element(int index) { if(index > 0 && index < (int)secStructMask1.size()) { return secStructMask1[index]; } else { throw VectorOutOfRange(string("secStructMask1"), index, secStructMask1.size() - 1); } } int Alignment::getSecStructMask2Element(int index) { if(index > 0 && index < (int)secStructMask2.size()) { return secStructMask2[index]; } else { throw VectorOutOfRange(string("secStructMask2"), index, secStructMask2.size() - 1); } } int Alignment::getGapPenaltyMask1Element(int index) { if(index > 0 && index < (int)gapPenaltyMask1.size()) { return gapPenaltyMask1[index]; } else { throw VectorOutOfRange(string("gapPenaltyMask1"), index, gapPenaltyMask1.size() - 1); } } int Alignment::getGapPenaltyMask2Element(int index) { if(index > 0 && index < (int)gapPenaltyMask2.size()) { return gapPenaltyMask2[index]; } else { throw VectorOutOfRange(string("gapPenaltyMask2"), index, gapPenaltyMask2.size() - 1); } } string Alignment::getName(int index) { if(index > 0 && index < (int)names.size()) { return names[index]; } else { throw VectorOutOfRange(string("names"), index, names.size() - 1); } } /** * Change: * Mark 25-1-2007: This function was added to allow 
access to the unique id. */ unsigned long Alignment::getUniqueId(int seq) { if(seq > 0 && seq < (int)sequenceIds.size()) { return sequenceIds[seq]; } else { throw VectorOutOfRange(string("sequenceIds"), seq, sequenceIds.size() - 1); } } string Alignment::getTitle(int index) { if(index > 0 && index < (int)titles.size()) { return titles[index]; } else { throw VectorOutOfRange(string("titles"), index, titles.size() - 1); } } int Alignment::getOutputIndex(int index) { if(index >= 0 && index < (int)outputIndex.size()) { return outputIndex[index]; } else { throw VectorOutOfRange(string("outputIndex"), index, outputIndex.size() - 1); } } int Alignment::getSeqWeight(int index) const { if(index >= 0 && index < (int)seqWeight.size()) { return seqWeight[index]; } else { throw VectorOutOfRange(string("seqWeight"), index, seqWeight.size() - 1); } } /** * This function is used by Qt. It is used to calculate the histogram values for the * widget in clustalQt. The function is in here because it needs access to the SubMatrix * class. * @param matNum The number of the matrix to be used in the comparison. * @return A pointer to a vector containing the histogram values. */ vector* Alignment::QTcalcHistColumnHeights(int firstSeq, int nSeqs, Array2D* exceptionalRes) { int n, i, s, p, r, r1; int numColumns = getLengthLongestSequence(); int scoreScale = userParameters->getQTScorePlotScale();//5; // From ClustalX. 
int scoreCutOff = userParameters->getQTResExceptionCutOff(); bool includeGaps = false; //short *mat_xref, *matptr; float median, mean; float t, q1, q3, ul; vector seqdist, sorteddist; float diff; vector seqvector; vector freq; vector > profile; int matrix[NUMRES][NUMRES]; int _maxAA = userParameters->getMaxAA(); int _gapPos1 = userParameters->getGapPos1(); histogramColumnHeights.resize(numColumns); //panel_data data1; subMatrix->getQTMatrixForHistogram(matrix); profile.resize(numColumns + 2, vector(_maxAA + 2)); freq.resize(_maxAA + 2); for(p = 0; p < numColumns; p++) { for(r = 0; r < _maxAA; r++) { freq[r] = 0; } for(s = firstSeq; s < firstSeq + nSeqs; s++) { if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 && seqArray[s + 1][p + 1] < _maxAA) { freq[seqArray[s + 1][p + 1]]++; } } for(r = 0; r < _maxAA; r++) { profile[p][r] = 0; for(r1 = 0; r1 < _maxAA; r1++) { profile[p][r] += freq[r1] * matrix[r1][r]; } profile[p][r] = static_cast(profile[p][r] / static_cast(nSeqs)); // Mark change 17-5-07 } } seqvector.resize(_maxAA + 2); seqdist.resize(nSeqs + 1); sorteddist.resize(nSeqs + 1); for(p = 0; p < numColumns; p++) { for(s = firstSeq; s < firstSeq + nSeqs; s++) { if (p < getSeqLength(s + 1)) { for (r = 0; r < _maxAA; r++) { seqvector[r] = matrix[r][seqArray[s + 1][p + 1]]; } } else { for (r = 0; r < _maxAA; r++) { seqvector[r] = matrix[r][_gapPos1]; } } seqdist[s - firstSeq] = 0.0; for(r = 0; r < _maxAA; r++) { diff = profile[p][r] - seqvector[r]; diff /= 1000.0; seqdist[s - firstSeq] += diff * diff; } seqdist[s - firstSeq] = sqrt((double)seqdist[s - firstSeq]); } // calculate mean,median and rms of seq distances mean = median = 0.0; if(includeGaps) { for(s = 0; s < nSeqs; s++) { mean += seqdist[s]; } mean /= nSeqs; n = nSeqs; for(s = 0; s < nSeqs; s++) { sorteddist[s] = seqdist[s]; } } else { n = 0; for(s = firstSeq; s < firstSeq + nSeqs; s++) { if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 && seqArray[s + 1][p + 1] < _maxAA) { mean += seqdist[s 
- firstSeq]; n++; } } if(n > 0) { mean /= n; } for(s = firstSeq, i = 0; s < firstSeq + nSeqs; s++) { if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 && seqArray[s + 1][p + 1] < _maxAA) { sorteddist[i++] = seqdist[s - firstSeq]; } } } sortScores(&sorteddist, 0, n - 1); if(n == 0) { median = 0; } else if(n % 2 == 0) { median = (sorteddist[n / 2 - 1] + sorteddist[n / 2]) / 2.0; } else { median = sorteddist[n / 2]; } if(scoreScale <= 5) { histogramColumnHeights[p] = static_cast(exp((double)(-mean * (6 - scoreScale) / 4.0)) * 100.0 * n / nSeqs); } else { histogramColumnHeights[p] = static_cast(exp((double)(-mean / (4.0 * (scoreScale - 4)))) * 100.0 * n / nSeqs); } if(n == 0) { ul = 0; } else { t = n/4.0 + 0.5; if(t - (int)t == 0.5) { q3 = (sorteddist[(int)t] + sorteddist[(int)t + 1]) / 2.0; q1 = (sorteddist[n-(int)t] + sorteddist[n - (int)t - 1]) / 2.0; } else if(t - (int)t > 0.5) { q3 = sorteddist[(int)t + 1]; q1 = sorteddist[n - (int)t - 1]; } else { q3 = sorteddist[(int)t]; q1 = sorteddist[n - (int)t]; } if (n < 4) { ul = sorteddist[0]; } else { ul = q3 + (q3 - q1) * ((float)scoreCutOff / 2.0); } } if((exceptionalRes->getRowSize() >= nSeqs) && exceptionalRes->getColSize() >= numColumns) { for(int s = firstSeq; s < firstSeq + nSeqs; s++) { if(seqdist[s - firstSeq] > ul && p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 && seqArray[s + 1][p + 1] < userParameters->getMaxAA()) { (*exceptionalRes)[s - firstSeq][p] = 1; } else { (*exceptionalRes)[s - firstSeq][p] = 0; } } } } return &histogramColumnHeights; } void Alignment::sortScores(vector* scores, int f, int l) { int i,last; if(f >= l) return; swap(scores, f, (f + l) / 2); last = f; for(i = f + 1; i <= l; i++) { if((*scores)[i] > (*scores)[f]) { swap(scores, ++last, i); } } swap(scores, f, last); sortScores(scores, f, last - 1); sortScores(scores, last + 1, l); } void Alignment::swap(vector* scores, int s1, int s2) { float temp; temp = (*scores)[s1]; (*scores)[s1] = (*scores)[s2]; (*scores)[s2] = temp; 
} int Alignment::searchForString(bool* found, int seq, int beginRes, string search) { int lengthSeq = getSeqLength(seq); if(beginRes > lengthSeq) { *found = false; return beginRes; } int res = beginRes; // First need to convert search into a vector of ints! vector codedSearch; int size = search.size(); codedSearch.resize(size); int code; for(int i = 0; i < size; i++) { code = userParameters->resIndex(userParameters->getAminoAcidCodes(), search[i]); codedSearch[i] = code; } int numSame = 0; int startPos = -1; int searchSize = codedSearch.size(); // Now check for the string of ints!!!! for(; res <= lengthSeq; res++) //- nige: res starts at 1 { if(seqArray[seq][res] == codedSearch[0]) { startPos = res; //- nige for(int i = 0; i < searchSize && (i + res) <= lengthSeq; i++) //- nige: res starts at 1 { if(seqArray[seq][res + i] == codedSearch[i]) { numSame++; } //nige: hack: encoded gap character: see also AlignmentScroll.cpp else if (seqArray[seq][res + i] == 31 || seqArray[seq][res + i] == 30) { res++; i--; } else { break; // Not the same } } if(numSame == searchSize) { *found = true; return startPos; } else { numSame = 0; } } } *found = false; return startPos; // Not found!!!!!!! } void Alignment::removeGapsFromSelectedSeqs(vector* selected) { //getNumSeqs() int size = selected->size(); int lengthOfSelectedSeq = 0; int gapPos1 = userParameters->getGapPos1(); int gapPos2 = userParameters->getGapPos2(); int s1; for(int i = 1; i <= getNumSeqs() && i < size; i++) { if((*selected)[i] == 1) { // remove gaps from this seq! 
lengthOfSelectedSeq = getSeqLength(i); s1 = 0; for(int j = 1; j <= lengthOfSelectedSeq; j++) { if(seqArray[i][j] == gapPos1 || seqArray[i][j] == gapPos2) { continue; } ++s1; seqArray[i][s1] = seqArray[i][j]; } // Then remove the excess bit at the end of the array int numExtraElements = lengthOfSelectedSeq - s1; if((int)seqArray[i].size() > numExtraElements) { for(int k = 0; k < numExtraElements; k++) { seqArray[i].pop_back(); } } } } } void Alignment::removeGapOnlyColsFromSelectedSeqs(vector* selected) { int numGaps = 0; int NoneSelected = -1; int numColumns = 0; int sizeSelected = selected->size(); int firstSeqSelected = NoneSelected; int gapPos1 = userParameters->getGapPos1(); int gapPos2 = userParameters->getGapPos2(); int k; for(int i = 1; i < sizeSelected; i++) { if((*selected)[i] == 1) { numColumns++; if(firstSeqSelected == -1) { firstSeqSelected = i; } } } if(firstSeqSelected == NoneSelected) { cout << "No Sequences have been selected\n"; return; } for(int i = 1; i <= getSeqLength(firstSeqSelected);) { numGaps = 0; for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++) { if(getSeqLength(j) >= i) { if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2) { numGaps++; } } } if(numGaps == numColumns) { //cout << " removing a gap column\n\n"; for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++) { for(k = i + 1; k <= getSeqLength(j) + 1 && (int)seqArray[j].size() > k; k++) { seqArray[j][k - 1] = seqArray[j][k]; } seqArray[j].pop_back(); // Remove the last element!! 
if(getSeqLength(firstSeqSelected) <= 0) { break; } } } else { i++; } } } void Alignment::removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum) { if(fSeq >= lSeq) { return; } int gapPos1 = userParameters->getGapPos1(); int gapPos2 = userParameters->getGapPos2(); int numGaps = 0; int numColumns = lSeq - fSeq + 1; int k; // We must cheack each column to see if it consists of only '-' for(int i = 1; i <= getSeqLength(fSeq);) { numGaps = 0; for(int j = fSeq; j <= lSeq; j++) { if(getSeqLength(j) >= i) { if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2) { numGaps++; } } } if(numGaps == numColumns) { for(int j = fSeq; j <= lSeq; j++) { for(k = i + 1; k <= getSeqLength(j) + 1; k++) { seqArray[j][k - 1] = seqArray[j][k]; } seqArray[j].pop_back(); // Remove the last element!! if(profileNum == 1) { int lengthSecStruct = secStructMask1.size(); int lengthGapMask = gapPenaltyMask1.size(); for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++) { secStructMask1[k - 1] = secStructMask1[k]; } for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++) { gapPenaltyMask1[k - 1] = gapPenaltyMask1[k]; } } if(profileNum == 2) { int lengthSecStruct = secStructMask2.size(); int lengthGapMask = gapPenaltyMask2.size(); for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++) { secStructMask2[k - 1] = secStructMask2[k]; } for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++) { gapPenaltyMask2[k - 1] = gapPenaltyMask2[k]; } } if(getSeqLength(fSeq) <= 0) { break; } } } else { i++; } } } vector Alignment::cutSelectedSequencesFromAlignment(vector* selected) { vector cutSequences; int sizeOfSelected = selected->size(); SeqArray::iterator seqArrayIterator; vector::iterator namesIterator; vector::iterator titlesIterator; vector::iterator seqWeightIterator; vector::iterator sequenceIdsIterator; int newProfile1NumSeqs = profile1NumSeqs; int profNum = userParameters->getProfileNum(); int prof1NumSeqs = profile1NumSeqs; int numCutSoFar = 0; int intialNumSeqs = 
numSeqs; for(int i = 1; i < sizeOfSelected && i <= intialNumSeqs; i++) { if((*selected)[i] == 1) { // Cut the sequence from the alignment! seqArrayIterator = seqArray.begin() + i - numCutSoFar; namesIterator = names.begin() + i - numCutSoFar; titlesIterator = titles.begin() + i - numCutSoFar; seqWeightIterator = seqWeight.begin() + i - numCutSoFar; sequenceIdsIterator = sequenceIds.begin() + i - numCutSoFar; Sequence SeqToCut(&seqArray[i - numCutSoFar], *namesIterator, *titlesIterator, *sequenceIdsIterator); numCutSoFar++; seqArrayIterator->clear(); seqArray.erase(seqArrayIterator); names.erase(namesIterator); titles.erase(titlesIterator); seqWeight.erase(seqWeightIterator); sequenceIds.erase(sequenceIdsIterator); if(numSeqs > 0) { numSeqs--; } else { numSeqs = 0; break; } if(profNum > 0 && i <= prof1NumSeqs) { if(newProfile1NumSeqs > 0) { newProfile1NumSeqs--; } else { newProfile1NumSeqs = 0; } } cutSequences.push_back(SeqToCut); } } profile1NumSeqs = newProfile1NumSeqs; setDefaultOutputIndex(); resetAllSeqWeights(); return cutSequences; }

/*
 * Reset the output-order table to the identity ordering:
 * outputIndex has numSeqs entries and entry k (0-based) holds the 1-based
 * sequence number k + 1.
 */
void Alignment::setDefaultOutputIndex()
{
    outputIndex.clear();
    outputIndex.resize(numSeqs);

    for(int slot = 0; slot < numSeqs; slot++)
    {
        outputIndex[slot] = slot + 1;
    }
}

bool Alignment::removeAllOutsideRange(int beginPos, int endPos) { bool ok; if(beginPos < 0 || endPos > getLengthLongestSequence()) { return false; // cannot do it!!!! 
} // trim the seqArray ok = keepPortionOfSeqArray(beginPos, endPos); if(!ok) { cerr << "There was a problem removing a portion of the array\n"; return false; } // recalculate the maxLengths calculateMaxLengths(); // Clear the histogram columns histogramColumnHeights.clear(); // reset the weights resetAllSeqWeights(); return true; } bool Alignment::keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex) { SeqArray sectionToRealign; vector emptyVec; sectionToRealign.push_back(emptyVec); // EMPTY sequence SeqArray::iterator posToAddTo = sectionToRealign.begin(); // erase from all sequences the range specified here!!!!! if(beginRangeIndex < 0 || endRangeIndex < 0) { return false; } SeqArray::iterator mainBeginIt = seqArray.begin() + 1; SeqArray::iterator mainEndIt = seqArray.end(); vector::iterator begin, end, beginRange, endRange, beginCopyRange, endCopyRange; for(; mainBeginIt != mainEndIt; mainBeginIt++) { vector vecToAdd; begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); beginRange = begin + beginRangeIndex; endRange = begin + endRangeIndex + 1; beginCopyRange = beginRange; endCopyRange = endRange; // We need to copy all of this into another vector. if(endCopyRange < end && beginCopyRange < end) { vecToAdd.push_back(0); for(; beginCopyRange != endCopyRange; beginCopyRange++) { vecToAdd.push_back(*beginCopyRange); } sectionToRealign.push_back(vecToAdd); } else { return false; } if(endRange < end && beginRange < end) { mainBeginIt->erase(beginRange, endRange); } else { return false; } } clearSeqArray(); seqArray = sectionToRealign; return true; } void Alignment::debugPrintSeqArray(SeqArray* arrayToPrint) { // I need to use iterators for everything here. 
SeqArray::iterator mainBeginIt = arrayToPrint->begin(); SeqArray::iterator mainEndIt = arrayToPrint->end(); vector::iterator begin, end; string aaCodes = userParameters->getAminoAcidCodes(); for(; mainBeginIt != mainEndIt; mainBeginIt++) { if(mainBeginIt->size() > 0) { begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); for(; begin != end; begin++) { if(*begin < (int)aaCodes.size()) { cout << aaCodes[*begin]; } else { cout << "-"; } } cout << "\n"; } } } void Alignment::debugPrintProfile1() { cout << "************** PROFILE1 *********************\n"; SeqArray::iterator mainBeginIt = seqArray.begin() + 1; SeqArray::iterator mainEndIt = mainBeginIt + profile1NumSeqs; vector::iterator begin, end; string aaCodes = userParameters->getAminoAcidCodes(); for(; mainBeginIt != mainEndIt; mainBeginIt++) { cout << "PROFILE1 SEQ: "; if(mainBeginIt->size() > 0) { begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); for(; begin != end; begin++) { if(*begin < (int)aaCodes.size()) { cout << aaCodes[*begin]; } else { cout << "-"; } } cout << "\n"; } } } void Alignment::debugPrintProfile2() { cout << "************** PROFILE2 *********************\n"; SeqArray::iterator mainBeginIt = seqArray.begin() + 1 + profile1NumSeqs; SeqArray::iterator mainEndIt = seqArray.end(); vector::iterator begin, end; string aaCodes = userParameters->getAminoAcidCodes(); for(; mainBeginIt != mainEndIt; mainBeginIt++) { cout << "PROFILE2 SEQ: "; if(mainBeginIt->size() > 0) { begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); for(; begin != end; begin++) { if(*begin < (int)aaCodes.size()) { cout << aaCodes[*begin]; } else { cout << "-"; } } cout << "\n"; } } } bool Alignment::updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos) { if(realignedSeqs.size() != seqArray.size()) { return false; } if(beginPos < 0 || endPos < 0) { return false; } // erase from all sequences the range specified here!!!!! 
SeqArray::iterator mainBeginIt = seqArray.begin() + 1; SeqArray::iterator mainEndIt = seqArray.end(); SeqArray::iterator pasteBeginIt = realignedSeqs.begin() + 1; SeqArray::iterator pasteEndIt = realignedSeqs.end(); vector::iterator begin, end, beginRange, endRange; for(; mainBeginIt != mainEndIt && pasteBeginIt != pasteEndIt; mainBeginIt++) { vector vecToAdd; begin = mainBeginIt->begin() + 1; end = mainBeginIt->end(); beginRange = begin + beginPos; endRange = begin + endPos + 1; if(endRange < end && beginRange < end) { mainBeginIt->erase(beginRange, endRange); mainBeginIt->insert(beginRange, pasteBeginIt->begin() + 1, pasteBeginIt->end()); } else { return false; } pasteBeginIt++; } return true; } bool Alignment::reloadAlignment() { if(getNumSeqs() <= 0) { return false; } if(userParameters->getOutputOrder() == INPUT) { return true; } if((int)outputIndex.size() != getNumSeqs()) { outputIndex.clear(); return false; } vector emptyVec; string emptyString = ""; SeqArray outputOrderSeqArray; outputOrderSeqArray.resize(getNumSeqs() + 1); outputOrderSeqArray[0] = emptyVec; vector outputOrderNames; outputOrderNames.resize(getNumSeqs() + 1); outputOrderNames[0] = emptyString; vector outputOrderTitles; outputOrderTitles.resize(getNumSeqs() + 1); outputOrderTitles[0] = emptyString; vector outputOrderSequenceIds; outputOrderSequenceIds.resize(getNumSeqs() + 1); outputOrderSequenceIds[0] = 0; int size = seqArray.size(); if((seqArray.size() != names.size()) || (seqArray.size() != titles.size()) || sequenceIds.size() != names.size()) { return false; } int _outIndex; // Now for each seq, for(int i = 1; i < size; i++) { if(i < (int)outputOrderSeqArray.size() && i - 1 < (int)outputIndex.size() && outputIndex[i - 1] < size) { _outIndex = outputIndex[i - 1]; outputOrderSeqArray[i] = seqArray[_outIndex]; outputOrderNames[i] = names[_outIndex]; outputOrderTitles[i] = titles[_outIndex]; outputOrderSequenceIds[i] = sequenceIds[_outIndex]; } else { return false; } } // Now we have a copy in 
the correct order. // Remove all the elements from the old ones and set them to be these arrays clearSeqArray(); seqArray = outputOrderSeqArray; names.clear(); names = outputOrderNames; titles.clear(); titles = outputOrderTitles; sequenceIds.clear(); sequenceIds = outputOrderSequenceIds; return true; } const vector* Alignment::getSequenceFromUniqueId(unsigned long id) { for(int i = 0; i < (int)sequenceIds.size(); i++) { if(sequenceIds[i] == id) { return getSequence(i); } } // We have not found it, throw an exception!!! throw SequenceNotFoundException(); } /** * Change: * Mark 26-1-2007: This function was added to allow access to the unique id. */ void Alignment::updateSequence(int index, const vector* seq) { if(index >= 1 && index < (int)seqArray.size()) { seqArray[index] = *seq; } } /** * Change: * Mark 17-2-2007: This function was added to check if a res is a gap or not. */ bool Alignment::isGap(int seq, int col) const { int res = seqArray[seq][col]; if(res == gapPos1 || res == gapPos2) { return true; } else { return false; } } /** * This function will be used so that we can create an alignment object from a seqArray. * This will be used for the tree iteration. 
*/ void Alignment::addSequences(SeqArray* seqVector) { clearAlignment(); numSeqs = seqVector->size() - 1; vector emptyVec; seqArray.push_back(emptyVec); // EMPTY sequence names.push_back(string("")); titles.push_back(string("")); sequenceIds.push_back(0); cout << "\nThere are " << numSeqs << " in the alignment obj\n"; for(int i = 1; i <= numSeqs; i++) { ostringstream name; seqArray.push_back((*seqVector)[i]); titles.push_back(string("")); sequenceIds.push_back(utilityObject->getUniqueSequenceIdentifier()); name << "name" << numSeqs; names.push_back(name.str()); } calculateMaxLengths(); seqWeight.resize(numSeqs + 1, 100); } } clustalx-2.1/clustalW/alignment/0000755000175000017500000000000011470725216016572 5ustar ddineenddineenclustalx-2.1/clustalW/Help.h0000644000175000017500000000135311470725216015657 0ustar ddineenddineen/** * Author: Andreas Wilm * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * This is the clustalw help class which replaces the old help file * */ #ifndef HELP_H #define HELP_H #include #include using namespace std; typedef struct { string marker; string title; string content; } section; class Help { public: /* Functions */ Help(); ~Help(); string GetSection(string marker); string GetSection(char marker); string GetSectionTitle(string marker); string GetSectionTitle(char marker); vector ListSectionMarkers(); /* Attributes */ private: /* Functions */ /* Attributes */ vector
sections; }; #endif clustalx-2.1/clustalW/Help.cpp0000644000175000017500000011437711470725216016225 0ustar ddineenddineen/** * Author: Andreas Wilm * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * 2007-12-12: Andreas Wilm (UCD): initial implementation * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include "clustalw_version.h" #include "Help.h" using namespace std; Help::Help() { section s; string version = CLUSTALW_VERSION; // all sections were added in order of appearance in original // clustalw_help // To add new entries use the template below. // Paste content as text, // escape all " with \", // add " at the beginning of the line, // and \n" at the end of the line // template: // s.marker = ""; // s.title = ""; // s.content = ""; // sections.push_back(s); s.marker = "NEW"; s.title = "NEW FEATURES/OPTIONS"; s.content = "==UPGMA==" " \n" " The UPGMA algorithm has been added to allow faster tree construction. The user now\n" " has the choice of using Neighbour Joining or UPGMA. The default is still NJ, but the\n" " user can change this by setting the clustering parameter.\n" " \n" " -CLUSTERING= :NJ or UPGMA\n" " \n" "==ITERATION==\n" "\n" " A remove first iteration scheme has been added. This can be used to improve the final\n" " alignment or improve the alignment at each stage of the progressive alignment. During the \n" " iteration step each sequence is removed in turn and realigned. If the resulting alignment \n" " is better than the previous alignment it is kept. This process is repeated until the score\n" " converges (the score is not improved) or until the maximum number of iterations is \n" " reached. The user can iterate at each step of the progressive alignment by setting the \n" " iteration parameter to TREE or just on the final alignment by seting the iteration \n" " parameter to ALIGNMENT. The default is no iteration. 
The maximum number of iterations can \n" " be set using the numiter parameter. The default number of iterations is 3.\n" " \n" " -ITERATION= :NONE or TREE or ALIGNMENT\n" " \n" " -NUMITER=n :Maximum number of iterations to perform\n" " \n" "==HELP==\n" " \n" " -FULLHELP :Print out the complete help content\n" " \n" "==MISC==\n" "\n" " -MAXSEQLEN=n :Maximum allowed sequence length\n" " \n" " -QUIET :Reduce console output to minimum\n" " \n" " -STATS=file :Log some alignents statistics to file\n" ""; sections.push_back(s); s.marker = "1"; s.title = "General help for CLUSTAL W (" + version + ")"; s.content = "Clustal W is a general purpose multiple alignment program for DNA or proteins.\n" "\n" "SEQUENCE INPUT: all sequences must be in 1 file, one after another. \n" "7 formats are automatically recognised: NBRF-PIR, EMBL-SWISSPROT, \n" "Pearson (Fasta), Clustal (*.aln), GCG-MSF (Pileup), GCG9-RSF and GDE flat file.\n" "All non-alphabetic characters (spaces, digits, punctuation marks) are ignored\n" "except \"-\" which is used to indicate a GAP (\".\" in MSF-RSF). \n" "\n" "To do a MULTIPLE ALIGNMENT on a set of sequences, use item 1 from this menu to \n" "INPUT them; go to menu item 2 to do the multiple alignment.\n" "\n" "PROFILE ALIGNMENTS (menu item 3) are used to align 2 alignments. Use this to\n" "add a new sequence to an old alignment, or to use secondary structure to guide \n" "the alignment process. GAPS in the old alignments are indicated using the \"-\" \n" "character. PROFILES can be input in ANY of the allowed formats; just \n" "use \"-\" (or \".\" for MSF-RSF) for each gap position.\n" "\n" "PHYLOGENETIC TREES (menu item 4) can be calculated from old alignments (read in\n" "with \"-\" characters to indicate gaps) OR after a multiple alignment while the \n" "alignment is still in memory.\n" "\n" "\n" "The program tries to automatically recognise the different file formats used\n" "and to guess whether the sequences are amino acid or nucleotide. 
This is not\n" "always foolproof.\n" "\n" "FASTA and NBRF-PIR formats are recognised by having a \">\" as the first \n" "character in the file. \n" "\n" "EMBL-Swiss Prot formats are recognised by the letters\n" "ID at the start of the file (the token for the entry name field). \n" "\n" "CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.\n" "\n" "GCG-MSF format is recognised by one of the following:\n" " - the word PileUp at the start of the file. \n" " - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT\n" " at the start of the file.\n" " - the word MSF on the first line of the line, and the characters ..\n" " at the end of this line.\n" "\n" "GCG-RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of\n" "the file.\n" "\n" "\n" "If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the\n" "sequence will be assumed to be nucleotide. This works in 97.3% of cases\n" "but watch out!\n" ""; sections.push_back(s); s.marker = "2"; s.title = "Help for multiple alignments"; s.content = "If you have already loaded sequences, use menu item 1 to do the complete\n" "multiple alignment. 
You will be prompted for 2 output files: 1 for the \n" "alignment itself; another to store a dendrogram that describes the similarity\n" "of the sequences to each other.\n" "\n" "Multiple alignments are carried out in 3 stages (automatically done from menu\n" "item 1 ...Do complete multiple alignments now):\n" "\n" "1) all sequences are compared to each other (pairwise alignments);\n" "\n" "2) a dendrogram (like a phylogenetic tree) is constructed, describing the\n" "approximate groupings of the sequences by similarity (stored in a file).\n" "\n" "3) the final multiple alignment is carried out, using the dendrogram as a guide.\n" "\n" "\n" "PAIRWISE ALIGNMENT parameters control the speed-sensitivity of the initial\n" "alignments.\n" "\n" "MULTIPLE ALIGNMENT parameters control the gaps in the final multiple alignments.\n" "\n" "\n" "RESET GAPS (menu item 7) will remove any new gaps introduced into the sequences\n" "during multiple alignment if you wish to change the parameters and try again.\n" "This only takes effect just before you do a second multiple alignment. You\n" "can make phylogenetic trees after alignment whether or not this is ON.\n" "If you turn this OFF, the new gaps are kept even if you do a second multiple\n" "alignment. This allows you to iterate the alignment gradually. Sometimes, the \n" "alignment is improved by a second or third pass.\n" "\n" "SCREEN DISPLAY (menu item 8) can be used to send the output alignments to the \n" "screen as well as to the output file.\n" "\n" "You can skip the first stages (pairwise alignments; dendrogram) by using an\n" "old dendrogram file (menu item 3); or you can just produce the dendrogram\n" "with no final multiple alignment (menu item 2).\n" "\n" "\n" "OUTPUT FORMAT: Menu item 9 (format options) allows you to choose from 6 \n" "different alignment formats (CLUSTAL, GCG, NBRF-PIR, PHYLIP, GDE, NEXUS, and FASTA). 
\n" "\n" ""; sections.push_back(s); s.marker = "3"; s.title = "Help for pairwise alignment parameters"; s.content = "A distance is calculated between every pair of sequences and these are used to\n" "construct the dendrogram which guides the final multiple alignment. The scores\n" "are calculated from separate pairwise alignments. These can be calculated using\n" "2 methods: dynamic programming (slow but accurate) or by the method of Wilbur\n" "and Lipman (extremely fast but approximate). \n" "\n" "You can choose between the 2 alignment methods using menu option 8. The\n" "slow-accurate method is fine for short sequences but will be VERY SLOW for \n" "many (e.g. >100) long (e.g. >1000 residue) sequences. \n" "\n" "SLOW-ACCURATE alignment parameters:\n" " These parameters do not have any affect on the speed of the alignments. \n" "They are used to give initial alignments which are then rescored to give percent\n" "identity scores. These % scores are the ones which are displayed on the \n" "screen. The scores are converted to distances for the trees.\n" "\n" "1) Gap Open Penalty: the penalty for opening a gap in the alignment.\n" "2) Gap extension penalty: the penalty for extending a gap by 1 residue.\n" "3) Protein weight matrix: the scoring table which describes the similarity\n" " of each amino acid to each other.\n" "4) DNA weight matrix: the scores assigned to matches and mismatches \n" " (including IUB ambiguity codes).\n" "\n" "\n" "FAST-APPROXIMATE alignment parameters:\n" "\n" "These similarity scores are calculated from fast, approximate, global align-\n" "ments, which are controlled by 4 parameters. 2 techniques are used to make\n" "these alignments very fast: 1) only exactly matching fragments (k-tuples) are\n" "considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)\n" "are used.\n" "\n" "K-TUPLE SIZE: This is the size of exactly matching fragment that is used. 
\n" "INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.\n" "For longer sequences (e.g. >1000 residues) you may need to increase the default.\n" "\n" "GAP PENALTY: This is a penalty for each gap in the fast alignments. It has\n" "little affect on the speed or sensitivity except for extreme values.\n" "\n" "TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary\n" "dot-matrix plot) is calculated. Only the best ones (with most matches) are\n" "used in the alignment. This parameter specifies how many. Decrease for speed;\n" "increase for sensitivity.\n" "\n" "WINDOW SIZE: This is the number of diagonals around each of the 'best' \n" "diagonals that will be used. Decrease for speed; increase for sensitivity.\n" ""; sections.push_back(s); s.marker = "4"; s.title = "Help for multiple alignment parameters"; s.content = "These parameters control the final multiple alignment. This is the core of the\n" "program and the details are complicated. To fully understand the use of the\n" "parameters and the scoring system, you will have to refer to the documentation.\n" "\n" "Each step in the final multiple alignment consists of aligning two alignments \n" "or sequences. This is done progressively, following the branching order in \n" "the GUIDE TREE. The basic parameters to control this are two gap penalties and\n" "the scores for various identical-non-indentical residues. \n" "\n" "1) and 2) The GAP PENALTIES are set by menu items 1 and 2. These control the \n" "cost of opening up every new gap and the cost of every item in a gap. \n" "Increasing the gap opening penalty will make gaps less frequent. Increasing \n" "the gap extension penalty will make gaps shorter. Terminal gaps are not \n" "penalised.\n" "\n" "3) The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most\n" "distantly related sequences until after the most closely related sequences have \n" "been aligned. 
The setting shows the percent identity level required to delay\n" "the addition of a sequence; sequences that are less identical than this level\n" "to any other sequences will be aligned later.\n" "\n" "\n" "\n" "4) The TRANSITION WEIGHT gives transitions (A <--> G or C <--> T \n" "i.e. purine-purine or pyrimidine-pyrimidine substitutions) a weight between 0\n" "and 1; a weight of zero means that the transitions are scored as mismatches,\n" "while a weight of 1 gives the transitions the match score. For distantly related\n" "DNA sequences, the weight should be near to zero; for closely related sequences\n" "it can be useful to assign a higher score.\n" "\n" "\n" "5) PROTEIN WEIGHT MATRIX leads to a new menu where you are offered a choice of\n" "weight matrices. The default for proteins in version 1.8 is the PAM series \n" "derived by Gonnet and colleagues. Note, a series is used! The actual matrix\n" "that is used depends on how similar the sequences to be aligned at this \n" "alignment step are. Different matrices work differently at each evolutionary\n" "distance. \n" "\n" "6) DNA WEIGHT MATRIX leads to a new menu where a single matrix (not a series)\n" "can be selected. The default is the matrix used by BESTFIT for comparison of\n" "nucleic acid sequences.\n" "\n" "Further help is offered in the weight matrix menu.\n" "\n" "\n" "7) In the weight matrices, you can use negative as well as positive values if\n" "you wish, although the matrix will be automatically adjusted to all positive\n" "scores, unless the NEGATIVE MATRIX option is selected.\n" "\n" "8) PROTEIN GAP PARAMETERS displays a menu allowing you to set some Gap Penalty\n" "options which are only used in protein alignments.\n" ""; sections.push_back(s); s.marker = "A"; s.title = "Help for protein gap parameters."; s.content = "1) RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce\n" "or increase the gap opening penalties at each position in the alignment or\n" "sequence. 
See the documentation for details. As an example, positions that \n" "are rich in glycine are more likely to have an adjacent gap than positions that\n" "are rich in valine.\n" "\n" "2) 3) HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within\n" "a run (5 or more residues) of hydrophilic amino acids; these are likely to\n" "be loop or random coil regions where gaps are more common. The residues that \n" "are \"considered\" to be hydrophilic are set by menu item 3.\n" "\n" "4) GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too\n" "close to each other. Gaps that are less than this distance apart are penalised\n" "more than other gaps. This does not prevent close gaps; it makes them less\n" "frequent, promoting a block-like appearance of the alignment.\n" "\n" "5) END GAP SEPARATION treats end gaps just like internal gaps for the purposes\n" "of avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above).\n" "If you turn this off, end gaps will be ignored for this purpose. This is\n" "useful when you wish to align fragments where the end gaps are not biologically\n" "meaningful.\n" ""; sections.push_back(s); s.marker = "5"; s.title = "Help for output format options."; s.content = "Several output formats are offered. You can choose any (or all 6 if you wish). \n" "\n" "CLUSTAL format output is a self explanatory alignment format. It shows the\n" "sequences aligned in blocks. It can be read in again at a later date to\n" "(for example) calculate a phylogenetic tree or add a new sequence with a \n" "profile alignment.\n" "\n" "GCG output can be used by any of the GCG programs that can work on multiple\n" "alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG\n" ".msf format files (multiple sequence file); new in version 7 of GCG.\n" "\n" "Fasta output cis widely used because of it's simplicity. Each sequence name is\n" "preceeded by a '>'-sign. 
The sequence itself is printed out in the following lines\n" "\n" "PHYLIP format output can be used for input to the PHYLIP package of Joe \n" "Felsenstein. This is an extremely widely used package for doing every \n" "imaginable form of phylogenetic analysis (MUCH more than the the modest intro-\n" "duction offered by this program).\n" "\n" "NBRF-PIR: this is the same as the standard PIR format with ONE ADDITION. Gap\n" "characters \"-\" are used to indicate the positions of gaps in the multiple \n" "alignment. These files can be re-used as input in any part of clustal that\n" "allows sequences (or alignments or profiles) to be read in. \n" "\n" "GDE: this is the flat file format used by the GDE package of Steven Smith.\n" "\n" "NEXUS: the format used by several phylogeny programs, including PAUP and\n" "MacClade.\n" "\n" "GDE OUTPUT CASE: sequences in GDE format may be written in either upper or\n" "lower case.\n" "\n" "CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the\n" "alignment lines in clustalw format.\n" "\n" "OUTPUT ORDER is used to control the order of the sequences in the output\n" "alignments. By default, the order corresponds to the order in which the\n" "sequences were aligned (from the guide tree-dendrogram), thus automatically\n" "grouping closely related sequences. This switch can be used to set the order\n" "to the same as the input file.\n" "\n" "PARAMETER OUTPUT: This option allows you to save all your parameter settings\n" "in a parameter file. This file can be used subsequently to rerun Clustal W\n" "using the same parameters.\n" ""; sections.push_back(s); s.marker = "6"; s.title = "Help for profile and structure alignments"; s.content = "By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile \n" "alignments allow you to store alignments of your favourite sequences and add\n" "new sequences to them in small bunches at a time. A profile is simply an\n" "alignment of one or more sequences (e.g. 
an alignment output file from CLUSTAL\n" "W). Each input can be a single sequence. One or both sets of input sequences\n" "may include secondary structure assignments or gap penalty masks to guide the\n" "alignment. \n" "\n" "The profiles can be in any of the allowed input formats with \"-\" characters\n" "used to specify gaps (except for MSF-RSF where \".\" is used).\n" "\n" "You have to specify the 2 profiles by choosing menu items 1 and 2 and giving\n" "2 file names. Then Menu item 3 will align the 2 profiles to each other. \n" "Secondary structure masks in either profile can be used to guide the alignment.\n" "\n" "Menu item 4 will take the sequences in the second profile and align them to\n" "the first profile, 1 at a time. This is useful to add some new sequences to\n" "an existing alignment, or to align a set of sequences to a known structure. \n" "In this case, the second profile would not be pre-aligned.\n" "\n" "\n" "The alignment parameters can be set using menu items 5, 6 and 7. These are\n" "EXACTLY the same parameters as used by the general, automatic multiple\n" "alignment procedure. The general multiple alignment procedure is simply a\n" "series of profile alignments. Carrying out a series of profile alignments on\n" "larger and larger groups of sequences, allows you to manually build up a\n" "complete alignment, if necessary editing intermediate alignments.\n" "\n" "SECONDARY STRUCTURE OPTIONS. Menu Option 0 allows you to set 2D structure\n" "parameters. If a solved structure is available, it can be used to guide the \n" "alignment by raising gap penalties within secondary structure elements, so \n" "that gaps will preferentially be inserted into unstructured surface loops.\n" "Alternatively, a user-specified gap penalty mask can be supplied directly.\n" "\n" "A gap penalty mask is a series of numbers between 1 and 9, one per position in \n" "the alignment. 
Each number specifies how much the gap opening penalty is to be \n" "raised at that position (raised by multiplying the basic gap opening penalty\n" "by the number) i.e. a mask figure of 1 at a position means no change\n" "in gap opening penalty; a figure of 4 means that the gap opening penalty is\n" "four times greater at that position, making gaps 4 times harder to open.\n" "\n" "The format for gap penalty masks and secondary structure masks is explained\n" "in the help under option 0 (secondary structure options).\n" ""; sections.push_back(s); s.marker = "B"; s.title = "Help for secondary structure - gap penalty masks"; s.content = "The use of secondary structure-based penalties has been shown to improve the\n" "accuracy of multiple alignment. Therefore CLUSTAL W now allows gap penalty \n" "masks to be supplied with the input sequences. The masks work by raising gap \n" "penalties in specified regions (typically secondary structure elements) so that\n" "gaps are preferentially opened in the less well conserved regions (typically \n" "surface loops).\n" "\n" "Options 1 and 2 control whether the input secondary structure information or\n" "gap penalty masks will be used.\n" "\n" "Option 3 controls whether the secondary structure and gap penalty masks should\n" "be included in the output alignment.\n" "\n" "Options 4 and 5 provide the value for raising the gap penalty at core Alpha \n" "Helical (A) and Beta Strand (B) residues. In CLUSTAL format, capital residues \n" "denote the A and B core structure notation. The basic gap penalties are\n" "multiplied by the amount specified.\n" "\n" "Option 6 provides the value for the gap penalty in Loops. By default this \n" "penalty is not raised. In CLUSTAL format, loops are specified by \".\" in the \n" "secondary structure notation.\n" "\n" "Option 7 provides the value for setting the gap penalty at the ends of \n" "secondary structures. 
Ends of secondary structures are observed to grow \n" "and-or shrink in related structures. Therefore by default these are given \n" "intermediate values, lower than the core penalties. All secondary structure \n" "read in as lower case in CLUSTAL format gets the reduced terminal penalty.\n" "\n" "Options 8 and 9 specify the range of structure termini for the intermediate \n" "penalties. In the alignment output, these are indicated as lower case. \n" "For Alpha Helices, by default, the range spans the end helical turn. For \n" "Beta Strands, the default range spans the end residue and the adjacent loop \n" "residue, since sequence conservation often extends beyond the actual H-bonded\n" "Beta Strand.\n" "\n" "CLUSTAL W can read the masks from SWISS-PROT, CLUSTAL or GDE format input\n" "files. For many 3-D protein structures, secondary structure information is\n" "recorded in the feature tables of SWISS-PROT database entries. You should\n" "always check that the assignments are correct - some are quite inaccurate.\n" "CLUSTAL W looks for SWISS-PROT HELIX and STRAND assignments e.g.\n" "\n" "FT HELIX 100 115\n" "FT STRAND 118 119\n" "\n" "The structure and penalty masks can also be read from CLUSTAL alignment format \n" "as comment lines beginning \"!SS_\" or \"!GM_\" e.g.\n" "\n" "!SS_HBA_HUMA ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA\n" "!GM_HBA_HUMA 112224444444444222122244444444442222224222111111111222444444\n" "HBA_HUMA VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK\n" "\n" "Note that the mask itself is a set of numbers between 1 and 9 each of which is \n" "assigned to the residue(s) in the same column below. \n" "\n" "In GDE flat file format, the masks are specified as text and the names must\n" "begin with \"SS_ or \"GM_.\n" "\n" "Either a structure or penalty mask or both may be used. 
If both are included in\n" "an alignment, the user will be asked which is to be used.\n" ""; sections.push_back(s); s.marker = "C"; s.title = "Help for secondary structure - gap penalty mask output options"; s.content = "The options in this menu let you choose whether or not to include the masks\n" "in the CLUSTAL W output alignments. Showing both is useful for understanding\n" "how the masks work. The secondary structure information is itself very useful\n" "in judging the alignment quality and in seeing how residue conservation\n" "patterns vary with secondary structure.\n" ""; sections.push_back(s); s.marker = "7"; s.title = "Help for phylogenetic trees"; s.content = "1) Before calculating a tree, you must have an ALIGNMENT in memory. This can be\n" "input in any format or you should have just carried out a full multiple\n" "alignment and the alignment is still in memory. \n" "\n" "\n" "*************** Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!! ***************\n" "\n" "\n" "The methods used are NJ (Neighbour Joining) and UPGMA. First\n" "you calculate distances (percent divergence) between all pairs of sequence from\n" "a multiple alignment; second you apply the NJ or UPGMA method to the distance matrix.\n" "\n" "2) EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where\n" "ANY of the sequences have a gap will be ignored. This means that 'like' will be\n" "compared to 'like' in all distances, which is highly desirable. It also\n" "automatically throws away the most ambiguous parts of the alignment, which are\n" "concentrated around gaps (usually). The disadvantage is that you may throw away\n" "much of the data if there are many gaps (which is why it is difficult for us to\n" "make it the default). \n" "\n" "\n" "\n" "3) CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this\n" "option makes no difference. 
For greater divergence, it corrects for the fact\n" "that observed distances underestimate actual evolutionary distances. This is\n" "because, as sequences diverge, more than one substitution will happen at many\n" "sites. However, you only see one difference when you look at the present day\n" "sequences. Therefore, this option has the effect of stretching branch lengths\n" "in trees (especially long branches). The corrections used here (for DNA or\n" "proteins) are both due to Motoo Kimura. See the documentation for details. \n" "\n" "Where possible, this option should be used. However, for VERY divergent\n" "sequences, the distances cannot be reliably corrected. You will be warned if\n" "this happens. Even if none of the distances in a data set exceed the reliable\n" "threshold, if you bootstrap the data, some of the bootstrap distances may\n" "randomly exceed the safe limit. \n" "\n" "4) To calculate a tree, use option 4 (DRAW TREE NOW). This gives an UNROOTED\n" "tree and all branch lengths. The root of the tree can only be inferred by\n" "using an outgroup (a sequence that you are certain branches at the outside\n" "of the tree .... certain on biological grounds) OR if you assume a degree\n" "of constancy in the 'molecular clock', you can place the root in the 'middle'\n" "of the tree (roughly equidistant from all tips).\n" "\n" "5) TOGGLE PHYLIP BOOTSTRAP POSITIONS\n" "By default, the bootstrap values are correctly placed on the tree branches of\n" "the phylip format output tree. The toggle allows them to be placed on the\n" "nodes, which is incorrect, but some display packages (e.g. TreeTool, TreeView\n" "and Phylowin) only support node labelling but not branch labelling. Care\n" "should be taken to note which branches and labels go together.\n" "\n" "6) OUTPUT FORMATS: four different formats are allowed. None of these displays\n" "the tree visually. 
Useful display programs accepting PHYLIP format include\n" "NJplot (from Manolo Gouy and supplied with Clustal W), TreeView (Mac-PC), and\n" "PHYLIP itself - OR get the PHYLIP package and use the tree drawing facilities\n" "there. (Get the PHYLIP package anyway if you are interested in trees). The\n" "NEXUS format can be read into PAUP or MacClade.\n" ""; sections.push_back(s); s.marker = "8"; s.title = "Help for choosing a weight matrix"; s.content = "For protein alignments, you use a weight matrix to determine the similarity of\n" "non-identical amino acids. For example, Tyr aligned with Phe is usually judged \n" "to be 'better' than Tyr aligned with Pro.\n" "\n" "There are three 'in-built' series of weight matrices offered. Each consists of\n" "several matrices which work differently at different evolutionary distances. To\n" "see the exact details, read the documentation. Crudely, we store several\n" "matrices in memory, spanning the full range of amino acid distance (from almost\n" "identical sequences to highly divergent ones). For very similar sequences, it\n" "is best to use a strict weight matrix which only gives a high score to\n" "identities and the most favoured conservative substitutions. For more divergent\n" "sequences, it is appropriate to use \"softer\" matrices which give a high score\n" "to many other frequent substitutions.\n" "\n" "1) BLOSUM (Henikoff). These matrices appear to be the best available for \n" "carrying out database similarity (homology searches). The matrices used are:\n" "Blosum 80, 62, 45 and 30. (BLOSUM was the default in earlier Clustal W\n" "versions)\n" "\n" "2) PAM (Dayhoff). These have been extremely widely used since the late '70s.\n" "We use the PAM 20, 60, 120 and 350 matrices.\n" "\n" "3) GONNET. These matrices were derived using almost the same procedure as the\n" "Dayhoff one (above) but are much more up to date and are based on a far larger\n" "data set. They appear to be more sensitive than the Dayhoff series. 
We use the\n" "GONNET 80, 120, 160, 250 and 350 matrices. This series is the default for\n" "Clustal W version 1.8.\n" "\n" "We also supply an identity matrix which gives a score of 1.0 to two identical \n" "amino acids and a score of zero otherwise. This matrix is not very useful.\n" "Alternatively, you can read in your own (just one matrix, not a series).\n" "\n" "A new matrix can be read from a file on disk, if the filename consists only\n" "of lower case characters. The values in the new weight matrix must be integers\n" "and the scores should be similarities. You can use negative as well as positive\n" "values if you wish, although the matrix will be automatically adjusted to all\n" "positive scores.\n" "\n" "\n" "\n" "For DNA, a single matrix (not a series) is used. Two hard-coded matrices are \n" "available:\n" "\n" "\n" "1) IUB. This is the default scoring matrix used by BESTFIT for the comparison\n" "of nucleic acid sequences. X's and N's are treated as matches to any IUB\n" "ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.\n" " \n" " \n" "2) CLUSTALW(1.6). The previous system used by Clustal W, in which matches score\n" "1.0 and mismatches score 0. All matches for IUB symbols also score 0.\n" "\n" "INPUT FORMAT The format used for a new matrix is the same as the BLAST program.\n" "Any lines beginning with a # character are assumed to be comments. The first\n" "non-comment line should contain a list of amino acids in any order, using the\n" "1 letter code, followed by a * character. This should be followed by a square\n" "matrix of integer scores, with one row and one column for each amino acid. 
The\n" "last row and column of the matrix (corresponding to the * character) contain\n" "the minimum score over the whole matrix.\n" ""; sections.push_back(s); s.marker = "9"; s.title = "Help for command line parameters"; s.content = " DATA (sequences)\n" "\n" "-INFILE=file.ext :input sequences.\n" "-PROFILE1=file.ext and -PROFILE2=file.ext :profiles (old alignment).\n" "\n" "\n" " VERBS (do things)\n" "\n" "-OPTIONS :list the command line parameters\n" "-HELP or -CHECK :outline the command line params.\n" "-FULLHELP :output full help content.\n" "-ALIGN :do full multiple alignment.\n" "-TREE :calculate NJ tree.\n" "-PIM :output percent identity matrix (while calculating the tree)\n" "-BOOTSTRAP(=n) :bootstrap a NJ tree (n= number of bootstraps; def. = 1000).\n" "-CONVERT :output the input sequences in a different file format.\n" "\n" "\n" " PARAMETERS (set things)\n" "\n" "***General settings:****\n" "-INTERACTIVE :read command line, then enter normal interactive menus\n" "-QUICKTREE :use FAST algorithm for the alignment guide tree\n" "-TYPE= :PROTEIN or DNA sequences\n" "-NEGATIVE :protein alignment with negative values in matrix\n" "-OUTFILE= :sequence alignment file name\n" "-OUTPUT= :CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA\n" "-OUTORDER= :INPUT or ALIGNED\n" "-CASE :LOWER or UPPER (for GDE output only)\n" "-SEQNOS= :OFF or ON (for Clustal output only)\n" "-SEQNO_RANGE=:OFF or ON (NEW: for all output formats)\n" "-RANGE=m,n :sequence range to write starting m to m+n\n" "-MAXSEQLEN=n :maximum allowed input sequence length\n" "-QUIET :Reduce console output to minimum\n" "-STATS= :Log some alignents statistics to file\n" "\n" "***Fast Pairwise Alignments:***\n" "-KTUPLE=n :word size\n" "-TOPDIAGS=n :number of best diags.\n" "-WINDOW=n :window around best diags.\n" "-PAIRGAP=n :gap penalty\n" "-SCORE :PERCENT or ABSOLUTE\n" "\n" "\n" "***Slow Pairwise Alignments:***\n" "-PWMATRIX= :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename\n" 
"-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename\n" "-PWGAPOPEN=f :gap opening penalty \n" "-PWGAPEXT=f :gap opening penalty\n" "\n" "\n" "***Multiple Alignments:***\n" "-NEWTREE= :file for new guide tree\n" "-USETREE= :file for old guide tree\n" "-MATRIX= :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename\n" "-DNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename\n" "-GAPOPEN=f :gap opening penalty \n" "-GAPEXT=f :gap extension penalty\n" "-ENDGAPS :no end gap separation pen. \n" "-GAPDIST=n :gap separation pen. range\n" "-NOPGAP :residue-specific gaps off \n" "-NOHGAP :hydrophilic gaps off\n" "-HGAPRESIDUES= :list hydrophilic res. \n" "-MAXDIV=n :% ident. for delay\n" "-TYPE= :PROTEIN or DNA\n" "-TRANSWEIGHT=f :transitions weighting\n" "-ITERATION= :NONE or TREE or ALIGNMENT\n" "-NUMITER=n :maximum number of iterations to perform\n" "-NOWEIGHTS :disable sequence weighting\n" "\n" "\n" "***Profile Alignments:***\n" "-PROFILE :Merge two alignments by profile alignment\n" "-NEWTREE1= :file for new guide tree for profile1\n" "-NEWTREE2= :file for new guide tree for profile2\n" "-USETREE1= :file for old guide tree for profile1\n" "-USETREE2= :file for old guide tree for profile2\n" "\n" "\n" "***Sequence to Profile Alignments:***\n" "-SEQUENCES :Sequentially add profile2 sequences to profile1 alignment\n" "-NEWTREE= :file for new guide tree\n" "-USETREE= :file for old guide tree\n" "\n" "\n" "***Structure Alignments:***\n" "-NOSECSTR1 :do not use secondary structure-gap penalty mask for profile 1 \n" "-NOSECSTR2 :do not use secondary structure-gap penalty mask for profile 2\n" "-SECSTROUT=STRUCTURE or MASK or BOTH or NONE :output in alignment file\n" "-HELIXGAP=n :gap penalty for helix core residues \n" "-STRANDGAP=n :gap penalty for strand core residues\n" "-LOOPGAP=n :gap penalty for loop regions\n" "-TERMINALGAP=n :gap penalty for structure termini\n" "-HELIXENDIN=n :number of residues inside helix to be treated as terminal\n" "-HELIXENDOUT=n 
:number of residues outside helix to be treated as terminal\n" "-STRANDENDIN=n :number of residues inside strand to be treated as terminal\n" "-STRANDENDOUT=n:number of residues outside strand to be treated as terminal \n" "\n" "\n" "***Trees:***\n" "-OUTPUTTREE=nj OR phylip OR dist OR nexus\n" "-SEED=n :seed number for bootstraps.\n" "-KIMURA :use Kimura's correction. \n" "-TOSSGAPS :ignore positions with gaps.\n" "-BOOTLABELS=node OR branch :position of bootstrap values in tree display\n" "-CLUSTERING= :NJ or UPGMA\n" ""; sections.push_back(s); s.marker = "0"; s.title = "Help for tree output format options"; s.content = "Four output formats are offered: 1) Clustal, 2) Phylip, 3) Just the distances\n" "4) Nexus\n" "\n" "None of these formats displays the results graphically. Many packages can\n" "display trees in the the PHYLIP format 2) below. It can also be imported into\n" "the PHYLIP programs RETREE, DRAWTREE and DRAWGRAM for graphical display. \n" "NEXUS format trees can be read by PAUP and MacClade.\n" "\n" "1) Clustal format output. \n" "This format is verbose and lists all of the distances between the sequences and\n" "the number of alignment positions used for each. The tree is described at the\n" "end of the file. It lists the sequences that are joined at each alignment step\n" "and the branch lengths. After two sequences are joined, it is referred to later\n" "as a NODE. The number of a NODE is the number of the lowest sequence in that\n" "NODE. \n" "\n" "2) Phylip format output.\n" "This format is the New Hampshire format, used by many phylogenetic analysis\n" "packages. It consists of a series of nested parentheses, describing the\n" "branching order, with the sequence names and branch lengths. It can be used by\n" "the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see the\n" "trees graphically. This is the same format used during multiple alignment for\n" "the guide trees. 
\n" "\n" "Use this format with NJplot (Manolo Gouy), supplied with Clustal W. Some other\n" "packages that can read and display New Hampshire format are TreeView (Mac/PC),\n" "TreeTool (UNIX), and Phylowin.\n" "\n" "3) The distances only.\n" "This format just outputs a matrix of all the pairwise distances in a format\n" "that can be used by the Phylip package. It used to be useful when one could not\n" "produce distances from protein sequences in the Phylip package but is now\n" "redundant (Protdist of Phylip 3.5 now does this).\n" "\n" "4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,\n" "including PAUP and MacClade. The format is described fully in:\n" "Maddison, D. R., D. L. Swofford and W. P. Maddison. 1997.\n" "NEXUS: an extensible file format for systematic information.\n" "Systematic Biology 46:590-621.\n" "\n" "5) TOGGLE PHYLIP BOOTSTRAP POSITIONS\n" "By default, the bootstrap values are placed on the nodes of the phylip format\n" "output tree. This is inaccurate as the bootstrap values should be associated\n" "with the tree branches and not the nodes. However, this format can be read and\n" "displayed by TreeTool, TreeView and Phylowin. 
An option is available to\n" "correctly place the bootstrap values on the branches with which they are\n" "associated.\n" ""; sections.push_back(s); // mark // replace-string " \" // replace-regexp ^ " // replace-regexp $ \\n" // std::cout << "exiting Help::Help" << "\n"; } Help::~Help() { sections.clear(); } vector Help::ListSectionMarkers() { vector markers; for (unsigned int i=0; isections.size(); i++) { markers.push_back(this->sections[i].marker); } return markers; } string Help::GetSection(char marker) { string s(1, marker); return GetSection(s); } string Help::GetSection(string marker) { for (unsigned int i=0; isections.size(); i++) { if (this->sections[i].marker == marker) { return this->sections[i].content; } } return ""; } string Help::GetSectionTitle(char marker) { string s(1, marker); return GetSectionTitle(s); } string Help::GetSectionTitle(string marker) { for (unsigned int i=0; isections.size(); i++) { if (this->sections[i].marker == marker) { return this->sections[i].title; } } return ""; } clustalx-2.1/clustalW/Clustal.h0000644000175000017500000000551211470725216016377 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * The class Clustal is the main class in the program. It is used by the interactive * menu, command line parser and clustal x to perform the algorithmic part of the * program. 
*/ #ifndef CLUSTAL_H #define CLUSTAL_H #include #include "general/clustalw.h" #include "general/utils.h" #include "general/userparams.h" #include "fileInput/FileReader.h" #include "alignment/Alignment.h" #include "alignment/AlignmentOutput.h" using namespace std; namespace clustalw { class Clustal { public: /* Functions */ Clustal(); void align(string* phylipName, bool createOutput = true); void sequencesAlignToProfile(string* phylipName); void profileAlign(string* p1TreeName, string* p2TreeName); void doGuideTreeOnly(string* phylipName); void doAlignUseOldTree(string* phylipName); void getHelp(string helpPointer, bool printTitle = false); void getHelp(char helpPointer, bool printTitle = false); void getFullHelp(); int sequenceInput(bool append, string *offendingSeq); int profile1Input(string profile1Name = ""); int profile2Input(string profile2Name = ""); int commandLineReadSeq(int firstSeq); void outputNow(); void phylogeneticTree(string* phylip_name, string* clustal_name, string* dist_name, string* nexus_name, string pimName); void bootstrapTree(string* phylip_name, string* clustal_name, string* nexus_name); Alignment* getAlignmentPtr(){return &alignmentObj;} void QTcalcLowScoreSegments(LowScoreSegParams* params); void QTcalcWeightsForLowScoreSeg(LowScoreSegParams* params); void QTremoveShortSegments(LowScoreSegParams* params); void QTSetFileNamesForOutput(AlignmentFileNames fileNames); bool QTRealignSelectedRange(AlignmentFileNames fileNames, int beginPos, int endPos, bool realignEndGapPen); void test(); /* Attributes */ private: /* Functions */ void initInterface(); void calcGapPenaltyMask(int prfLength, vector* mask, vector* gapMask); bool useExistingGuideTree(int type, string* phylipName, const string& path); void promptForNewGuideTreeName(int type, string* treeName, const string& path); //bool removeFirstIterate(Alignment* alnPtr, DistMatrix* distMat); /* Attributes */ enum{Sequences, Profile1, Profile2}; string sequencesMsg, profile1Msg, profile2Msg; 
string newProfile1TreePrompt, newProfile2TreePrompt; Alignment alignmentObj; string helpFileName; int newSeq; bool checkTree; AlignmentFileNames QTFileNames; }; } #endif clustalx-2.1/clustalW/Clustal.cpp0000644000175000017500000014501611470725216016736 0ustar ddineenddineen/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle * cross-platform end-of-lines (for dendrograms, not for help file). Remerged * this change 13-04-07. * * Mark 10-5-2007: Bug fix # 42. call getWeightsForQtLowScore function in * QTcalcWeightsForLowScoreSeg instead of getWeightsFromDistMat. * * Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs * Mark Change 20-6-07, added call to calculateMaxLengths() * Mark, 3-7-07, Changed getHelp. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include "Clustal.h" #include "pairwise/FullPairwiseAlign.h" #include "pairwise/FastPairwiseAlign.h" #include "multipleAlign/MSA.h" #include "multipleAlign/LowScoreSegProfile.h" #include "multipleAlign/Iteration.h" #include "tree/TreeInterface.h" #include "general/debuglogObject.h" #include "general/statsObject.h" #include "alignment/ObjectiveScore.h" #include "general/ClustalWResources.h" #include "Help.h" #include using namespace std; namespace clustalw { Clustal::Clustal() { #ifdef WINDOWS helpFileName = string("clustalw.hlp"); #else helpFileName = string("clustalw_help"); #endif checkTree = true; newSeq = 0; sequencesMsg = "\nUse the existing GUIDE TREE file, "; profile1Msg = "\nUse the existing GUIDE TREE file for Profile 1, " ; profile2Msg = "\nUse the existing GUIDE TREE file for Profile 2, "; newProfile1TreePrompt = "\nEnter name for new GUIDE TREE file for profile 1 ["; newProfile2TreePrompt = "\nEnter name for new GUIDE TREE file for profile 2 ["; initInterface(); } /** 
*********************************************************************** * The function align is used to do a full multiple sequence alignment. * **************************************************************************/ // FIXME: merge doAlignUseOldTree in here void Clustal::align(string* phylipName, bool createOutput) { //time_t start, end; //double dif; //start = time (NULL); //ObjectiveScore score; //double _score = score.getSagaScore(&alignmentObj); //cout << "SAGA score " << _score << "\n"; string path; int count; AlignmentOutput alignOutput; if(userParameters->getEmpty() && userParameters->getMenuFlag()) { utilityObject->error("No sequences in memory. Load sequences first."); return; } userParameters->setStructPenalties1(NONE); userParameters->setStructPenalties2(NONE); alignmentObj.clearSecStruct1(); alignmentObj.clearSecStruct2(); utilityObject->getPath(userParameters->getSeqName(), &path); if(createOutput && (userParameters->getMenuFlag() || !userParameters->getInteractive())) { if(!alignOutput.openAlignmentOutput(path)) { return; } } else if(createOutput) { // We are using clustalQT. // Open all the files. if(!alignOutput.QTOpenFilesForOutput(QTFileNames)) { return; // could not open the files. 
} } if (userParameters->getSaveParameters()) { userParameters->createParameterOutput(); } if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetAlign(); } if(userParameters->getDisplayInfo()) { cout << "Start of Pairwise alignments\n"; cout << "Aligning...\n"; } if(userParameters->getDNAFlag()) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } if (statsObject->isEnabled()) { statsObject->logInputSeqStats(&alignmentObj); } /// STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX SymMatrix distMat; int _numSeqs = alignmentObj.getNumSeqs(); distMat.ResizeRect(_numSeqs + 1); PairwiseAlignBase* pairwiseDist; if (userParameters->getQuickPairAlign()) { pairwiseDist = new FastPairwiseAlign(); } else { pairwiseDist = new FullPairwiseAlign(); } // Generate distance matrix! pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs); delete pairwiseDist; bool success = false; auto_ptr progSteps; vector seqWeight(_numSeqs + 1); #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Calling getWeightsAndStepsFromDistMat\n"); } #endif TreeInterface calcSteps; progSteps = calcSteps.getWeightsAndStepsFromDistMat(&seqWeight, &distMat, &alignmentObj, 1, _numSeqs, phylipName, &success); //cout << "weights and steps calculated!\n"; //end = time (NULL); //dif = difftime(end, start); //cout << "It took " << dif << " seconds so Far\n"; if(!success) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("Unsuccessful!!!\n"); } #endif return; } #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("doing multiSeqAlign\n"); } #endif MSA* msaObj = new MSA(); count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeight, progSteps.get(), 0); delete msaObj; //cout << "alignment finished!\n"; //end = time (NULL); //dif = difftime(end, start); //cout << "It took " << dif << " seconds so Far\n"; if (count <= 0) { return; } if (userParameters->getMenuFlag()) { cout << 
"\n\n\n"; } /// Do iteration to improve alignment!!! if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT) { //userParameters->setNumIterations(_numSeqs * 2); Iteration iterateObj; iterateObj.removeFirstIterate(&alignmentObj); alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07 if(userParameters->getDisplayInfo()) cout << "Finished iteration\n"; } if (statsObject->isEnabled()) { statsObject->logAlignedSeqStats(&alignmentObj); } /// STEP 4: OUTPUT THE ALIGNMENT if(createOutput) { alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs); } (*phylipName) = ""; //end = time (NULL); //dif = difftime(end, start); //cout << "It took " << dif << " seconds\n"; } /** **************************************************************************** * The function sequencesAlignToProfile is used to align a set of sequences to * * a profile * *******************************************************************************/ void Clustal::sequencesAlignToProfile(string* phylipName) { string path; string treeName; bool useTree; int i, j, count; float dscore; bool saveSS2; AlignmentOutput alignOutput; if(userParameters->getProfile1Empty() && userParameters->getMenuFlag()) { utilityObject->error("No profile in memory. Input 1st profile first.\n"); return; } if(userParameters->getProfile2Empty() && userParameters->getMenuFlag()) { utilityObject->error("No sequences in memory. Input sequences first.\n"); return; } utilityObject->getPath(userParameters->getProfile2Name(), &path); if(userParameters->getMenuFlag() || !userParameters->getInteractive()) { if(!alignOutput.openAlignmentOutput(path)) { return; } } else { // We are using clustalQT. // Open all the files. if(!alignOutput.QTOpenFilesForOutput(QTFileNames)) { return; // could not open the files. 
} } newSeq = alignmentObj.getProfile1NumSeqs() + 1; // check for secondary structure information for list of sequences saveSS2 = userParameters->getUseSS2(); if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true && (alignmentObj.getNumSeqs() - alignmentObj.getProfile1NumSeqs() > 1)) { if (userParameters->getStructPenalties2() == SECST) { utilityObject->warning("\n\nWARNING: ignoring secondary structure for a list of sequences\n\n"); } else if (userParameters->getStructPenalties2() == GMASK) { utilityObject->warning("\n\nWARNING: ignoring gap penalty mask for a list of sequences\n\n"); } userParameters->setUseSS2(false); } DistMatrix distMat; int _numSeqs = alignmentObj.getNumSeqs(); distMat.ResizeRect(_numSeqs + 1); // // Convert to similarities!!!!!!!! // This is calcualting the similarities of the sequences in the profile part // for (i = 1; i <= newSeq; i++) { for (j = i + 1; j <= newSeq; j++) { dscore = alignmentObj.countid(i,j); distMat(i, j) = ((double)100.0 - (double)dscore)/(double)100.0; distMat(j, i) = distMat(i, j); } } //distMat.printArray(); InFileStream _treeFile; //nige useTree = false; // // Note put this into a separate function!!!!! 
// if (_numSeqs >= 2) { useTree = useExistingGuideTree(Sequences, phylipName, path); } if (userParameters->getSaveParameters()) { userParameters->createParameterOutput(); } if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetProfile2(); } else { alignmentObj.fixGaps(); } int _length = 0; if (userParameters->getStructPenalties1() == SECST) { _length = alignmentObj.getSeqLength(1); calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(), alignmentObj.getGapPenaltyMask1()); } if (userParameters->getStructPenalties2() == SECST) { _length = alignmentObj.getSeqLength(alignmentObj.getProfile1NumSeqs() + 1); calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(), alignmentObj.getGapPenaltyMask2()); } // PROGRESSIVE ALIGNMENT ALGORITHM // vector seqWeights(_numSeqs + 1); bool success = false; if (useTree == false) // create the new tree file, if necessary { if(userParameters->getDisplayInfo()) { cout << "Start of Pairwise alignments\n"; cout << "Aligning...\n"; } if(userParameters->getDNAFlag()) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } // STEP 1: CALCULATE DISTANCE MATRIX USING PAIRWISE ALIGNMENT // PairwiseAlignBase* pairwiseDist; if (userParameters->getQuickPairAlign()) { pairwiseDist = new FastPairwiseAlign(); } else { pairwiseDist = new FullPairwiseAlign(); } // Generate distance matrix! pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, newSeq-2, _numSeqs); delete pairwiseDist; if(userParameters->getDisplayInfo()) cout << "\n\n"; TreeInterface calcSeqWeights; calcSeqWeights.getWeightsFromDistMat(&seqWeights, &distMat, &alignmentObj, 1, _numSeqs, phylipName, &success); } else { TreeInterface calcSeqWeights; calcSeqWeights.getWeightsFromGuideTree(&alignmentObj, &distMat, phylipName, &seqWeights, 1, _numSeqs, &success); } if(!success) { return; } // If it is true, call the function here to get the seqweights etc from it. 
// if users request only the guide tree, return if (userParameters->getNewTreeFile()) { return; } MSA* msaObj = new MSA(); count = msaObj->seqsAlignToProfile(&alignmentObj, &distMat, &seqWeights, newSeq - 2, *phylipName); delete msaObj; userParameters->setUseSS2(saveSS2); if (count <= 0) { return; } if (userParameters->getMenuFlag()) { cout << "\n\n\n"; } /// STEP 4: OUTPUT THE ALIGNMENT // alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs); (*phylipName) = ""; } /** **************************************************************************** * The function profileAlign is used to align two profiles * *******************************************************************************/ void Clustal::profileAlign(string* p1TreeName, string* p2TreeName) { string path; //string treeName; bool useTree1, useTree2; int count, i, j, dscore; int _profile1NumSeqs = alignmentObj.getProfile1NumSeqs(); AlignmentOutput alignOutput; if(userParameters->getProfile1Empty() || userParameters->getProfile2Empty()) { utilityObject->error("No sequences in memory. Load sequences first.\n\n"); return; } utilityObject->getPath(userParameters->getProfile1Name(), &path); if(userParameters->getMenuFlag() || !userParameters->getInteractive()) { if(!alignOutput.openAlignmentOutput(path)) { return; } } else { // We are using clustalQT. // Open all the files. if(!alignOutput.QTOpenFilesForOutput(QTFileNames)) { return; // could not open the files. 
} } if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetProfile1(); alignmentObj.resetProfile2(); } else { alignmentObj.fixGaps(); } // Check if there exists a tree for profile1 useTree1 = false; if (_profile1NumSeqs >= 2) { useTree1 = useExistingGuideTree(Profile1, p1TreeName, path); } // Check if there exists a tree for profile2 useTree2 = false; utilityObject->getPath(userParameters->getProfile2Name(), &path); if (alignmentObj.getNumSeqs() - _profile1NumSeqs >= 2) { useTree2 = useExistingGuideTree(Profile2, p2TreeName, path); } if (userParameters->getSaveParameters()) { userParameters->createParameterOutput(); } int _length = 0; if (userParameters->getStructPenalties1() == SECST) { _length = alignmentObj.getSeqLength(1); calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(), alignmentObj.getGapPenaltyMask1()); } if (userParameters->getStructPenalties2() == SECST) { _length = alignmentObj.getSeqLength(_profile1NumSeqs + 1); calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(), alignmentObj.getGapPenaltyMask2()); } // Declare the distance matrix DistMatrix distMat; int _numSeqs = alignmentObj.getNumSeqs(); distMat.ResizeRect(_numSeqs + 1); if (useTree1 == false) { if (_profile1NumSeqs >= 2) { for (i = 1; i <= _profile1NumSeqs; i++) { for (j = i + 1; j <= _profile1NumSeqs; j++) { dscore = static_cast(alignmentObj.countid(i,j)); distMat(i, j) = (100.0 - dscore)/100.0; distMat(j, i) = distMat(i, j); } } utilityObject->getPath(userParameters->getProfile1Name(), &path); // We need to get the name of the file first because the message is different! 
if(userParameters->getMenuFlag()) { promptForNewGuideTreeName(Profile1, p1TreeName, path); } else { string treeName; treeName = path + "dnd"; p1TreeName = new string(treeName); } } if (useTree2 == false) { if(_numSeqs - _profile1NumSeqs >= 2) { for (i = 1 + _profile1NumSeqs; i <= _numSeqs; i++) { for (j = i + 1; j <= _numSeqs; j++) { dscore = static_cast(alignmentObj.countid(i,j)); distMat(i, j) = (100.0 - dscore) / 100.0; distMat(j, i) = distMat(i, j); } } utilityObject->getPath(userParameters->getProfile2Name(), &path); if(userParameters->getMenuFlag()) { promptForNewGuideTreeName(Profile2, p2TreeName, path); } else { string treeName; treeName = path + "dnd"; p2TreeName = new string(treeName); } } } } bool success = false; vector prof1Weight, prof2Weight; prof1Weight.resize(_profile1NumSeqs); prof2Weight.resize(_numSeqs); TreeInterface tree; tree.getWeightsForProfileAlign(&alignmentObj, &distMat, p1TreeName, &prof1Weight, p2TreeName, &prof2Weight, _numSeqs, _profile1NumSeqs, useTree1, useTree2, &success); if(!success) { return; } // do an initial alignment to get the pairwise identities between the two // profiles - used to set parameters for the final alignment MSA* msaObj = new MSA(); alignmentObj.resetProfile1(); alignmentObj.resetProfile2(); count = msaObj->doProfileAlign(&alignmentObj, &distMat, &prof1Weight, &prof2Weight); delete msaObj; if (count == 0) { return; } if(userParameters->getMenuFlag()) { cout << "\n\n\n"; } alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs); (*p1TreeName) = ""; (*p2TreeName) = ""; } void Clustal::doGuideTreeOnly(string* phylipName) { string path; if(userParameters->getEmpty()) { utilityObject->error("No sequences in memory. 
Load sequences first.\n"); return; } userParameters->setStructPenalties1(NONE); userParameters->setStructPenalties2(NONE); alignmentObj.clearSecStruct1(); alignmentObj.clearSecStruct2(); if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetAlign(); } utilityObject->getPath(userParameters->getSeqName(), &path); int _numSeqs = alignmentObj.getNumSeqs(); if (_numSeqs < 2) { utilityObject->error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.\n"); return; } if (userParameters->getSaveParameters()) { userParameters->createParameterOutput(); } if(userParameters->getDisplayInfo()) { cout << "Start of Pairwise alignments\n"; cout << "Aligning...\n"; } if(userParameters->getDNAFlag()) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } ///STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX // DistMatrix distMat; distMat.ResizeRect(_numSeqs + 1); PairwiseAlignBase* pairwiseDist; if (userParameters->getQuickPairAlign()) { pairwiseDist = new FastPairwiseAlign(); } else { pairwiseDist = new FullPairwiseAlign(); } // Generate distance matrix! pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs); delete pairwiseDist; /* AW DEBUG fprintf(stdout, "\nDEBUG: distance matrix following...\n"); distMat.printArray(); */ bool success = false; TreeInterface tree; tree.generateTreeFromDistMat(&distMat, &alignmentObj, 1, _numSeqs, phylipName, &success); if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetAlign(); } (*phylipName) = ""; } // FIXME this is to 90% identical with align(), please merge void Clustal::doAlignUseOldTree(string* phylipName) { string path; int count; AlignmentOutput alignOutput; if(userParameters->getEmpty()) { utilityObject->error("No sequences in memory. 
Load sequences first.\n"); return; } userParameters->setStructPenalties1(NONE); userParameters->setStructPenalties2(NONE); alignmentObj.clearSecStruct1(); alignmentObj.clearSecStruct2(); utilityObject->getPath(userParameters->getSeqName(), &path); if(userParameters->getMenuFlag() || !userParameters->getInteractive()) { if(!alignOutput.openAlignmentOutput(path)) { return; } } else { // We are using clustalQT. // Open all the files. if(!alignOutput.QTOpenFilesForOutput(QTFileNames)) { return; // could not open the files. } } if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll()) { alignmentObj.resetAlign(); } int _numSeqs = alignmentObj.getNumSeqs(); DistMatrix distMat; distMat.ResizeRect(_numSeqs + 1); utilityObject->getPath(userParameters->getSeqName(), &path); if (_numSeqs >= 2) { if(userParameters->getMenuFlag()) { phylipName = new string(path); *phylipName = *phylipName + "dnd"; string message, answer; message = "\nEnter a name for the guide tree file [" + *phylipName + "]"; utilityObject->getStr(message, answer); if(!answer.empty()) { phylipName = new string(answer); } } if(userParameters->getMenuFlag() || !userParameters->getInteractive()) { InFileStream _treeFile; //nige _treeFile.open(phylipName->c_str()); if(!_treeFile.is_open()) { utilityObject->error("Cannot open tree file [%s]\n", phylipName->c_str()); return; } _treeFile.close(); } } else { if(userParameters->getDisplayInfo()) { cout << "Start of Pairwise alignments\n"; cout << "Aligning...\n"; } if(userParameters->getDNAFlag()) { userParameters->setDNAParams(); } else { userParameters->setProtParams(); } PairwiseAlignBase* pairwiseDist; if (userParameters->getQuickPairAlign()) { pairwiseDist = new FastPairwiseAlign(); } else { pairwiseDist = new FullPairwiseAlign(); } // Generate distance matrix! 
pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs); delete pairwiseDist; } if (userParameters->getSaveParameters()) { userParameters->createParameterOutput(); } auto_ptr progSteps; vector seqWeights(_numSeqs + 1); bool success = false; TreeInterface tree; progSteps = tree.getWeightsAndStepsFromTree(&alignmentObj, &distMat, phylipName, &seqWeights, 1, _numSeqs, &success); if(!success) { return; } MSA* msaObj = new MSA(); count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeights, progSteps.get(), 0); delete msaObj; if (count <= 0) { return; } if (userParameters->getMenuFlag()) { cout << "\n\n\n"; } // same as in align() if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT) { //userParameters->setNumIterations(_numSeqs * 2); Iteration iterateObj; iterateObj.removeFirstIterate(&alignmentObj); alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07 if(userParameters->getDisplayInfo()) cout << "Finished iteration\n"; } alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs); phylipName = new string(""); } void Clustal::getFullHelp() { vector markers; Help myhelp; bool showtitle = true; markers = myhelp.ListSectionMarkers(); for (unsigned int i=0; igetMenuFlag()) { cout << helpString; } else { string::size_type lastPos = 0; string::size_type pos = helpString.find_first_of("\n", lastPos); int nlines = 0; while (pos != string::npos) { cout << helpString.substr(lastPos, pos - lastPos); nlines++; if(nlines >= PAGE_LEN) { char tempChar; cout << "\nPress [RETURN] to continue or X to stop "; cin.get(tempChar); if(toupper(tempChar) == 'X') { return; } else { nlines = 0; } } lastPos = pos; //helpString.find_first_not_of("\n", pos); pos = helpString.find_first_of("\n", lastPos+1); //cerr << "DEBUG: pos=" << pos << " lastPos=" << lastPos << "/" << helpString.length() << "\n"; } } } /** * The wrap functions will be used with interface classes to perform the tasks * that were previously done there. 
*/ int Clustal::sequenceInput(bool append, string *offendingSeq) { int code; // If we are not appending, we need to clear the Alignment object. if(!append) { alignmentObj.clearAlignment(); } FileReader readSeqFile; code = readSeqFile.seqInput(&alignmentObj, append, offendingSeq); return code; } /** * profile1Input is a wrapper function for the profileInput. This is because the * other classes dont have access to FileReader */ int Clustal::profile1Input(string profile1Name) { int code; // I need to clear out the Alignment object. alignmentObj.clearAlignment(); userParameters->setProfileNum(1); userParameters->setSeqName(profile1Name); userParameters->setProfile1Name(profile1Name); FileReader readProfileFile; code = readProfileFile.profileInput(&alignmentObj); // If we are using the commandline check if there are seqs! if(!userParameters->getInteractive()) { // AW: FIXME code should be handled higher up the stack and check all codes // also, shouldnt we use utilityObject->error()? if(code != OK) { if (code==NOSEQUENCESINFILE) cerr << "ERROR: There are no sequences in profile2 file." << std::endl; else if (code==ALLNAMESNOTDIFFERENT) cerr << "ERROR: Not all sequence names are different" << std::endl; else cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n"; // AW: should we really exit here? What if called from clustalx? exit(2); } } return code; } /** * profile2Input is a wrapper function for the profileInput. This is because the * other classes dont have access to FileReader */ int Clustal::profile2Input(string profile2Name) { int code; if(userParameters->getProfileNum() == 2) { // Remove the sequences from the previous one. int numSeqsProfile1 = alignmentObj.getProfile1NumSeqs(); alignmentObj.resizeSeqArray(numSeqsProfile1 + 1); // Clear anything from profile2 in alignment. 
alignmentObj.clearSecStruct2(); } userParameters->setProfileNum(2); userParameters->setSeqName(profile2Name); userParameters->setProfile2Name(profile2Name); FileReader readProfileFile; cout << "before profileInput\n"; code = readProfileFile.profileInput(&alignmentObj); cout << "after profileInput\n"; if(!userParameters->getInteractive()) { // AW: FIXME code should be handled higher up the stack and check all codes // also, shouldnt we use utilityObject->error()? if(code != OK) { if (code==NOSEQUENCESINFILE) cerr << "ERROR: There are no sequences in profile2 file." << std::endl; else if (code==ALLNAMESNOTDIFFERENT) cerr << "ERROR: Not all sequence names are different" << std::endl; else cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n"; // AW: should we really exit here? What if called from clustalx? // DD: fixed if(!userParameters->getGui()) exit(2); } } return code; } /** * The function commandline_readseq is called by the command line to * read in the sequences. It also prints out the names. This was * previously done by the command line parser. */ int Clustal::commandLineReadSeq(int firstSeq) { // Clear the alignment, although obviously there shouldnt be anything in it. alignmentObj.clearAlignment(); userParameters->setProfileNum(0); int code = 0; string offendingSeq; FileReader readInputFile; code = readInputFile.readSeqs(&alignmentObj, firstSeq, &offendingSeq); if(code != OK) { if(code == CANNOTOPENFILE) { utilityObject->error("Cannot open input file. No alignment!\n"); } else if(code == NOSEQUENCESINFILE) { utilityObject->error("No sequences in file. 
No alignment!\n"); } else if(code == ALLNAMESNOTDIFFERENT) { utilityObject->error("Multiple sequences found with same name (found %s at least twice)!", offendingSeq.c_str()); } else if(code == EMPTYSEQUENCE) { utilityObject->error("Empty sequences found: %s\n", offendingSeq.c_str()); } else if(code == SEQUENCETOOBIG) { utilityObject->error("Sequence(s) too big: %s\n", offendingSeq.c_str()); } else if(code == BADFORMAT) { utilityObject->error("Sequences are badly formatted!\n"); } else { utilityObject->error("\nThere was a problem reading in the file. No alignment!\n"); } exit(-1); } alignmentObj.printSequencesAddedInfo(); userParameters->setEmpty(false); return code; } /* * The function outputNow is used to output the alignment. It can be called by the * menu or command line parser. */ void Clustal::outputNow() { if(alignmentObj.getNumSeqs() > 0) { string path = ""; if(!userParameters->getMenuFlag()) { string _seqName = userParameters->getSeqName(); utilityObject->getPath(_seqName, &path); } AlignmentOutput alignmentOutput; alignmentOutput.openAlignmentOutput(path); alignmentOutput.createAlignmentOutput(&alignmentObj, 1, alignmentObj.getNumSeqs()); } else { utilityObject->error("No sequences have been loaded\n"); } } void Clustal::phylogeneticTree(string* phylipName, string* clustalName, string* distName, string* nexusName, string pimName) { TreeNames treeNames; treeNames.clustalName = *clustalName; treeNames.distName = *distName; treeNames.nexusName = *nexusName; treeNames.phylipName = *phylipName; treeNames.pimName = pimName; TreeInterface tree; tree.treeFromAlignment(&treeNames, &alignmentObj); } void Clustal::bootstrapTree(string* phylipName, string* clustalName, string* nexusName) { TreeNames treeNames; treeNames.clustalName = *clustalName; treeNames.nexusName = *nexusName; treeNames.phylipName = *phylipName; TreeInterface tree; tree.bootstrapTree(&treeNames, &alignmentObj); } void Clustal::initInterface() { userParameters->setEmpty(true); 
userParameters->setProfile1Empty(true); userParameters->setProfile2Empty(true); } /** * The function calcGapPenaltyMask is used to calculate the gapMask from the secondary * structure information. */ void Clustal::calcGapPenaltyMask(int prfLength, vector* mask, vector* gapMask) { int i,j; vector structMask; structMask.resize(prfLength + 1); int _helixEndPlus = userParameters->getHelixEndPlus(); int _helixEndMinus = userParameters->getHelixEndMinus(); int _strandEndPlus = userParameters->getStrandEndPlus(); int _strandEndMinus = userParameters->getStrandEndMinus(); i = 0; while (i < prfLength) { if (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$') { for (j = -_helixEndPlus; j < 0; j++) { if(i + j < (int)structMask.size()) { if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a') && (tolower(structMask[i + j]) != 'b')) { structMask[i + j] = 'a'; } } } for (j = 0; j < _helixEndMinus; j++) { if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'a' && (*mask)[i + j] != '$')) { break; } structMask[i + j] = 'a'; } i += j; while (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$') { if (i >= prfLength) { break; } if ((*mask)[i] == '$') { structMask[i] = 'A'; i++; break; } else { structMask[i] = (*mask)[i]; } i++; } for (j = 0; j < _helixEndMinus; j++) { if ((i - j - 1 >= 0) && (tolower((*mask)[i - j - 1]) == 'a' || (*mask)[i - j - 1] == '$')) { structMask[i - j - 1] = 'a'; } } for (j = 0; j < _helixEndPlus; j++) { if (i + j >= prfLength) { break; } structMask[i+j] = 'a'; } } else if (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%') { for (j = -_strandEndPlus; j < 0; j++) { if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a') && (tolower(structMask[i + j]) != 'b')) { structMask[i + j] = 'b'; } } for (j = 0; j < _strandEndPlus; j++) { if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'b' && (*mask)[i + j] != '%')) { break; } structMask[i+j] = 'b'; } i += j; while (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%') { if (i >= prfLength) { break; } if ((*mask)[i] == '%') { 
structMask[i] = 'B'; i++; break; } else { structMask[i] = (*mask)[i]; } i++; } for (j = 0; j < _strandEndMinus; j++) { if ((i-j-1>=0) && (tolower((*mask)[i-j-1]) == 'b' || (*mask)[i-j-1] == '%')) { structMask[i - j - 1] = 'b'; } } for (j = 0; j < _strandEndPlus; j++) { if (i + j >= prfLength) { break; } structMask[i+j] = 'b'; } } else { i++; } } for(i = 0; i < prfLength;i++) { switch (structMask[i]) { case 'A': (*gapMask)[i] = userParameters->getHelixPenalty() + '0'; break; case 'a': (*gapMask)[i] = userParameters->getHelixEndPenalty() + '0'; break; case 'B': (*gapMask)[i] = userParameters->getStrandPenalty() +'0'; break; case 'b': (*gapMask)[i] = userParameters->getStrandEndPenalty() + '0'; break; default: (*gapMask)[i] = userParameters->getLoopPenalty() + '0'; break; } } } /** * The function QTcalcLowScoreSegments is used to calculate the residues in the sequences * that score badly. * @param firstSeq first seq in the alignment or profile * @param nSeqs the number of sequences * @param nCols the length of the longest seq * @param seqWeight a vector to hold the sequence weights * @param lowScoreRes * @param seqWeightCalculated */ void Clustal::QTcalcLowScoreSegments(LowScoreSegParams* params) { int i, j; float sum, prevSum; float gscale; vector weight; int sweight; vector gaps; int matrix[NUMRES][NUMRES]; vector fsum; vector bsum; vector pscore; int _maxAA = userParameters->getMaxAA(); // STEP 1: Calculate the sequence weights QTcalcWeightsForLowScoreSeg(params); subMatrix->getQTMatrixForLowScoreSeg(matrix); const SeqArray* seqArray = alignmentObj.getSeqArray(); gaps.resize(params->nCols + 1); for (j = 1; j <= params->nCols; j++) { gaps[j - 1] = 0; for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs; i++) { if (j < alignmentObj.getSeqLength(i)) { if (((*seqArray)[i][j] < 0) || ((*seqArray)[i][j] > _maxAA)) { gaps[j-1]++; } } } } // STEP 2: Calculate the profile LowScoreSegProfile lowScoreProfile(params->nCols, params->firstSeq, params->firstSeq + 
params->nSeqs); weight.resize(params->firstSeq + params->nSeqs + 1); for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++) { weight[i] = (*(params->seqWeight))[i - params->firstSeq]; } lowScoreProfile.calcLowScoreSegProfile(seqArray, matrix, &weight); const SeqArray* profile = lowScoreProfile.getProfilePtr(); sweight = 0; for(i = params->firstSeq; i < params->firstSeq + params->nSeqs; i++) { sweight += weight[i]; } //Now, use the profile scores to mark segments of each sequence which score badly. fsum.resize(params->nCols + 2); bsum.resize(params->nCols + 2); pscore.resize(params->nCols + 2); for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs + 1; i++) { // In a forward phase, sum the profile scores. Mark negative sums as exceptions. //If the sum is positive, then it gets reset to 0. sum = 0.0; for(j = 1; j <= alignmentObj.getSeqLength(i); j++) { gscale = (float)(params->nSeqs - gaps[j - 1]) / (float)params->nSeqs; if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA) { pscore[j - 1] = 0.0; sum = 0.0; } else { pscore[j-1]=((*profile)[j][(*seqArray)[i][j]]- weight[i - 1] * matrix[(*seqArray)[i][j]][(*seqArray)[i][j]]) * gscale / sweight; } sum += pscore[j - 1]; if(sum > 0.0) { sum = 0.0; } fsum[j - 1] = sum; } // trim off any positive scoring residues from the end of the segments prevSum = 0; for(j = alignmentObj.getSeqLength(i) - 1; j >= 0; j--) { if(prevSum >= 0.0 && fsum[j] < 0.0 && pscore[j] >= 0.0) { fsum[j] = 0.0; } prevSum = fsum[j]; } // Now, in a backward phase, do the same summing process. 
sum = 0.0; for(j = alignmentObj.getSeqLength(i); j >= 1; j--) { if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA) { sum = 0; } else { sum += pscore[j - 1]; } if(sum > 0.0) { sum = 0.0; } bsum[j - 1] = sum; } // trim off any positive scoring residues from the start of the segments prevSum = 0; for(j = 0; j < alignmentObj.getSeqLength(i); j++) { if(prevSum >= 0.0 && bsum[j] < 0.0 && pscore[j] >= 0.0) { bsum[j] = 0.0; } prevSum = bsum[j]; } //Mark residues as exceptions if they score negative in the forward AND backward directions. for(j = 1; j <= alignmentObj.getSeqLength(i); j++) { if(fsum[j - 1] < 0.0 && bsum[j - 1] < 0.0) { if((*seqArray)[i][j] >= 0 && (*seqArray)[i][j] < _maxAA) { (*(params->lowScoreRes))[i - params->firstSeq - 1][j - 1] = -1; } } } } // Finally, apply the length cutoff to the segments - removing segments shorter //than the cutoff QTremoveShortSegments(params); } void Clustal::QTremoveShortSegments(LowScoreSegParams* params) { int i,j,k,start; //panel_data data; //GetPanelExtra(p,&data); if(params->nSeqs <= 0) return; // Reset all the exceptions - a value of 1 indicates an exception that // will be displayed. 
A value of -1 is used to remember exceptions that // are temporarily hidden in the display for(i = 0; i < params->nSeqs; i++) { for(j = 0; j < params->nCols; j++) { if((*(params->lowScoreRes))[i][j] == -1) { (*(params->lowScoreRes))[i][j] = 1; } } } for(i = 0; i < params->nSeqs; i++) { start = -1; for(j = 0; j <= params->nCols; j++) { if(start == -1) { if((*(params->lowScoreRes))[i][j] == 1) start = j; } else { if(j == params->nCols || (*(params->lowScoreRes))[i][j] == 0) { if(j - start < userParameters->getQTminLenLowScoreSegment()) { for(k = start; k < j; k++) { (*(params->lowScoreRes))[i][k] = -1; } } start = -1; } } } } } /** * Change: Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs */ void Clustal::QTcalcWeightsForLowScoreSeg(LowScoreSegParams* params) { int i, j; vector weight; float dscore; DistMatrix distMat(alignmentObj.getNumSeqs() + 1); // Mark: changed size // Aw potential trouble here: what if we don't have write // permission to current directory? #ifdef UNIX char treeName[FILENAMELEN]=".score.ph"; #else char treeName[FILENAMELEN]="tmp.ph"; #endif if(params->nSeqs <= 0) { return; } // if sequence weights have been calculated before - don't bother //doing it again (it takes too long). data.seqweight is set to NULL when // new sequences are loaded. 
/ if(params->seqWeightCalculated == true) { return; } utilityObject->info("Calculating sequence weights..."); // count pairwise percent identities to make a phylogenetic tree // if(params->nSeqs >= 2) { //i=firstSeq + 1; i <= firstSeq + nSeqs for (i = params->firstSeq + 1; i <= params->firstSeq + params->nSeqs; i++) { //j=i + 1; i <= firstSeq + nSeqs for (j = i + 1; j <= params->firstSeq + params->nSeqs; j++) // Mark 22-5-07 { dscore = alignmentObj.countid(i, j); // Mark 22-5-07 distMat(i, j) = (100.0 - dscore) / 100.0; distMat(j, i) = distMat(i, j); } } string name = string(treeName); bool success = false; weight.resize(params->firstSeq + params->nSeqs + 1); TreeInterface tree; tree.getWeightsForQtLowScore(&weight, &distMat, &alignmentObj, params->firstSeq + 1, params->nSeqs, &name, &success); // Mark change 10-5-07 if(!success) { return; } for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++) { (*(params->seqWeight))[i - params->firstSeq] = weight[i]; } utilityObject->info("Done."); } } void Clustal::QTSetFileNamesForOutput(AlignmentFileNames fileNames) { QTFileNames = fileNames; } bool Clustal::QTRealignSelectedRange(AlignmentFileNames fileNames, int beginPos, int endPos, bool realignEndGapPen) { bool alignEndGapPen = userParameters->getEndGapPenalties(); Alignment saveOldAlign = alignmentObj; // Take a copy of it. Note provided copy // constructor is ok. 
bool ok; ok = alignmentObj.removeAllOutsideRange(beginPos, endPos); if(!ok) { alignmentObj = saveOldAlign; return false; } // Temporarily set the alignment output to be input int saveOutOrder = userParameters->getOutputOrder(); userParameters->setOutputOrder(INPUT); //set end gap penalties to be realignEndGapPenalties userParameters->setEndGapPenalties(realignEndGapPen); //do the alignment if(alignmentObj.getNumSeqs() <= 0) { alignmentObj = saveOldAlign; return false; } QTSetFileNamesForOutput(fileNames); string phylipName = fileNames.treeFile; align(&phylipName, false); userParameters->setOutputOrder(saveOutOrder); // reset the end gap penalties userParameters->setEndGapPenalties(alignEndGapPen); // remove postions that only contain gaps int nSeqs = alignmentObj.getNumSeqs(); alignmentObj.removeAllGapOnlyColumns(1, nSeqs, 0); // save it to a temporary area. SeqArray realignedArea = *(alignmentObj.getSeqArray()); // Paste it back into the original alignment. alignmentObj = saveOldAlign; bool result; result = alignmentObj.updateRealignedRange(realignedArea, beginPos, endPos); if(result == false) { utilityObject->error("something went wrong while updating the realigned range\n"); } // output the alignments AlignmentOutput alignOutput; if(!alignOutput.QTOpenFilesForOutput(QTFileNames)) { return false; // could not open the files. 
} alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs); return true; } void Clustal::test() { cout << "RUNNING TEST\n"; AlignmentOutput alignOutput; string path; utilityObject->getPath(userParameters->getSeqName(), &path); if(!alignOutput.openAlignmentOutput(path)) { cerr << "could not open the file\n"; return; } vector selected; int nSeqs = alignmentObj.getNumSeqs(); selected.resize(nSeqs + 1, 0); selected[9] = 1; selected[10] = 1; //selected[1] = 1; alignmentObj.removeGapOnlyColsFromSelectedSeqs(&selected); alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs); } /** * This function is used to ask the user if they would like to use an existing guide tree. * Note: It expects that phylipName actually points to a string that has been allocated. */ bool Clustal::useExistingGuideTree(int type, string* phylipName, const string& path) { bool useTree = false; string treeName; InFileStream _treeFile; //nige bool paramUseTree; string* ptrToMsg; if(type == Sequences) { ptrToMsg = &sequencesMsg; paramUseTree = userParameters->getUseTreeFile(); } else if(type == Profile1) { ptrToMsg = &profile1Msg; paramUseTree = userParameters->getUseTree1File();; } else if(type == Profile2) { ptrToMsg = &profile2Msg; paramUseTree = userParameters->getUseTree2File();; } else { ptrToMsg = &sequencesMsg; paramUseTree = userParameters->getUseTreeFile(); } if (checkTree && userParameters->getMenuFlag()) { treeName = path + "dnd"; _treeFile.open(treeName.c_str()); _treeFile.seekg(0, std::ios::beg); if(_treeFile.is_open()) { string message = *ptrToMsg + treeName + " (y/n) ? 
[y]"; string answer; utilityObject->getStr(message, answer); if(answer[0] != 'n' && answer[0] != 'N') { if(!phylipName) { phylipName = new string(treeName); } else { *phylipName = treeName; } useTree = true; } _treeFile.close(); } } else if (!userParameters->getMenuFlag() && paramUseTree) { useTree = true; } return useTree; } void Clustal::promptForNewGuideTreeName(int type, string* treeName, const string& path) { string* ptrToMsg; if(type == Profile1) { ptrToMsg = &newProfile1TreePrompt; } else if(type == Profile2) { ptrToMsg = &newProfile2TreePrompt; } else { ptrToMsg = &newProfile1TreePrompt; } if(!treeName) { treeName = new string(""); } while(treeName->empty()) { string message = *ptrToMsg + path + "dnd]"; string answer; utilityObject->getStr(message, answer); if(answer.empty()) { answer = path + "dnd"; *treeName = answer; } else { *treeName = answer; } } } } clustalx-2.1/clustalW/0000755000175000017500000000000011470725216014614 5ustar ddineenddineenclustalx-2.1/WritePostscriptFile.h0000644000175000017500000000376311470725216017165 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. 
*/ #ifndef WRITEPOSTSCRIPTFILE_H #define WRITEPOSTSCRIPTFILE_H #include #include #include "ClustalQtParams.h" #include "PostscriptFileParams.h" class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class QVBoxLayout; class QGridLayout; class QLineEdit; class WritePostscriptFile : public QDialog { Q_OBJECT public: WritePostscriptFile(QString inputFileName, int type, int lengthLongest); QString getFileName(){return fileName;} PostscriptFileParams getParams(); protected: private slots: void accept(); void cancel(); void browse(); void changeBlockSize(); private: void setUpLayout(); void setFileBox(); void setUpOtherParams(); int getSuggestedBlockSize(); QVBoxLayout* mainLayout; QGridLayout* fileLayout; QGridLayout* otherParamLayout; QPushButton* okButton; QPushButton* cancelButton; QPushButton* browseButton; QGroupBox* otherParams; QGroupBox* fileBox; QComboBox* pageSize; QComboBox* orientation; QComboBox* printHeader; QComboBox* printQualityCurve; QComboBox* printRuler; QComboBox* printResidueNumbers; QComboBox* resizeToFit; QLabel* writeSeqsToLabel; QLabel* psColorsFileLabel; QLabel* pageSizeLabel; QLabel* orientationLabel; QLabel* printHeaderLabel; QLabel* printQualityCurveLabel; QLabel* printRulerLabel; QLabel* printResidueNumbersLabel; QLabel* resizeToFitLabel; QLabel* printFromPosLabel; QLabel* printToPosLabel; QLabel* useBlockLengthLabel; QLineEdit* writeSeqsTo; QLineEdit* psColorsFile; QLineEdit* printFromPos; QLineEdit* printToPos; QLineEdit* useBlockLength; int type; QString title; QString label; QString fileName; int blockLength; int lengthLongestSeq; auto_ptr params; }; #endif clustalx-2.1/WritePostscriptFile.cpp0000644000175000017500000003037211470725216017514 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. 
* * 26-11-07,Nigel Brown(EMBL): A bug was noticed on Mac and Linux such * that the default printing colours were never selected. Removed the * default colour parameter file from the dialogue as the default is * internally known. Added a call to the Resources singelton to obtain * the true location of the color parameter file. * */ #include #include #include #include #include #include #include #include #include #include #include "WritePostscriptFile.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "clustalW/alignment/AlignmentOutput.h" #include "ClustalQtParams.h" #include "FileDialog.h" #include "Resources.h" WritePostscriptFile::WritePostscriptFile(QString inputFileName, int type, int lengthLongest) : blockLength(150), lengthLongestSeq(lengthLongest) { title = "Write Postscript File"; if(type == Sequences) { label = "Write Sequences To: "; } else if(type == Profile1) { label = "Write Profile 1 To: "; } else if(type == Profile2) { label = "Write Profile 2 To: "; } else { label = ""; } if(inputFileName.size() > 0) { std::string path = ""; clustalw::utilityObject->getPath(inputFileName.toStdString(), &path); cout << fileName.toStdString() << "\n"; if(path.size() > 0) { fileName = QString(path.c_str()) + "ps"; } else { fileName = QDir::currentPath() + QString(QDir::separator()) + "temp.ps"; } } std::cout << fileName.toStdString(); otherParams = new QGroupBox; fileBox = new QGroupBox; mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); cancelButton = new QPushButton(tr("Cancel")); cancelButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(cancelButton, SIGNAL(clicked()), this, SLOT(cancel())); browseButton = new QPushButton(tr("Browse")); browseButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(browseButton, SIGNAL(clicked()), this, 
SLOT(browse())); setUpLayout(); mainLayout->addWidget(fileBox); mainLayout->addWidget(otherParams); setLayout(mainLayout); } void WritePostscriptFile::setUpLayout() { setFileBox(); setUpOtherParams(); } void WritePostscriptFile::setFileBox() { fileLayout = new QGridLayout; writeSeqsToLabel = new QLabel(label); writeSeqsTo = new QLineEdit; writeSeqsTo->setText(fileName); psColorsFileLabel = new QLabel("PS Colors File:"); psColorsFile = new QLineEdit; //26-11-07,nige //psColorsFile->setText(psPrintFile); psColorsFile->setText(""); //26-11-07,nige: end fileLayout->addWidget(writeSeqsToLabel, 0, 0); fileLayout->addWidget(writeSeqsTo, 1, 0); fileLayout->addWidget(browseButton, 1, 1); fileLayout->addWidget(psColorsFileLabel, 2, 0); fileLayout->addWidget(psColorsFile, 3, 0); fileBox->setLayout(fileLayout); } void WritePostscriptFile::setUpOtherParams() { otherParamLayout = new QGridLayout; pageSizeLabel = new QLabel("Page Size"); pageSize = new QComboBox(); pageSize->addItem("A4"); pageSize->addItem("A3"); pageSize->addItem("US Letter"); pageSize->setCurrentIndex(A4); connect(pageSize, SIGNAL(activated(int)), this, SLOT(changeBlockSize())); orientationLabel = new QLabel("Orientation"); orientation = new QComboBox(); orientation->addItem("Landscape"); orientation->addItem("Portrait"); pageSize->setCurrentIndex(LANDSCAPE); connect(orientation, SIGNAL(activated(int)), this, SLOT(changeBlockSize())); printHeaderLabel = new QLabel("Print Header: "); printHeader = new QComboBox(); printHeader->addItem("Yes"); printHeader->addItem("No"); printHeader->setCurrentIndex(0); printQualityCurveLabel = new QLabel("Print Quality Curve: "); printQualityCurve = new QComboBox(); printQualityCurve->addItem("Yes"); printQualityCurve->addItem("No"); printQualityCurve->setCurrentIndex(0); printRulerLabel = new QLabel("Print Ruler: "); printRuler = new QComboBox(); printRuler->addItem("Yes"); printRuler->addItem("No"); printRuler->setCurrentIndex(0); printResidueNumbersLabel = new QLabel("Print 
Residue Numbers: "); printResidueNumbers = new QComboBox(); printResidueNumbers->addItem("Yes"); printResidueNumbers->addItem("No"); printResidueNumbers->setCurrentIndex(0); resizeToFitLabel = new QLabel("Resize to fit page: "); resizeToFit = new QComboBox(); resizeToFit->addItem("Yes"); resizeToFit->addItem("No"); resizeToFit->setCurrentIndex(0); QString num; printFromPosLabel = new QLabel("Print from position: "); printFromPos = new QLineEdit; num.setNum(1); printFromPos->setText(num); printToPosLabel = new QLabel("to: "); printToPos = new QLineEdit; num.setNum(lengthLongestSeq); printToPos->setText(num); num.setNum(getSuggestedBlockSize()); useBlockLengthLabel = new QLabel("Use block length: "); useBlockLength = new QLineEdit; useBlockLength->setText(num); otherParamLayout->addWidget(pageSizeLabel, 0, 0); otherParamLayout->addWidget(pageSize, 0, 1); otherParamLayout->addWidget(orientationLabel, 0, 2); otherParamLayout->addWidget(orientation, 0, 3); otherParamLayout->addWidget(printHeaderLabel, 1, 0); otherParamLayout->addWidget(printHeader, 1, 1); otherParamLayout->addWidget(printQualityCurveLabel, 1, 2); otherParamLayout->addWidget(printQualityCurve, 1, 3); otherParamLayout->addWidget(printRulerLabel, 2, 0); otherParamLayout->addWidget(printRuler, 2, 1); otherParamLayout->addWidget(printResidueNumbersLabel, 2, 2); otherParamLayout->addWidget(printResidueNumbers, 2, 3); otherParamLayout->addWidget(resizeToFitLabel, 3, 0); otherParamLayout->addWidget(resizeToFit, 3, 1); otherParamLayout->addWidget(printFromPosLabel, 4, 0); otherParamLayout->addWidget(printFromPos, 4, 1); otherParamLayout->addWidget(printToPosLabel, 4, 2); otherParamLayout->addWidget(printToPos, 4, 3); otherParamLayout->addWidget(useBlockLengthLabel, 5, 0); otherParamLayout->addWidget(useBlockLength, 5, 1); otherParamLayout->addWidget(okButton, 6, 0); otherParamLayout->addWidget(cancelButton, 6, 1); otherParams->setLayout(otherParamLayout); } void WritePostscriptFile::accept() { // writeSeqsTo 
QString writeTo; writeTo = writeSeqsTo->text(); if(writeTo.size() == 0) { writeTo = QDir::currentPath() + "temp.ps"; } // psColorsFile QString psColors = psColorsFile->text(); if(psColors.size() == 0) { //26-11-07,nige: end //psColors = QDir::currentPath() + "usedefaultfile"; Resources *res = Resources::Instance(); psColors = res->searchPathsForFile(psPrintFile); //26-11-07,nige: end } // pageSize int pageSizeChoice; if(pageSize->currentIndex() == 0) { pageSizeChoice = A4; } else if(pageSize->currentIndex() == 1) { pageSizeChoice = A3; } else { pageSizeChoice = USLETTER; } // orientation int orientationChoice; if(orientation->currentIndex() == 0) { orientationChoice = LANDSCAPE; } else { orientationChoice = PORTRAIT; } // printHeader bool printHead; if(printHeader->currentIndex() == 0) { printHead = true; } else { printHead = false; } // printQualityCurve bool printCurve; if(printQualityCurve->currentIndex() == 0) { printCurve = true; } else { printCurve = false; } // printRuler bool printRule; if(printRuler->currentIndex() == 0) { printRule = true; } else { printRule = false; } // printResidueNumbers bool printResNum; if(printResidueNumbers->currentIndex() == 0) { printResNum = true; } else { printResNum = false; } // resizeToFit bool resize; if(resizeToFit->currentIndex() == 0) { resize = true; } else { resize = false; } // printFromPos QString lineText = printFromPos->text(); int from; if(lineText.size() > 0) { bool ok; from = lineText.toInt(&ok); if(!ok) { from = 1; } else if(from < 1 || from > lengthLongestSeq) { from = 1; } } else { from = 1; } // printToPos lineText = printToPos->text(); int to; if(lineText.size() > 0) { bool ok; to = lineText.toInt(&ok); if(!ok) { to = lengthLongestSeq; } else if(to < 1 || to > lengthLongestSeq) { to = lengthLongestSeq; } } else { to = lengthLongestSeq; } // useBlockLength lineText = useBlockLength->text(); int blockLen; if(lineText.size() > 0) { bool ok; blockLen = lineText.toInt(&ok); if(!ok) { blockLen = 
getSuggestedBlockSize(); } else if(blockLen < 1) { blockLen = getSuggestedBlockSize(); } else if(blockLen > getSuggestedBlockSize()) { int answer; QString myMessage = "Block size is greater than suggested size.\n"; myMessage += "Sequences may not fit on the page. Use suggested size?"; answer = QMessageBox::information(this, "", myMessage, QMessageBox::Yes, QMessageBox::No); if(answer == QMessageBox::Yes) { blockLen = getSuggestedBlockSize(); } } } else { blockLen = getSuggestedBlockSize(); } params.reset(new PostscriptFileParams(writeTo, psColors, pageSizeChoice, orientationChoice, printHead, printCurve, printRule, printResNum, resize, from, to, blockLen)); setResult(QDialog::Accepted); hide(); } void WritePostscriptFile::cancel() { setResult(QDialog::Rejected); hide(); } void WritePostscriptFile::browse() { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getSaveFileName(this, tr("Save as")); if(myFile != "") { writeSeqsTo->setText(myFile); } } void WritePostscriptFile::changeBlockSize() { QString num; num.setNum(getSuggestedBlockSize()); useBlockLength->setText(num); } PostscriptFileParams WritePostscriptFile::getParams() { return *(params.get()); } int WritePostscriptFile::getSuggestedBlockSize() { int _orientation = orientation->currentIndex(); int paperSize = pageSize->currentIndex(); if(_orientation == LANDSCAPE && paperSize == A4) { return 150; } else if(_orientation == LANDSCAPE && paperSize == A3) { return 250; } else if(_orientation == LANDSCAPE && paperSize == USLETTER) { return 150; } else if(_orientation == PORTRAIT && paperSize == A4) { return 80; } else if(_orientation == PORTRAIT && paperSize == A3) { return 150; } else if(_orientation == PORTRAIT && paperSize == USLETTER) { return 150; } else { return 80; // minimum size that will fit any page. 
} } clustalx-2.1/TreeOutputFileNames.h0000644000175000017500000000325011470725216017073 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #ifndef TREEOUTPUTFILENAMES_H #define TREEOUTPUTFILENAMES_H #include #include #include "ClustalQtParams.h" class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class QVBoxLayout; class QGridLayout; class QLineEdit; class TreeOutputFileNames : public QDialog { Q_OBJECT public: TreeOutputFileNames(QString seqFileName); clustalw::TreeNames getNames(); protected: private slots: void accept(); void cancel(); void browseClustal(); void browseNexus(); void browsePhylip(); void browseDistances(); void browsePim(); private: void browse(QLineEdit* lineEditName); void setUpLayout(); void setAllPtrsToNull(); QString getPathFromFileName(QString fileName); QVBoxLayout* mainLayout; QGridLayout* fileLayout; QPushButton* okButton; QPushButton* treeOutCancelButton; QPushButton* clustalTreeBrowseButton; QPushButton* phylipTreeBrowseButton; QPushButton* distancesBrowseButton; QPushButton* nexusTreeBrowseButton; QPushButton* pimNameBrowseButton; QGroupBox* fileBox; QLabel* clustalTreeNameLabel; QLabel* phylipTreeNameLabel; QLabel* distancesNameLabel; QLabel* nexusTreeNameLabel; QLabel* pimNameLabel; QLineEdit* clustalTreeName; QLineEdit* phylipTreeName; QLineEdit* distancesName; QLineEdit* nexusTreeName; QLineEdit* pimName; QString title; QString filePath; auto_ptr treeFileNames; static const int MinLineEditWidth = 250; }; #endif clustalx-2.1/TreeOutputFileNames.cpp0000644000175000017500000002015711470725216017433 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. 
*/ #include #include #include #include #include #include #include #include #include #include #include "TreeOutputFileNames.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "ClustalQtParams.h" #include "FileDialog.h" TreeOutputFileNames::TreeOutputFileNames(QString seqFileName) { setAllPtrsToNull(); title = "Draw Tree"; setWindowTitle(title); filePath = getPathFromFileName(seqFileName); mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); treeOutCancelButton = new QPushButton(tr("Cancel")); treeOutCancelButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(treeOutCancelButton, SIGNAL(clicked()), this, SLOT(cancel())); fileBox = new QGroupBox(); setUpLayout(); mainLayout->addWidget(fileBox); setLayout(mainLayout); } QString TreeOutputFileNames::getPathFromFileName(QString fileName) { QString filePath; if(fileName.size() > 0) { std::string path = ""; clustalw::utilityObject->getPath(fileName.toStdString(), &path); if(path.size() > 0) { filePath = QString(path.c_str()); } else { filePath = QDir::currentPath() + QString(QDir::separator()) + "temp"; } } return filePath; } void TreeOutputFileNames::setUpLayout() { int row = 0; fileLayout = new QGridLayout; if(clustalw::userParameters->getOutputTreeClustal()) { clustalTreeNameLabel = new QLabel(tr("Save Clustal Tree As: ")); clustalTreeName = new QLineEdit(); clustalTreeName->setText(filePath + "nj"); clustalTreeName->setMinimumWidth(MinLineEditWidth); clustalTreeBrowseButton = new QPushButton("Browse"); connect(clustalTreeBrowseButton, SIGNAL(clicked()), this, SLOT(browseClustal())); fileLayout->addWidget(clustalTreeNameLabel, row, 0); fileLayout->addWidget(clustalTreeName, row, 1); fileLayout->addWidget(clustalTreeBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputTreePhylip()) { phylipTreeNameLabel = 
new QLabel(tr("Save Phylip Tree As: ")); phylipTreeName = new QLineEdit(); phylipTreeName->setText(filePath + "ph"); phylipTreeName->setMinimumWidth(MinLineEditWidth); phylipTreeBrowseButton = new QPushButton("Browse"); connect(phylipTreeBrowseButton, SIGNAL(clicked()), this, SLOT(browsePhylip())); fileLayout->addWidget(phylipTreeNameLabel, row, 0); fileLayout->addWidget(phylipTreeName, row, 1); fileLayout->addWidget(phylipTreeBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputTreeDistances()) { distancesNameLabel = new QLabel(tr("Save Distance Matrix As: ")); distancesName = new QLineEdit(); distancesName->setText(filePath + "dst"); distancesName->setMinimumWidth(MinLineEditWidth); distancesBrowseButton = new QPushButton("Browse"); connect(distancesBrowseButton, SIGNAL(clicked()), this, SLOT(browseDistances())); fileLayout->addWidget(distancesNameLabel, row, 0); fileLayout->addWidget(distancesName, row, 1); fileLayout->addWidget(distancesBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputPim()) { pimNameLabel = new QLabel(tr("Save % Identity Matrix As: ")); pimName = new QLineEdit(); pimName->setText(filePath + "pim"); pimName->setMinimumWidth(MinLineEditWidth); pimNameBrowseButton = new QPushButton("Browse"); connect(pimNameBrowseButton, SIGNAL(clicked()), this, SLOT(browsePim())); fileLayout->addWidget(pimNameLabel, row, 0); fileLayout->addWidget(pimName, row, 1); fileLayout->addWidget(pimNameBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputTreeNexus()) { nexusTreeNameLabel = new QLabel(tr("Save Nexus Tree As: ")); nexusTreeName = new QLineEdit(); nexusTreeName->setText(filePath + "nxs"); nexusTreeName->setMinimumWidth(MinLineEditWidth); nexusTreeBrowseButton = new QPushButton("Browse"); connect(nexusTreeBrowseButton, SIGNAL(clicked()), this, SLOT(browseNexus())); fileLayout->addWidget(nexusTreeNameLabel, row, 0); fileLayout->addWidget(nexusTreeName, row, 1); fileLayout->addWidget(nexusTreeBrowseButton, 
row, 2); row++; } fileLayout->addWidget(okButton, row, 0); fileLayout->addWidget(treeOutCancelButton, row, 1); fileBox->setLayout(fileLayout); } void TreeOutputFileNames::browseClustal() { browse(clustalTreeName); } void TreeOutputFileNames::browseNexus() { browse(nexusTreeName); } void TreeOutputFileNames::browsePhylip() { browse(phylipTreeName); } void TreeOutputFileNames::browseDistances() { browse(distancesName); } void TreeOutputFileNames::browsePim() { browse(pimName); } void TreeOutputFileNames::accept() { treeFileNames.reset(new clustalw::TreeNames); if(clustalTreeName != 0) { if(clustalTreeName->text() == "") { treeFileNames->clustalName = filePath.toStdString() + "nj"; } else { QString file = clustalTreeName->text(); treeFileNames->clustalName = file.toStdString(); } } if(phylipTreeName != 0) { if(phylipTreeName->text() == "") { treeFileNames->phylipName = filePath.toStdString() + "ph"; } else { QString file = phylipTreeName->text(); treeFileNames->phylipName = file.toStdString(); } } if(distancesName != 0) { if(distancesName->text() == "") { treeFileNames->distName = filePath.toStdString() + "dst"; } else { QString file = distancesName->text(); treeFileNames->distName = file.toStdString(); } } if(nexusTreeName != 0) { if(nexusTreeName->text() == "") { treeFileNames->nexusName = filePath.toStdString() + "nxs"; } else { QString file = nexusTreeName->text(); treeFileNames->nexusName = file.toStdString(); } } if(pimName != 0) { if(pimName->text() == "") { treeFileNames->pimName = filePath.toStdString() + "pim"; } else { QString file = pimName->text(); treeFileNames->pimName = file.toStdString(); } } setResult(QDialog::Accepted); hide(); } void TreeOutputFileNames::cancel() { setResult(QDialog::Rejected); hide(); } void TreeOutputFileNames::browse(QLineEdit* lineEditName) { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getSaveFileName(this, tr("Save as")); if(myFile != "") { lineEditName->setText(myFile); } 
} clustalw::TreeNames TreeOutputFileNames::getNames() { return *(treeFileNames.get()); } void TreeOutputFileNames::setAllPtrsToNull() { nexusTreeBrowseButton = 0; nexusTreeName = 0; nexusTreeNameLabel = 0; pimNameBrowseButton = 0; pimName = 0; pimNameLabel = 0; distancesBrowseButton = 0; distancesName = 0; distancesNameLabel = 0; phylipTreeBrowseButton = 0; phylipTreeName = 0; phylipTreeNameLabel = 0; clustalTreeBrowseButton = 0; clustalTreeName = 0; clustalTreeNameLabel = 0; treeOutCancelButton = 0; okButton = 0; fileLayout = 0; fileBox = 0; mainLayout = 0; } clustalx-2.1/TreeFormatOptions.h0000644000175000017500000000132211470725216016611 0ustar ddineenddineen#ifndef TREEFORMATOPTIONS_H #define TREEFORMATOPTIONS_H #include class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class TreeFormatOptions : public QDialog { Q_OBJECT public: TreeFormatOptions(); protected: private slots: void accept(); private: void setUpLayout(); void setUpRadioButtons(); void setUpOtherParams(); QPushButton* okButton; QGroupBox* fileGridBox; QGroupBox* otherParams; QGroupBox* verticalBox; QCheckBox* clustal; QCheckBox* percIdentMat; QCheckBox* phylip; QCheckBox* phylipDistMat; QCheckBox* nexus; QComboBox* bootStrap; QLabel* bootStrapLabel; }; #endif clustalx-2.1/TreeFormatOptions.cpp0000644000175000017500000000742111470725216017152 0ustar ddineenddineen#include #include #include #include #include #include #include #include #include #include "TreeFormatOptions.h" #include "clustalW/general/userparams.h" TreeFormatOptions::TreeFormatOptions() { fileGridBox = new QGroupBox("Output Files"); otherParams = new QGroupBox; QVBoxLayout* mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(fileGridBox); mainLayout->addWidget(otherParams); 
setLayout(mainLayout); } void TreeFormatOptions::setUpLayout() { setUpRadioButtons(); QGridLayout *grid = new QGridLayout; grid->addWidget(clustal, 0, 0); grid->addWidget(phylip, 0, 1); grid->addWidget(phylipDistMat, 1, 0); grid->addWidget(nexus, 1, 1); grid->addWidget(percIdentMat, 2, 0); fileGridBox->setLayout(grid); setUpOtherParams(); QGridLayout *otherParamLayout = new QGridLayout; otherParamLayout->addWidget(bootStrapLabel, 0, 0); otherParamLayout->addWidget(bootStrap, 0, 1); otherParams->setLayout(otherParamLayout); } void TreeFormatOptions::setUpRadioButtons() { clustal = new QCheckBox("CLUSTAL format tree"); percIdentMat = new QCheckBox("% identity matrix"); phylip = new QCheckBox("Phylip format tree"); phylipDistMat = new QCheckBox("Phylip distance matrix"); nexus = new QCheckBox("Nexus format tree"); if(clustalw::userParameters->getOutputTreeClustal()) { clustal->setChecked(true); } if(clustalw::userParameters->getOutputPim()) { percIdentMat->setChecked(true); } if(clustalw::userParameters->getOutputTreePhylip()) { phylip->setChecked(true); } if(clustalw::userParameters->getOutputTreeDistances()) { phylipDistMat->setChecked(true); } if(clustalw::userParameters->getOutputTreeNexus()) { nexus->setChecked(true); } } void TreeFormatOptions::setUpOtherParams() { bootStrap = new QComboBox(); bootStrap->addItem("Branch"); bootStrap->addItem("Node"); if(clustalw::userParameters->getBootstrapFormat() == clustalw::BS_NODE_LABELS) { bootStrap->setCurrentIndex(1); } else { bootStrap->setCurrentIndex(0); } bootStrapLabel = new QLabel(tr("Bootstrap labels on :")); } void TreeFormatOptions::accept() { if(clustal->isChecked()) { clustalw::userParameters->setOutputTreeClustal(true); } else { clustalw::userParameters->setOutputTreeClustal(false); } if(percIdentMat->isChecked()) { clustalw::userParameters->setOutputPim(true); } else { clustalw::userParameters->setOutputPim(false); } if(phylip->isChecked()) { clustalw::userParameters->setOutputTreePhylip(true); } else { 
clustalw::userParameters->setOutputTreePhylip(false); } if(phylipDistMat->isChecked()) { clustalw::userParameters->setOutputTreeDistances(true); } else { clustalw::userParameters->setOutputTreeDistances(false); } if(nexus->isChecked()) { clustalw::userParameters->setOutputTreeNexus(true); } else { clustalw::userParameters->setOutputTreeNexus(false); } if(bootStrap->currentIndex() == 1) { clustalw::userParameters->setBootstrapFormat(clustalw::BS_NODE_LABELS); } else { clustalw::userParameters->setBootstrapFormat(clustalw::BS_BRANCH_LABELS); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/SeqNameWidget.h0000644000175000017500000000731611470725216015673 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * Changes: * * 20-02-07,Nigel Brown(EMBL): made getExtraRowNum() a const member so it can * be called inside sizeHint(); added eventFilter() to catch containing * viewport resize events; added single setMaxNameLength() member to compute * maximum typeset text width; added computeSize() member to determine final * widget size from text size or containing viewport size; added * horizontalMargin member: Widget now fills viewport and resizes neatly. * * 11-04-07,Nigel Brown(EMBL): Added new member 'ctrlPressed'. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. * * 25-04-07,Nigel Brown(EMBL): removed 'shiftPressed', 'ctrlPressed' members * and the keyPressEvent() and keyReleaseEvent() handlers. These are now * handled by the external KeyController class. * * 09-05-07,Nigel Brown(EMBL): added 'mousePressed'. Renamed * rowSelected' to 'firstRow' and added 'secondRow'. 'scrollArea' and * registerScrollArea() allowing the instance to know its own * containing QScrollArea (This is not the same as parentWidget()). 
*
 * 27-06-07,Nigel Brown(EMBL): For screen layout balancing: renamed
 * numSequencesSoFar as numSequences, added numSequences2, which records the
 * number of sequences in the *other* profile window; Added setNumSequences2().
 ****************************************************************************/
#ifndef SEQNAMEWIDGET_H
#define SEQNAMEWIDGET_H

// NOTE(review): the targets of the seven #include directives below are missing
// in this copy of the file (the <...> part was stripped); restore them from the
// upstream ClustalX sources before building.
#include
#include
#include
#include
#include
#include
#include
#include "clustalW/alignment/Alignment.h"

class QMouseEvent;
class QPaintEvent;
class QMessageBox;
class QScrollArea; //- nige

/**
 * This class is used to display the list of the sequence names. It has functions
 * that deal with the selection of multiple names at a time.
 *
 * Each name occupies one row of height boxSize. Rows are preceded by one
 * header row, or two when nHead == 1 (see getExtraRowNum()).
 */
class SeqNameWidget : public QWidget
{
    Q_OBJECT

public:
    SeqNameWidget(int boxSize, QFont* font, QWidget *parent = 0);
    // NOTE(review): setNumOfSequences() and addSequenceName() are declared here
    // but no definition appears in SeqNameWidget.cpp as seen in this archive.
    void setNumOfSequences(int num);
    void addSequenceName(QString *seq);
    /** Applies a new row height and font, then resizes and repaints. */
    void changeBoxSizeAndFontSize(int boxSize, QFont* fontPtr);
    /** Resizes the widget to computeSize(). */
    void updateSizeHint();
    /** Sets the alignment whose names are shown and resets the selection flags. */
    void setNames(clustalw::Alignment* align);
    /** Sets the displayed sequence range [fSeq, lSeq]; 0 for either hides the names. */
    void updateDisplay(int fSeq, int lSeq, int nhead, QString ssName);
    void clearSequenceSelection();
    void selectAllSequences();
    int getNumberOfSeqsSelected();
    // NOTE(review): the template argument of std::vector was stripped in this
    // copy; the .cpp stores 0/1 flags in it — confirm the element type upstream.
    const std::vector* getSelectedRows(){return &selectedRows;};
    /** Number of header rows above the first name: 2 when nHead == 1, else 1. */
    int getExtraRowNum() const; //-nige
    QString getNameSSHeader(){return secStructName;}
    //bool widgetHasFocus
    /** Records the QScrollArea used by mouseMoveEvent() to scroll while dragging. */
    void registerScrollArea(QScrollArea *s) { scrollArea = s; }; //- nige
    /** Records the number of sequences in the other profile window. */
    void setNumSequences2(int count); //nige

public slots:

signals:

protected:
    void mousePressEvent( QMouseEvent *event);
    void mouseMoveEvent(QMouseEvent *event);
    void mouseReleaseEvent(QMouseEvent *event);
    void paintEvent( QPaintEvent *event);
    /** Recomputes maxNameLength from the alignment's names; returns the new value. */
    int setMaxNameLength();
    bool eventFilter(QObject *o, QEvent *e); //- nige
    QSize computeSize() const; //- nige

private:
    int firstSeq, lastSeq;   // displayed sequence range, set by updateDisplay()
    int nHead;               // 1 adds a second header row showing secStructName
    QFont displayFont;
    int numRowsShowing;      // not referenced in SeqNameWidget.cpp as seen here
    bool mousePressed; //- nige
    int firstRow; //- nige   (selection anchor row, -1 when unset)
    int secondRow; //- nige  (other end of the current row range)
    QSize sizeHint() const;
    int boxSize;             // height of one row
    int pixelsize;           // not referenced in SeqNameWidget.cpp as seen here
    int alignLength;         // not referenced in SeqNameWidget.cpp as seen here
    int numSequences;
    int numSequences2;       // sequence count of the other profile window
    int offSetFromTop;       // vertical text offset inside a row
    int horizontalMargin; //margin for identifiers, nige
    clustalw::Alignment* alignment;
    QColor* selectedColor;   // fill colour of selected rows
    QColor *backGroundGrey;  // fill colour of the header rows
    // Per-row selection flags (1 = selected, 0 = not selected).
    // NOTE(review): template argument stripped in this copy, see getSelectedRows().
    std::vector selectedRows;
    int maxNameLength;       // widest name, from QFontMetrics::width()
    bool isSet;
    bool showNames;
    QString secStructName;
    QSize viewSize; //containing viewport size, nige
    QScrollArea *scrollArea; //-nige
};

#endif
clustalx-2.1/SeqNameWidget.cpp0000644000175000017500000003774011470725216016232 0ustar ddineenddineen/**
 * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie
 *
 * Changes:
 *
 * 20-02-07,Nigel Brown(EMBL): made getExtraRowNum() a const member so it can
 * be called inside sizeHint(); added eventFilter() to catch containing
 * viewport resize events; added single setMaxNameLength() member to compute
 * maximum typeset text width; added computeSize() member to determine final
 * widget size from text size or containing viewport size; added
 * horizontalMargin member. Visual effect of all this is to extend widget out
 * to boundaries of viewport, allowing justification, colouring of whole
 * region, and highlighting out to full width of viewport.
 *
 * 12-03-07,Nigel Brown(EMBL): changed background colour to very pale grey.
 *
 * 11-04-07,Nigel Brown(EMBL): Modified mousePressEvent(), keyPressEvent() and
 * keyReleaseEvent() to extended sequence selection to allow [shift]-select
 * for a block of sequences and [ctrl]-select to deselect a single sequence
 * therein. Added new member 'ctrlPressed'. Initialised 'ctrlPressed' and
 * 'rowSelected' in constructor. Note: on the Mac [ctrl] is achieved using the
 * "Option" key (handled transparently by Qt). Behaviour on left-click is:
 * select: select this row.
 * [shift]-select: set block from last selected row to this row.
 * [ctrl]-select: deselect this row.
 * The member 'rowSelected' stores the last selected row. It is reset to an
 * invalid value on [ctrl]-select.
 *
 * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects.
*
 * 25-04-07,Nigel Brown(EMBL): removed 'shiftPressed', 'ctrlPressed' members
 * and the keyPressEvent() and keyReleaseEvent() handlers. These are now
 * handled by the external KeyController class.
 *
 * 02-05-07,Nigel Brown(EMBL): Changed mousePressEvent() behaviour to add:
 * [ctrl]-select: *toggle* this row.
 *
 * 09-05-07,Nigel Brown(EMBL): Added drag and [shift]-drag selection
 * of row identifiers via new member 'mousePressed', mouseMoveEvent()
 * and mouseReleaseEvent(); Changed 'rowSelected' to 'firstRow' and
 * added 'secondRow' to store row range information.
 *
 * 09-05-07,Nigel Brown(EMBL): Added ensureVisible() call on new
 * 'scrollArea' member inside mouseMoveEvent() to scroll when
 * selection extends beyond visible.
 *
 * 15-05-07,Nigel Brown(EMBL): mousePressEvent(), mouseMoveEvent() now test
 * numSequencesSoFar before handling event.
 *
 * 27-06-07,Nigel Brown(EMBL): For screen layout balancing: renamed
 * numSequencesSoFar as numSequences, added numSequences2, which records the
 * number of sequences in the *other* profile window; Added setNumSequences2()
 * and changed computeSize() to compute height based on the *larger* of
 * numSequences and numSequences2.
 *
 ****************************************************************************/
// NOTE(review): the target of the first #include below is missing in this copy
// of the file (the <...> part was stripped); restore it from upstream.
#include
#include "SeqNameWidget.h"
#include "KeyController.h" //- nige
#include "clustalW/alignment/Alignment.h"

// Fallback for maxNameLength when no name width is available; also the
// minimum text width used by computeSize() before margins are added.
const int DEFAULT_MAX_NAME_LENGTH = 15;
// Very pale grey painted behind the names in paintEvent().
const QColor DEFAULT_BACKGROUND_COLOUR(0xF9,0xF9,0xF9);

/**
 * This is the constructor for a SeqNameWidget. It sets up all the initial parameters.
 * @param boxsize The size of the box to contain the names.
 * @param font The font used to draw the names; it is copied into the widget.
 * @param *parent The parent widget. This is optional.
*/ SeqNameWidget::SeqNameWidget(int boxsize, QFont* font, QWidget *parent) : QWidget(parent), firstSeq(0), lastSeq(0), nHead(0), boxSize(boxsize) { /* * Set up initial values */ isSet = false; showNames = false; mousePressed = false; //- nige setMouseTracking(true); offSetFromTop = 2; //it is 2 in AlignmentWidget horizontalMargin = 5; //- nige firstRow = -1; //- nige secondRow = -1; //- nige alignment = 0; secStructName = ""; numSequences = 0; numSequences2 = 0; //nige viewSize = QSize(0,0); /* * Set the font */ displayFont = *font; setMaxNameLength(); /* * Set up the colors to be used in displaying the alignment */ selectedColor = new QColor; selectedColor->setRgb(180,228,254); backGroundGrey = new QColor; backGroundGrey->setRgb(221,221,221); /* * Set the focus policy to allow the widget to recieve keyboard events */ setFocusPolicy( Qt::StrongFocus ); } void SeqNameWidget::setNames(clustalw::Alignment* align) { alignment = align; isSet = true; if(align != 0) { numSequences = align->getNumSeqs(); selectedRows.clear(); } selectedRows.resize(numSequences); updateSizeHint(); //- may not be necessary now, 20-02-07,nige } void SeqNameWidget::updateDisplay(int fSeq, int lSeq, int nhead, QString ssName) { if(fSeq != 0 && lSeq != 0) { isSet = true; showNames = true; nHead = nhead; secStructName = ssName; firstSeq = fSeq; lastSeq = lSeq; numSequences = lastSeq - firstSeq + 1; selectedRows.clear(); selectedRows.resize(numSequences); setMaxNameLength(); //recompute width } else { isSet = false; showNames = false; nHead = 0; secStructName = ""; firstSeq = fSeq; lastSeq = lSeq; numSequences = 0; selectedRows.clear(); selectedRows.resize(numSequences); updateSizeHint(); //- may not be necessary now, 20-02-07,nige } resize(computeSize()); update(); } QSize SeqNameWidget::computeSize() const { if (! alignment) return viewSize; int extraRow = getExtraRowNum(); int width = DEFAULT_MAX_NAME_LENGTH; int maxNumSeqs = (numSequences2 > numSequences ? 
numSequences2 : numSequences); int height = (maxNumSeqs + extraRow) * boxSize; if (maxNameLength > width) { width = maxNameLength; } width += horizontalMargin * 2; if (width < viewSize.width()) width = viewSize.width(); if (height < viewSize.height()) height = viewSize.height(); //printf("SNW::computeSize(%0x, w=%d, h=%d) [mnl=%d; bh=%d] [ns=%d:%d/%d]\n", // (int)this, width, height, maxNameLength, boxSize, maxNumSeqs, numSequences, numSequences2); return QSize(width, height); } /** * This function overrides the parents sizeHint function. It gives an idea of * the size this widget needs. */ //- may not be necessary now, 20-02-07,nige QSize SeqNameWidget::sizeHint() const { QSize newSize = computeSize(); //printf("SeqNameWidget::sizeHint(%0x, w=%d, h=%d) [mnl=%d; bh=%d]\n", // (int)this, newSize.width(), newSize.height(), maxNameLength, boxSize); return newSize; } //- may not be necessary now, 20-02-07,nige void SeqNameWidget::updateSizeHint() { QSize newSize = computeSize(); //printf("SeqNameWidget::updateSizeHint(%0x, w=%d, h=%d) [mnl=%d; bh=%d]\n", // (int)this, newSize.width(), newSize.height(), maxNameLength, boxSize); resize(newSize); } /** * This function is an event handler. It finds the sequence name that has been clicked. * @param *event A mouse event. * * 11-04-07,nige: extended to add [shift]-select and [ctrl]-select. */ void SeqNameWidget::mousePressEvent(QMouseEvent *event) { if (numSequences > 0 && event->button() == Qt::LeftButton) { mousePressed = true; KeyController *kbd = KeyController::Instance(); /* find which row has been selected */ int newRow = (event->y() / boxSize); newRow -= getExtraRowNum(); /* starting over? 
*/ if (firstRow < 0) firstRow = secondRow = newRow; if (0 <= newRow && newRow < (int)selectedRows.size()) { if (kbd->shiftPressed()) { /* select a range of rows */ if (firstRow < newRow) { for (int i = firstRow; i <= newRow; i++) { selectedRows[i] = 1; } } else { for (int i = firstRow; i >= newRow; i--) { selectedRows[i] = 1; } } secondRow = newRow; } else if (kbd->ctrlPressed()) { /* nige: toggle */ if (selectedRows[newRow] == 0) { /* select this row and leave firstRow */ selectedRows[newRow] = 1; } else { /* deselect this row and clear firstRow */ selectedRows[newRow] = 0; firstRow = -1; } } else { /* deselect previous selection and select this row */ for (int i = 0; i < numSequences; i++) { selectedRows[i] = 0; } selectedRows[newRow] = 1; firstRow = newRow; } } update(); return; } QWidget::mousePressEvent(event); } void SeqNameWidget::mouseReleaseEvent(QMouseEvent *event) //nige { if (event->button() == Qt::LeftButton) { mousePressed = false; update(); return; } QWidget::mouseReleaseEvent(event); } void SeqNameWidget::mouseMoveEvent(QMouseEvent *event) //nige { KeyController *kbd = KeyController::Instance(); if (numSequences > 0 && mousePressed && !kbd->ctrlPressed()) { //scroll if necessary to continue selection scrollArea->ensureVisible(event->x(), event->y()); //which row? 
don't overshoot either end int newRow = (event->y() / boxSize) - getExtraRowNum(); if (newRow < 0) newRow = 0; if (newRow >= numSequences) newRow = numSequences - 1; //printf("%d -> %d, %d\n", newRow, firstRow, secondRow); if (firstRow <= secondRow) { if (newRow < secondRow) { //deselect between newRow and secondRow: // F === N === S -> F === N --- S for (int i = newRow+1; i <= secondRow; i++) { //cout << "case 1\n"; selectedRows[i] = 0; } } else { //reselect from firstRow to newRow // F === S --- N -> F === S === N for (int i = firstRow; i <= newRow; i++) { //cout << "case 2\n"; selectedRows[i] = 1; } } } else { if (secondRow < newRow) { //deselect between newRow and secondRow: // S === N === F -> S --- N === F for (int i = secondRow; i < newRow; i++) { //cout << "case 3\n"; selectedRows[i] = 0; } } else { //reselect from firstRow to newRow // N --- S === F -> N === S === F for (int i = newRow; i <= firstRow; i++) { //cout << "case 4\n"; selectedRows[i] = 1; } } } secondRow = newRow; update(); return; } QWidget::mouseMoveEvent(event); } /* 16-02-07,nige: track the containing viewport's size, then pass event on */ bool SeqNameWidget::eventFilter(QObject *o, QEvent *e) { //printf("SeqNameWidget::eventFilter(%0x, %d)\n", (int)this, e->type()); if (parentWidget() && parentWidget() == o) { if (e->type() == QEvent::Resize) { QResizeEvent *resizeEvent = static_cast(e); //record known viewport size viewSize = resizeEvent->size(); QSize newSize = computeSize(); resize(newSize); //printf("SeqNameWidget::eventFilter(%0x, resize) nw=%d nh=%d, mnl=%d\n", // (int)this, newSize.width(), newSize.height(), maxNameLength); //fall through to allow the viewport to respond return false; } } return false; } /** * This is an event handler. It repaints the widget when it needs to be repainted. * @param *event A paint event. 
*/ void SeqNameWidget::paintEvent(QPaintEvent *event) { QPainter painter(this); painter.fillRect(event->rect(), QBrush(DEFAULT_BACKGROUND_COLOUR)); painter.setFont(displayFont); QRect redrawRect = event->rect(); // Set up the begin and end rows. int extra = getExtraRowNum(); int beginRow = 0; int endRow = numSequences - 1 + extra; int currentRow = firstSeq; //width of text drawing area int textWidth = width() - horizontalMargin * 2; QFontMetrics fontMetrics(displayFont); painter.setPen(QPen(Qt::black)); //printf("SeqNameWidget::paintEvent(%0x, w=%d, h=%d) [mnl=%d; bh=%d]\n", // (int)this, width(), height(), maxNameLength, boxSize); if (firstSeq != 0 && isSet && alignment && showNames) { for (int row = beginRow; row <= endRow; ++row) { currentRow = firstSeq + row - extra; if (row == 0) { painter.fillRect(0, 0, width(), boxSize, QBrush(backGroundGrey->rgb())); } else if (nHead == 1 && row == 1) { painter.fillRect(0, row * boxSize, width(), boxSize, QBrush(backGroundGrey->rgb())); painter.drawText(QRect(horizontalMargin, row * boxSize + offSetFromTop, textWidth, boxSize), Qt::AlignRight, secStructName); } else { // If the row has been selected, then highlight it. if ((int)selectedRows.size() > row - extra && selectedRows[row - extra] == 1) { painter.fillRect(0, row * boxSize, width(), boxSize, QBrush(selectedColor->rgb())); } painter.drawText(QRect(horizontalMargin, row * boxSize + offSetFromTop, textWidth, boxSize), Qt::AlignRight, QString(alignment->getName(currentRow).c_str())); } } } } /* * Recompute maxNameLength based on the lengths of the identifier strings * typeset in the current font at the current size, or * DEFAULT_MAX_NAME_LENGTH, whichever is larger. * * Created: 16-02-07,Nigel Brown(EMBL) **/ int SeqNameWidget::setMaxNameLength() { //printf("setMaxNameLength(%0x)\n", (int)this); maxNameLength = 0; if (alignment) { QFontMetrics fontMetrics(displayFont); clustalw::Alignment::NameIterator i; i.begin(alignment); while (! 
i.end()) { string name = i.next(); //cout << "name: " << name << endl; int length = fontMetrics.width(QString(name.c_str())); if (length > maxNameLength) maxNameLength = length; } } if (maxNameLength < 1) maxNameLength = DEFAULT_MAX_NAME_LENGTH; return maxNameLength; } void SeqNameWidget::changeBoxSizeAndFontSize(int _boxSize, QFont* font) { boxSize = _boxSize; displayFont = *font; setMaxNameLength(); resize(computeSize()); update(); } void SeqNameWidget::clearSequenceSelection() { int sizeOfSelected = selectedRows.size(); for(int i = 0; i < sizeOfSelected; i++) { selectedRows[i] = 0; } update(); } void SeqNameWidget::selectAllSequences() { int sizeOfSelected = selectedRows.size(); for(int i = 0; i < sizeOfSelected; i++) { selectedRows[i] = 1; } update(); } int SeqNameWidget::getNumberOfSeqsSelected() { int num = 0; int sizeOfSelected = selectedRows.size(); for(int i = 0; i < sizeOfSelected; i++) { if(selectedRows[i] == 1) { num++; } } return num++; } int SeqNameWidget::getExtraRowNum() const //-nige { if (nHead != 1) return 1; return 2; } //nige 26-06-07 void SeqNameWidget::setNumSequences2(int count) { //printf("RECV(SNW, setNumSequences2) %0x (%d)\n", (int)this, count); numSequences2 = count; } clustalx-2.1/SecStructOptions.h0000644000175000017500000000246011470725216016464 0ustar ddineenddineen#ifndef SECSTRUCTOPTIONS_H #define SECSTRUCTOPTIONS_H #include class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class QLineEdit; class SecStructOptions : public QDialog { Q_OBJECT public: SecStructOptions(); protected: private slots: void accept(); private: void setUpCombo(); void setUpOutput(); void setUpOtherParams(); QPushButton* okButton; QComboBox* useProfile1SS; QComboBox* useProfile2SS; QLabel* useProfile1SSLabel; QLabel* useProfile2SSLabel; QGroupBox* outputGridBox; QGroupBox* ssGridBox; QGroupBox* ssParamsGridBox; QGroupBox* verticalBox; QCheckBox* secStruct; QCheckBox* gapMask; QLineEdit* helixGapPen; QLineEdit* strandGapPen; 
QLineEdit* loopGapPen; QLineEdit* ssTerminalPen; QLineEdit* helixTerminalPosWithin; QLineEdit* helixTerminalPosOutside; QLineEdit* strandTerminalPosWithin; QLineEdit* strandTerminalPosOutside; QLabel* helixGapPenLabel; QLabel* strandGapPenLabel; QLabel* loopGapPenLabel; QLabel* ssTerminalPenLabel; QLabel* helixTerminalPosWithinLabel; QLabel* helixTerminalPosOutsideLabel; QLabel* strandTerminalPosWithinLabel; QLabel* strandTerminalPosOutsideLabel; }; #endif clustalx-2.1/SecStructOptions.cpp0000644000175000017500000002103211470725216017013 0ustar ddineenddineen#include #include #include #include #include #include #include #include #include #include #include "SecStructOptions.h" #include "clustalW/general/userparams.h" SecStructOptions::SecStructOptions() { setWindowTitle("Secondary Structure Options"); QVBoxLayout* mainLayout = new QVBoxLayout; outputGridBox = new QGroupBox("Output"); ssGridBox = new QGroupBox; ssParamsGridBox = new QGroupBox; verticalBox = new QGroupBox; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Minimum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpCombo(); setUpOutput(); setUpOtherParams(); mainLayout->addWidget(okButton); mainLayout->addWidget(ssGridBox); mainLayout->addWidget(outputGridBox); mainLayout->addWidget(ssParamsGridBox); setLayout(mainLayout); } void SecStructOptions::setUpCombo() { QGridLayout *grid = new QGridLayout; useProfile1SS = new QComboBox; useProfile1SS->addItem("Yes"); useProfile1SS->addItem("No"); if(clustalw::userParameters->getUseSS1()) { useProfile1SS->setCurrentIndex(0); } else { useProfile1SS->setCurrentIndex(1); } useProfile2SS = new QComboBox; useProfile2SS->addItem("Yes"); useProfile2SS->addItem("No"); if(clustalw::userParameters->getUseSS2()) { useProfile2SS->setCurrentIndex(0); } else { useProfile2SS->setCurrentIndex(1); } useProfile1SSLabel = new QLabel("Use profile 1 secondary structure / penalty mask"); useProfile2SSLabel = new 
QLabel("Use profile 2 secondary structure / penalty mask"); grid->addWidget(useProfile1SSLabel, 0, 0); grid->addWidget(useProfile1SS, 0, 1); grid->addWidget(useProfile2SSLabel, 1, 0); grid->addWidget(useProfile2SS, 1, 1); ssGridBox->setLayout(grid); } void SecStructOptions::setUpOutput() { QGridLayout *grid = new QGridLayout; secStruct = new QCheckBox("Secondary Structure"); gapMask = new QCheckBox("Gap Penalty Mask"); if(clustalw::userParameters->getOutputStructPenalties() == clustalw::OUTSECST) { secStruct->setChecked(true); gapMask->setChecked(false); } else if(clustalw::userParameters->getOutputStructPenalties() == clustalw::OUTGAP) { gapMask->setChecked(true); secStruct->setChecked(false); } else if(clustalw::userParameters->getOutputStructPenalties() == clustalw::OUTBOTH) { secStruct->setChecked(true); gapMask->setChecked(true); } else { secStruct->setChecked(false); gapMask->setChecked(false); } grid->addWidget(secStruct, 0, 0); grid->addWidget(gapMask, 0, 1); outputGridBox->setLayout(grid); } void SecStructOptions::setUpOtherParams() { QString myNum; helixGapPen = new QLineEdit; myNum.setNum(clustalw::userParameters->getHelixPenalty()); helixGapPen->setText(myNum); strandGapPen = new QLineEdit; myNum.setNum(clustalw::userParameters->getStrandPenalty()); strandGapPen->setText(myNum); loopGapPen = new QLineEdit; myNum.setNum(clustalw::userParameters->getLoopPenalty()); loopGapPen->setText(myNum); ssTerminalPen = new QLineEdit; myNum.setNum(clustalw::userParameters->getHelixEndPenalty()); ssTerminalPen->setText(myNum); helixTerminalPosWithin = new QLineEdit; myNum.setNum(clustalw::userParameters->getHelixEndMinus()); helixTerminalPosWithin->setText(myNum); helixTerminalPosOutside = new QLineEdit; myNum.setNum(clustalw::userParameters->getHelixEndPlus()); helixTerminalPosOutside->setText(myNum); strandTerminalPosWithin = new QLineEdit; myNum.setNum(clustalw::userParameters->getStrandEndMinus()); strandTerminalPosWithin->setText(myNum); strandTerminalPosOutside 
= new QLineEdit; myNum.setNum(clustalw::userParameters->getStrandEndPlus()); strandTerminalPosOutside->setText(myNum); helixGapPenLabel = new QLabel("Helix Gap Penalty [1-9]:"); strandGapPenLabel = new QLabel("Strand Gap Penalty [1-9]:"); loopGapPenLabel = new QLabel("Loop Gap Penalty [1-9]:"); ssTerminalPenLabel = new QLabel("Secondary Structure Terminal Penalty [1-9]:"); helixTerminalPosWithinLabel = new QLabel("Helix Terminal Positions [1-9] within:"); helixTerminalPosOutsideLabel = new QLabel("outside:"); strandTerminalPosWithinLabel = new QLabel("Strand Terminal Positions [1-9] within:"); strandTerminalPosOutsideLabel = new QLabel("outside:"); QGridLayout *grid = new QGridLayout; grid->addWidget(helixGapPenLabel, 0, 0); grid->addWidget(helixGapPen, 0, 1); grid->addWidget(strandGapPenLabel, 1, 0); grid->addWidget(strandGapPen, 1, 1); grid->addWidget(loopGapPenLabel, 2, 0); grid->addWidget(loopGapPen, 2, 1); grid->addWidget(ssTerminalPenLabel, 3, 0); grid->addWidget(ssTerminalPen, 3, 1); grid->addWidget(helixTerminalPosWithinLabel, 4, 0); grid->addWidget(helixTerminalPosWithin, 4, 1); grid->addWidget(helixTerminalPosOutsideLabel, 5, 0); grid->addWidget(helixTerminalPosOutside, 5, 1); grid->addWidget(strandTerminalPosWithinLabel, 6, 0); grid->addWidget(strandTerminalPosWithin, 6, 1); grid->addWidget(strandTerminalPosOutsideLabel, 7, 0); grid->addWidget(strandTerminalPosOutside, 7, 1); ssParamsGridBox->setLayout(grid); } void SecStructOptions::accept() { bool ok; // Using secondary structure information?? if(useProfile1SS->currentIndex() == 0) { clustalw::userParameters->setUseSS1(true); } else { clustalw::userParameters->setUseSS1(false); } if(useProfile2SS->currentIndex() == 0) { clustalw::userParameters->setUseSS2(true); } else { clustalw::userParameters->setUseSS2(false); } // Output secondary structure information??? 
if(secStruct->isChecked() && !gapMask->isChecked()) { clustalw::userParameters->setOutputStructPenalties(clustalw::OUTSECST); } else if(gapMask->isChecked() && !secStruct->isChecked()) { clustalw::userParameters->setOutputStructPenalties(clustalw::OUTGAP); } else if(secStruct->isChecked() && gapMask->isChecked()) { clustalw::userParameters->setOutputStructPenalties(clustalw::OUTBOTH); } else { clustalw::userParameters->setOutputStructPenalties(clustalw::OUTNONE); } // Now check all the numbers in the boxes are 1-9 QString helixGapPenString = helixGapPen->text(); int pen = helixGapPenString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setHelixPenalty(pen); } QString strandGapPenString = strandGapPen->text(); pen = strandGapPenString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setStrandPenalty(pen); } QString loopGapPenString = loopGapPen->text(); pen = loopGapPenString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setLoopPenalty(pen); } QString ssTerminalPenString = ssTerminalPen->text(); pen = ssTerminalPenString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setHelixEndPenalty(pen); } QString helixTerminalPosWithinString = helixTerminalPosWithin->text(); pen = helixTerminalPosWithinString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setHelixEndMinus(pen); } QString helixTerminalPosOutsideString = helixTerminalPosOutside->text(); pen = helixTerminalPosOutsideString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setHelixEndPlus(pen); } QString strandTerminalPosWithinString = strandTerminalPosWithin->text(); pen = strandTerminalPosWithinString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { clustalw::userParameters->setStrandEndMinus(pen); } QString strandTerminalPosOutsideString = strandTerminalPosOutside->text(); pen = strandTerminalPosOutsideString.toInt(&ok); if(ok && pen >= 1 && pen <= 9) { 
clustalw::userParameters->setStrandEndPlus(pen); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/SearchForString.h0000644000175000017500000000124611470725216016235 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #ifndef SEARCHFORSTRING_H #define SEARCHFORSTRING_H #include class QPushButton; class QGroupBox; class QLineEdit; class QVBoxLayout; class SearchForString : public QDialog { Q_OBJECT public: SearchForString(); QString getSearchString(); signals: void searchFromStartSignal(); void searchAgainSignal(); protected: private slots: void cancel(); private: QVBoxLayout* mainLayout; QPushButton* searchFromStart; QPushButton* searchAgain; QPushButton* closeButton; QLineEdit* searchString; }; #endif clustalx-2.1/SearchForString.cpp0000644000175000017500000000245411470725216016572 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #include "SearchForString.h" #include #include #include #include SearchForString::SearchForString() { searchString = new QLineEdit(""); searchFromStart = new QPushButton(tr("Search From Start")); searchFromStart->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); searchAgain = new QPushButton(tr("Search Again")); searchAgain->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); closeButton = new QPushButton(tr("Close")); closeButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(searchFromStart, SIGNAL(clicked()), this, SIGNAL(searchFromStartSignal())); connect(searchAgain, SIGNAL(clicked()), this, SIGNAL(searchAgainSignal())); connect(closeButton, SIGNAL(clicked()), this, SLOT(cancel())); mainLayout = new QVBoxLayout; mainLayout->addWidget(searchString); mainLayout->addWidget(searchFromStart); mainLayout->addWidget(searchAgain); mainLayout->addWidget(closeButton); setLayout(mainLayout); } QString SearchForString::getSearchString() { return searchString->text(); } void 
SearchForString::cancel() { setResult(QDialog::Rejected); close(); } clustalx-2.1/SaveSeqFile.h0000644000175000017500000000322111470725216015334 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #ifndef SAVESEQFILE_H #define SAVESEQFILE_H #include #include "ClustalQtParams.h" class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class QVBoxLayout; class QGridLayout; class QLineEdit; class SaveSeqFile : public QDialog { Q_OBJECT public: SaveSeqFile(QString inputFileName, int type, rangeParams params); QString getFileName(){return fileName;} protected: private slots: void accept(); void cancel(); void browse(); void changeUseRange(); private: void setUpLayout(); void setUpRadioButtons(); void setUpOtherParams(); QVBoxLayout* mainLayout; QGridLayout* grid; QGridLayout* otherParamLayout; QPushButton* okButton; QPushButton* cancelButton; QPushButton* browseButton; QGroupBox* fileGridBox; QGroupBox* otherParams; QGroupBox* verticalBox; QCheckBox* clustal; QCheckBox* gcg; QCheckBox* gde; QCheckBox* fasta; QCheckBox* nbrf; QCheckBox* phylip; QCheckBox* nexus; QCheckBox* useRangeInfo; QComboBox* gdeOutputCase; QComboBox* clustalSequenceNumbers; QComboBox* rangeNumbers; QLabel* gdeOutputCaseLabel; QLabel* clustalSequenceNumbersLabel; QLabel* rangeFromLabel; QLabel* rangeToLabel; QLabel* rangeNumbersLabel; QLabel* saveAsLabel; QLineEdit* rangeFrom; QLineEdit* rangeTo; QLineEdit* saveAs; int type; QString title; QString label; QString fileName; int firstSeq, lastSeq, firstRes, lastRes; }; #endif clustalx-2.1/SaveSeqFile.cpp0000644000175000017500000003014211470725216015671 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * 12-03-07,Nigel Brown(EMBL): fixed range selection (bz0029) in accept(), * which was miscalculating the end point of the selection. 
Dialog no longer * exits and outputs default range when given bad range values; changed * behaviour to allow user to correct input mistake and added more specific * textbox warnings. Also added check that user has selected at least one * output format and added corresponding textbox warning. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #include #include #include #include #include #include #include #include #include #include "SaveSeqFile.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "clustalW/alignment/AlignmentOutput.h" #include "FileDialog.h" SaveSeqFile::SaveSeqFile(QString inputFileName, int _type, rangeParams params) : type(_type) { clustalw::userParameters->setRangeFromToSet(false); clustalw::userParameters->setSeqRange(false); firstSeq = params.firstSeq; lastSeq = params.lastSeq; firstRes = params.firstRes; lastRes = params.lastRes; if(type == Sequences) { title = "SAVE SEQUENCES"; label = "Save Sequences As: (File extension will be appended)"; } else if(type == Profile1) { title = "SAVE PROFILE"; label = "Save Profile 1 As: (File extension will be appended)"; } else if(type == Profile2) { title = "SAVE PROFILE"; label = "Save Profile 2 As: (File extension will be appended)"; } else { title = ""; label = ""; } if(inputFileName.size() > 0) { std::string path = ""; clustalw::utilityObject->getPath(inputFileName.toStdString(), &path); if(path.size() > 0) { fileName = QString(path.c_str()); int len = fileName.size(); if(fileName[len -1] == '.') { fileName[len -1] = ' '; fileName = fileName.trimmed(); } } else { fileName = QDir::currentPath() + QString(QDir::separator()) + "temp"; } } fileGridBox = new QGroupBox("Format"); otherParams = new QGroupBox; mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); cancelButton = new QPushButton(tr("Cancel")); 
cancelButton->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); connect(cancelButton, SIGNAL(clicked()), this, SLOT(cancel())); browseButton = new QPushButton(tr("Browse")); browseButton->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); connect(browseButton, SIGNAL(clicked()), this, SLOT(browse())); setUpLayout(); mainLayout->addWidget(fileGridBox); mainLayout->addWidget(otherParams); setLayout(mainLayout); } void SaveSeqFile::setUpLayout() { setUpRadioButtons(); grid = new QGridLayout; grid->addWidget(clustal, 0, 0); grid->addWidget(nbrf, 0, 1); grid->addWidget(gcg, 1, 0); grid->addWidget(phylip, 1, 1); grid->addWidget(gde, 2, 0); grid->addWidget(nexus, 2, 1); grid->addWidget(fasta, 3, 0); fileGridBox->setLayout(grid); setUpOtherParams(); otherParamLayout = new QGridLayout; otherParamLayout->addWidget(gdeOutputCaseLabel, 0, 0); otherParamLayout->addWidget(gdeOutputCase, 0, 1); otherParamLayout->addWidget(clustalSequenceNumbersLabel, 1, 0); otherParamLayout->addWidget(clustalSequenceNumbers, 1, 1); otherParamLayout->addWidget(useRangeInfo, 2, 0); otherParamLayout->addWidget(rangeFromLabel, 3, 0); otherParamLayout->addWidget(rangeFrom, 3, 1); otherParamLayout->addWidget(rangeToLabel, 3, 2); otherParamLayout->addWidget(rangeTo, 3, 3); otherParamLayout->addWidget(rangeNumbersLabel, 4, 0); otherParamLayout->addWidget(rangeNumbers, 4, 1); otherParamLayout->addWidget(saveAsLabel, 5, 0); otherParamLayout->addWidget(saveAs, 6, 0, 1, 3); otherParamLayout->addWidget(browseButton, 6, 4); otherParamLayout->addWidget(okButton, 7, 0); otherParamLayout->addWidget(cancelButton, 7, 1); otherParams->setLayout(otherParamLayout); } void SaveSeqFile::setUpRadioButtons() { clustal = new QCheckBox("CLUSTAL format"); gcg = new QCheckBox("GCG/MSF format"); gde = new QCheckBox("GDE format"); fasta = new QCheckBox("FASTA format"); nbrf = new QCheckBox("NBRF/PIR format"); phylip = new QCheckBox("PHYLIP format"); nexus = new QCheckBox("NEXUS format"); 
if(clustalw::userParameters->getOutputClustal()) { clustal->setChecked(true); } if(clustalw::userParameters->getOutputGCG()) { gcg->setChecked(true); } if(clustalw::userParameters->getOutputGde()) { gde->setChecked(true); } if(clustalw::userParameters->getOutputFasta()) { fasta->setChecked(true); } if(clustalw::userParameters->getOutputNbrf()) { nbrf->setChecked(true); } if(clustalw::userParameters->getOutputPhylip()) { phylip->setChecked(true); } if(clustalw::userParameters->getOutputNexus()) { nexus->setChecked(true); } } void SaveSeqFile::setUpOtherParams() { gdeOutputCase = new QComboBox(); gdeOutputCase->addItem("Lower"); gdeOutputCase->addItem("Upper"); if(clustalw::userParameters->getLowercase()) { gdeOutputCase->setCurrentIndex(0); } else { gdeOutputCase->setCurrentIndex(1); } gdeOutputCaseLabel = new QLabel(tr("GDE output case :")); clustalSequenceNumbers = new QComboBox(); clustalSequenceNumbers->addItem("Off"); clustalSequenceNumbers->addItem("On"); if(clustalw::userParameters->getClSeqNumbers()) { clustalSequenceNumbers->setCurrentIndex(1); } else { clustalSequenceNumbers->setCurrentIndex(0); } clustalSequenceNumbersLabel = new QLabel(tr("CLUSTALW sequence numbers :")); rangeNumbers = new QComboBox(); rangeNumbers->addItem("Off"); rangeNumbers->addItem("On"); if(clustalw::userParameters->getSeqRange()) { clustalSequenceNumbers->setCurrentIndex(1); } else { clustalSequenceNumbers->setCurrentIndex(0); } useRangeInfo = new QCheckBox("Use range information: "); useRangeInfo->setChecked(false); if(!clustalw::userParameters->getRangeFromToSet()) { useRangeInfo->setChecked(false); } else { useRangeInfo->setChecked(true); } connect(useRangeInfo, SIGNAL(clicked()), this, SLOT(changeUseRange())); rangeFrom = new QLineEdit; rangeTo = new QLineEdit; QString num; num = num.setNum(firstRes); rangeFrom->setText(num); num = num.setNum(lastRes); rangeTo->setText(num); rangeFromLabel = new QLabel(tr("Save range from: ")); rangeToLabel = new QLabel(tr(" to: ")); 
rangeNumbersLabel = new QLabel(tr("Include range numbers: ")); saveAsLabel = new QLabel(label); saveAs = new QLineEdit; saveAs->setText(fileName); changeUseRange(); } void SaveSeqFile::changeUseRange() { if(useRangeInfo->isChecked()) { // Enable rangeFrom->setEnabled(true); rangeTo->setEnabled(true); rangeNumbers->setEnabled(true); } else { // disable rangeFrom->setEnabled(false); rangeTo->setEnabled(false); rangeNumbers->setEnabled(false); } } void SaveSeqFile::browse() { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getSaveFileName(this, tr("Save as")); if(myFile != "") { saveAs->setText(myFile); } } void SaveSeqFile::cancel() { setResult(QDialog::Rejected); close(); } void SaveSeqFile::accept() { // Find out which ones (and how many [nige]) have been checked! int selected = 0; /* If 'userParameters::setOutputWhatever(bool)' returned boolean, this would be 1/6 as long: selected += clustalw::userParameters->setOutputClustal(clustal->isChecked()); selected += clustalw::userParameters->setOutputGCG(gcg->isChecked()); ... 
*/ if (clustal->isChecked()) { clustalw::userParameters->setOutputClustal(true); selected++; } else { clustalw::userParameters->setOutputClustal(false); } if (gcg->isChecked()) { clustalw::userParameters->setOutputGCG(true); selected++; } else { clustalw::userParameters->setOutputGCG(false); } if (gde->isChecked()) { clustalw::userParameters->setOutputGde(true); selected++; } else { clustalw::userParameters->setOutputGde(false); } if (fasta->isChecked()) { clustalw::userParameters->setOutputFasta(true); selected++; } else { clustalw::userParameters->setOutputFasta(false); } if (nbrf->isChecked()) { clustalw::userParameters->setOutputNbrf(true); selected++; } else { clustalw::userParameters->setOutputNbrf(false); } if (phylip->isChecked()) { clustalw::userParameters->setOutputPhylip(true); selected++; } else { clustalw::userParameters->setOutputPhylip(false); } if (nexus->isChecked()) { clustalw::userParameters->setOutputNexus(true); selected++; } else { clustalw::userParameters->setOutputNexus(false); } if (! selected) { QMessageBox::information(this, "", "Please select at least one output format.", QMessageBox::Ok); return; } // if range is selected // Check the range is ok. 
If it isn't use the default if (useRangeInfo->isChecked()) { if (rangeNumbers->currentIndex() == 0) { clustalw::userParameters->setRangeFromToSet(false); clustalw::userParameters->setSeqRange(false); } else { clustalw::userParameters->setRangeFromToSet(true); clustalw::userParameters->setSeqRange(true); } bool ok1; QString textFrom = rangeFrom->text(); int numFrom = textFrom.toInt(&ok1); bool ok2; QString textTo = rangeTo->text(); int numTo = textTo.toInt(&ok2); if (!ok1 || !ok2) { QMessageBox::information(this, "", "Range values must be integers.", QMessageBox::Ok); return; } if (numFrom < firstRes || numFrom > lastRes || numTo < firstRes || numTo > lastRes) { QMessageBox::information(this, "", "Invalid range.", QMessageBox::Ok); return; } if (numTo < numFrom) { QMessageBox::information(this, "", "Invalid range.", QMessageBox::Ok); return; } clustalw::userParameters->setRangeFrom(numFrom); clustalw::userParameters->setRangeTo(numTo); } // GDE output case if(gdeOutputCase->currentIndex() == 0) { clustalw::userParameters->setLowercase(true); } else { clustalw::userParameters->setLowercase(false); } // Clustalw sequence numbers if(clustalSequenceNumbers->currentIndex() == 1) { clustalw::userParameters->setClSeqNumbers(true); } else { clustalw::userParameters->setClSeqNumbers(false); } // save sequences as fileName = saveAs->text(); int len = fileName.size(); if(fileName == "") { fileName = QDir::currentPath() + "temp.aln"; } else if(fileName[len -1] != '.') { fileName += "."; } setResult(QDialog::Accepted); hide(); } clustalx-2.1/Resources.h0000644000175000017500000000307011470725216015141 0ustar ddineenddineen/** * Implements a singleton that maintains program resources. * The single instance is (re)instantiated on demand like: * Resources *res = Resources::Instance(); * * 24-05-07,Nigel Brown(EMBL): created. 
*/ #ifndef RESOURCES_H #define RESOURCES_H #include #include enum ResourcePathType { DefaultPath, InstallPath, ExecutablePath, HomePath, }; class Resources { public: /* return the Resources singleton */ static Resources *Instance(); /* setters */ void setPathToExecutable(QString pathToFiles); /* getters */ QString getDefaultPath() { return defaultPath; } QString getInstallPath() { return installPath; } QString getExecutablePath() { return executablePath; } QString getHomePath() { return homePath; } std::string findFile(const char *file, const ResourcePathType where = DefaultPath) const; std::string findFile(const std::string &file, const ResourcePathType where = DefaultPath) const; std::string findFile(const QString &file, const ResourcePathType where = DefaultPath) const; QString searchPathsForFile(const QString &fileName) const; std::string searchPathsForFile(const std::string &fileName) const; /* debug */ void dump(); protected: /* hide the constructors */ Resources(); Resources(const Resources&); Resources& operator= (const Resources&); QString dirname(QString path); private: QString defaultPath; QString installPath; QString executablePath; QString homePath; }; #endif //RESOURCES_H clustalx-2.1/Resources.cpp0000644000175000017500000000710111470725216015473 0ustar ddineenddineen/** * Implements a singleton that maintains program resources. * The single instance is (re)instantiated on demand like: * Resources *res = Resources::Instance(); * * 24-05-07,Nigel Brown(EMBL): created. 
*/ #include "Resources.h" #include #include #include // stdlib only for getenv #include #include using namespace std; //environment variables static const char *CLU_INSTALL_DIR = "CLU_INSTALL_DIR"; //return the sole instance Resources *Resources::Instance() { static Resources instance; return &instance; } Resources::Resources() { //defaultPath defaultPath = "."; //executablePath executablePath = "."; //installPath installPath = "."; char *env; if ((env = getenv(CLU_INSTALL_DIR)) != 0) { installPath = QString(env); } //homePath homePath = QDir::homePath(); } void Resources::setPathToExecutable(QString path) { executablePath = dirname(path); } QString Resources::dirname(QString path) { QString tempString; int size = path.size(); tempString = path; //cout << "tempString is\n" << tempString.toStdString() << "\n"; for (int i = size - 1; i > 0; i--) { if (tempString[i] == QDir::separator()) { tempString.chop(size - i); break; } } //cout << "tempString is\n" << tempString.toStdString() << "\n"; return tempString; } void Resources::dump() { printf("%s => %s [%s]\n%s => %s\n%s => %s\n", "installPath", installPath.toStdString().c_str(), CLU_INSTALL_DIR, "executablePath", executablePath.toStdString().c_str(), "homePath", homePath.toStdString().c_str() ); }; string Resources::findFile(const char *file, const ResourcePathType where) const { return findFile(QString(file), where); } string Resources::findFile(const string &file, const ResourcePathType where) const { return findFile(QString(file.c_str()), where); } string Resources::findFile(const QString &file, const ResourcePathType where) const { const QString *path; switch (where) { case InstallPath: path = &installPath; break; case ExecutablePath: path = &executablePath; break; case HomePath: path = &homePath; break; default: path = &defaultPath; break; } QString fileName = *path + QString(QDir::separator()) + file; // AW: replaced file with qfile, as ICC otherwise complains about redeclaration QFile qfile(fileName); 
qfile.open(QFile::ReadOnly); if (qfile.isOpen()) { qfile.close(); return fileName.toStdString(); } return string(); //not found/readable } /* Search for a (QString) file in a succession of likely locations and * return the full path as (QString). */ QString Resources::searchPathsForFile(const QString &fileName) const { string file = searchPathsForFile(fileName.toStdString()); return QString(file.c_str()); } /* Search for a (string) file in a succession of likely locations and * return the full path as (string). */ string Resources::searchPathsForFile(const string &fileName) const { string file; while (1) { file = findFile(fileName, InstallPath); if (file != "") break; file = findFile(fileName, ExecutablePath); if (file != "") break; file = findFile(fileName, HomePath); if (file != "") break; file = findFile(fileName); if (file != "") break; file = fileName; // give up break; } return file; } clustalx-2.1/README0000644000175000017500000000136311470725217013702 0ustar ddineenddineenPlease see also ftp://ftp.ebi.ac.uk/pub/software/clustalw2/README HOWTO COMPILE THE SOURCE AND INSTALL ---------------------------------------------------------------------- You will need a Unix environment with a working C++ compiler and the Qt developer kit (>=version 4.3) to compile the ClustalX source. For compilation under Windows you will also need to install mingw and cygwin. Type $ qmake and then $ make This will create the clustalX binary. HOWTO INSTALL AFTER COMPILATION ---------------------------------------------------------------------- Put colprot.xml, coldna.xml, colprint.xml, clustalx.hlp and clustalX[.exe] into one folder. Add a link to the binary to your Desktop for example. On Linux you can use the installer script. 
clustalx-2.1/QTUtility.h0000644000175000017500000000215311470725216015100 0ustar ddineenddineen#ifndef QTUTILITY_H #define QTUTILITY_H #include #include #include #include #include #include #include #include "clustalW/general/clustalw.h" #include "clustalW/general/Utility.h" class QLabel; using namespace std; namespace clustalw { class QTUtility : public Utility { public: QTUtility(QWidget* main); virtual void error( char *msg,...); virtual void error( const char *msg,...); virtual void warning( char *msg,...); virtual void warning( const char *msg,...); virtual char promptForYesNo(char *title,char *prompt); virtual char promptForYesNo(const char *title,char *prompt); virtual void setMainWindowAddress(QWidget* main); virtual void info( char *msg,...); virtual void info( const char *msg,...); virtual void myname( char *myname ); virtual void setInfoLabelPtr(QLabel* ptrToLabelObj); /* Attributes */ private: /* Functions */ /* Attributes */ QWidget* mainwidget; QLabel* infoLabel; }; } #endif clustalx-2.1/QTUtility.cpp0000644000175000017500000000713311470725216015436 0ustar ddineenddineen#include #include #include #include #include #include "QTUtility.h" namespace clustalw { QTUtility::QTUtility(QWidget* main) { mainwidget = main; infoLabel = 0; } void QTUtility::setInfoLabelPtr(QLabel* ptrToLabelObj) { infoLabel = ptrToLabelObj; } /** * * @param msg */ void QTUtility::error( char *msg,...) { va_list ap; char myMessage[1000]; va_start(ap, msg); vsprintf(myMessage, msg, ap); QString myString(myMessage); QString errorString("ERROR: "); myString = errorString + myString; QMessageBox::information(0, "", myString, QMessageBox::Ok); va_end(ap); } void QTUtility::error(const char *msg,...) 
{ va_list ap; char myMessage[1000]; va_start(ap, msg); vsprintf(myMessage, msg, ap); QString myString(myMessage); QString errorString("ERROR: "); myString = errorString + myString; QMessageBox::information(0, "", myString, QMessageBox::Ok); va_end(ap); } /** * * @param msg */ void QTUtility::warning( char *msg,...) { va_list ap; char myMessage[1000]; va_start(ap, msg); vsprintf(myMessage, msg, ap); QString myString(myMessage); QString warningString("WARNING: "); myString = warningString + myString; QMessageBox::information(0, "", myString, QMessageBox::Ok); va_end(ap); } void QTUtility::warning(const char *msg,...) { va_list ap; char myMessage[1000]; va_start(ap, msg); vsprintf(myMessage, msg, ap); QString myString(myMessage); QString warningString("WARNING: "); myString = warningString + myString; QMessageBox::information(0, "", myString, QMessageBox::Ok); va_end(ap); } void QTUtility::info( char *msg,...) { if(infoLabel) { va_list ap; char myMessage[1000]; va_start(ap, msg); fprintf(stdout, "\n"); vfprintf(stdout, msg, ap); vsprintf(myMessage, msg, ap); QString myString(myMessage); infoLabel->setText(myString); QApplication::processEvents();// increases responsiveness va_end(ap); } } void QTUtility::info(const char *msg,...) 
{ if(infoLabel) { va_list ap; char myMessage[1000]; va_start(ap, msg); fprintf(stdout, "\n"); // vfprintf(stdout, msg, ap); vsprintf(myMessage, msg, ap); cout << myMessage; QString myString(myMessage); infoLabel->setText(myString); QApplication::processEvents();// increases responsiveness va_end(ap); } } /** * */ void QTUtility::myname( char *myname) { strcpy(myname, "clustalx\0"); } char QTUtility::promptForYesNo(char *title, char *prompt) { QString promptMessage = QString(title) + "\n" + QString(prompt) + "?"; int answer; answer = QMessageBox::question(mainwidget, "", promptMessage, QMessageBox::Yes, QMessageBox::No); if(answer == QMessageBox::Yes) { return ('y'); } else { return ('n'); } } char QTUtility::promptForYesNo(const char *title, char *prompt) { QString promptMessage = QString(title) + "\n" + QString(prompt) + "?"; int answer; answer = QMessageBox::question(mainwidget, "", promptMessage, QMessageBox::Yes, QMessageBox::No); if(answer == QMessageBox::Yes) { return ('y'); } else { return ('n'); } return 'y'; } void QTUtility::setMainWindowAddress(QWidget* main) { mainwidget = main; } } clustalx-2.1/ProteinGapParameters.h0000644000175000017500000000153011470725216017262 0ustar ddineenddineen#ifndef PROTEINGAPPARAMETERS_H #define PROTEINGAPPARAMETERS_H #include class QPushButton; class QGroupBox; class QRadioButton; class QComboBox; class QLabel; class QLineEdit; class ProteinGapParameters : public QDialog { Q_OBJECT public: ProteinGapParameters(); protected: private slots: void accept(); private: void setUpLayout(); void setUpOtherParams(); void setInitialParameterValues(); QPushButton* okButton; QGroupBox* otherParams; QComboBox* resSpecPen; QComboBox* hydSpecPen; QComboBox* endGapSeparation; QLineEdit* hydroPhilicRes; QLineEdit* gapSeparationDist; QLabel* resSpecPenLabel; QLabel* hydSpecPenLabel; QLabel* endGapSeparationLabel; QLabel* hydroPhilicResLabel; QLabel* gapSeparationDistLabel; int gapDist; }; #endif 
clustalx-2.1/ProteinGapParameters.cpp0000644000175000017500000001117711470725216017625 0ustar ddineenddineen#include #include #include #include #include #include #include #include #include #include #include #include "ProteinGapParameters.h" #include "clustalW/general/userparams.h" ProteinGapParameters::ProteinGapParameters() : gapDist(4) { setWindowTitle("Protein Gap Parameters"); otherParams = new QGroupBox; QVBoxLayout* mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(otherParams); setLayout(mainLayout); } void ProteinGapParameters::setUpLayout() { setUpOtherParams(); QGridLayout *otherParamLayout = new QGridLayout; otherParamLayout->addWidget(resSpecPenLabel, 0, 0); otherParamLayout->addWidget(resSpecPen, 0, 1); otherParamLayout->addWidget(hydSpecPenLabel, 1, 0); otherParamLayout->addWidget(hydSpecPen, 1, 1); otherParamLayout->addWidget(hydroPhilicResLabel, 2, 0); otherParamLayout->addWidget(hydroPhilicRes, 2, 1); otherParamLayout->addWidget(gapSeparationDistLabel, 3, 0); otherParamLayout->addWidget(gapSeparationDist, 3, 1); otherParamLayout->addWidget(endGapSeparationLabel, 4, 0); otherParamLayout->addWidget(endGapSeparation, 4, 1); otherParams->setLayout(otherParamLayout); } void ProteinGapParameters::setUpOtherParams() { resSpecPen = new QComboBox(); resSpecPen->addItem("On"); resSpecPen->addItem("Off"); resSpecPenLabel = new QLabel(tr("Residue-specific Penalties")); hydSpecPen = new QComboBox(); hydSpecPen->addItem("On"); hydSpecPen->addItem("Off"); hydSpecPenLabel = new QLabel(tr("Hydrophilic Penalties")); endGapSeparation = new QComboBox(); endGapSeparation->addItem("Off"); endGapSeparation->addItem("On"); endGapSeparationLabel = new QLabel(tr("End Gap Separation")); hydroPhilicRes = new QLineEdit; hydroPhilicResLabel = new 
QLabel(tr("Hydrophilic Residues: ")); gapSeparationDist = new QLineEdit; gapSeparationDistLabel = new QLabel(tr("Gap Separation Distance [0-100]: ")); setInitialParameterValues(); } void ProteinGapParameters::setInitialParameterValues() { if(clustalw::userParameters->getNoPrefPenalties()) { resSpecPen->setCurrentIndex(1); } else { resSpecPen->setCurrentIndex(0); } if(clustalw::userParameters->getNoHydPenalties()) { hydSpecPen->setCurrentIndex(1); } else { hydSpecPen->setCurrentIndex(0); } if(clustalw::userParameters->getUseEndGaps()) { endGapSeparation->setCurrentIndex(1); } else { endGapSeparation->setCurrentIndex(0); } QString hydRes(clustalw::userParameters->getHydResidues().c_str()); hydroPhilicRes->setText(hydRes); QString num; num = num.setNum(clustalw::userParameters->getGapDist()); gapSeparationDist->setText(num); } void ProteinGapParameters::accept() { // We have 2 line edit fields. These must be validated. // The first one must contain only letters. QString hydRes = hydroPhilicRes->text(); QString hydResEdited; QChar res; // The second one must contain a number between 0 and 100. 
for(int i = 0; i < hydRes.size(); i++) { res = hydRes[i]; if(res.isLetter()) { hydResEdited += res; } } hydRes = hydResEdited; // Set the hydrophilic residues clustalw::userParameters->setHydResidues(hydRes.toStdString()); bool ok; QString gapDist = gapSeparationDist->text(); int dist; dist = gapDist.toInt(&ok); if(ok && dist >= 0 && dist <= 100) { clustalw::userParameters->setGapDist(dist); } // Now the combo boxes if(resSpecPen->currentIndex() == 1) { clustalw::userParameters->setNoPrefPenalties(true); } else { clustalw::userParameters->setNoPrefPenalties(false); } if(hydSpecPen->currentIndex() == 1) { clustalw::userParameters->setNoHydPenalties(true); } else { clustalw::userParameters->setNoHydPenalties(false); } if(endGapSeparation->currentIndex() == 1) { clustalw::userParameters->setUseEndGaps(true); } else { clustalw::userParameters->setUseEndGaps(false); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/PostscriptFileParams.h0000644000175000017500000000305511470725216017310 0ustar ddineenddineen#ifndef POSTSCRIPTFILEPARAMS_H #define POSTSCRIPTFILEPARAMS_H #include #include "ClustalQtParams.h" class PostscriptFileParams { public: PostscriptFileParams(QString _writeSeqsToFile, QString _psColorsFile, int _pageSize, int _orientation, bool _printHeader, bool _printQualityCurve, bool _printRuler, bool _printResidueNumber, bool _resizeToFitPage, int _printFromPos, int _printToPos, int _useBlockLen); QString getWriteSeqsToFile(){return writeSeqsToFile;} QString getPSColorsFile(){return psColorsFile;} int getPageSize(){return pageSize;} int getOrientation(){return orientation;} bool getPrintHeader(){return printHeader;} bool getPrintQualityCurve(){return printQualityCurve;} bool getPrintRuler(){return printRuler;} bool getPrintResidueNumber(){return printResidueNumber;} bool getResizeToFitPage(){return resizeToFitPage;} int getPrintFromPos(){return printFromPos;} int getPrintToPos(){return printToPos;} int getUseBlockLen(){return useBlockLen;} private: QString 
writeSeqsToFile; QString psColorsFile; int pageSize; int orientation; bool printHeader; bool printQualityCurve; bool printRuler; bool printResidueNumber; bool resizeToFitPage; int printFromPos; int printToPos; int useBlockLen; }; #endif clustalx-2.1/PostscriptFileParams.cpp0000644000175000017500000000140711470725216017642 0ustar ddineenddineen#include "PostscriptFileParams.h" PostscriptFileParams::PostscriptFileParams(QString _writeSeqsToFile, QString _psColorsFile, int _pageSize, int _orientation, bool _printHeader, bool _printQualityCurve, bool _printRuler, bool _printResidueNumber, bool _resizeToFitPage, int _printFromPos, int _printToPos, int _useBlockLen) : writeSeqsToFile(_writeSeqsToFile), psColorsFile(_psColorsFile), pageSize(_pageSize), orientation(_orientation), printHeader(_printHeader), printQualityCurve(_printQualityCurve), printRuler(_printRuler), printResidueNumber(_printResidueNumber), resizeToFitPage(_resizeToFitPage), printFromPos(_printFromPos), printToPos(_printToPos), useBlockLen(_useBlockLen) { } clustalx-2.1/PairwiseParams.h0000644000175000017500000000516111470725216016121 0ustar ddineenddineen#ifndef PAIRWISEPARAMS_H #define PAIRWISEPARAMS_H #include #include class QPushButton; class QGroupBox; class QRadioButton; class QLabel; class QButtonGroup; class QTabWidget; class QLineEdit; class QHBoxLayout; class QVBoxLayout; class QGridLayout; class QShowEvent; class PairwiseParams : public QDialog { Q_OBJECT public: PairwiseParams(); protected: void showEvent(QShowEvent* event); private slots: void accept(); void slowBrowseProtein(); void slowBrowseDNA(); void userProteinClicked(); void userDNAClicked(); private: void setUpSlowTab(); void setUpSlowTextAreas(); void setUpSlowProteinWeightMat(); void setUpSlowDNAWeightMat(); void setUpFastTab(); void setInitialProteinMat(); void setInitialDNAMat(); void radioSetUsingUserProtein(); void setSlowInitialTextParams(); void setFastInitialTextParams(); QTabWidget* tabWidget; QPushButton* okButton; 
QComboBox* slowOrQuick; QGroupBox* loadDNAGroup; QGroupBox* loadProteinGroup; QGroupBox* slowPairwiseTab; QGroupBox* fastPairwiseTab; QVBoxLayout* mainLayout; QVBoxLayout* slowTabLayout; QGridLayout* slowGapLayout; QGridLayout* protMatLayout; QGridLayout* dnaMatLayout; QGridLayout* fastTabLayout; QHBoxLayout* horizLayoutAA; QHBoxLayout* horizLayoutDNA; // Slow first QGroupBox* slowGapParams; QLabel* slowGapOpenLabel; QLabel* slowGapExtendLabel; QLineEdit* slowGapOpen; QLineEdit* slowGapExtend; QString slowAAMatrixPath; QString slowDNAMatrixPath; std::string aaMatrixFile; std::string dnaMatrixFile; QPushButton* slowLoadProtMat; QPushButton* slowLoadDNAMat; QGroupBox* slowProtMatGroup; QButtonGroup* slowProteinWeightMat; QRadioButton* slowProtBlosum30; QRadioButton* slowProtPAM350; QRadioButton* slowProtGonnet250; QRadioButton* slowProtIdentity; QRadioButton* slowProtAAUserMat; QGroupBox* slowDNAMatGroup; QButtonGroup* slowDNAWeightMat; QRadioButton* slowDNAIUB; QRadioButton* slowDNAClustalw; QRadioButton* slowDNAUser; // Fast QGroupBox* fastParams; QLineEdit* fastGapPenalty; QLineEdit* fastKTupSize; QLineEdit* fastTopDiagonals; QLineEdit* fastWindowSize; QLabel* fastGapPenaltyLabel; QLabel* fastKTupSizeLabel; QLabel* fastTopDiagonalsLabel; QLabel* fastWindowSizeLabel; QLabel* userProteinMatrix; QLabel* userDNAMatrix; bool proteinMatLoaded; bool dnaMatLoaded; }; #endif clustalx-2.1/PairwiseParams.cpp0000644000175000017500000004156111470725216016460 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * 15-03-07,Nigel Brown(EMBL): setSlowInitialTextParams() was calling * getGapOpen() and getGapExtend() instead of getPWGapOpen() and * getPWGapExtend(), so these user settings were being dropped. 
*/ #include "PairwiseParams.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "clustalW/general/userparams.h" #include "clustalW/substitutionMatrix/globalmatrix.h" #include "FileDialog.h" PairwiseParams::PairwiseParams() { setWindowTitle("Pairwise Parameters"); proteinMatLoaded = false; dnaMatLoaded = false; if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAParams(); } else { clustalw::userParameters->setProtParams(); } slowOrQuick = new QComboBox; slowOrQuick->addItem("Slow-Accurate"); slowOrQuick->addItem("Fast-Approximate"); if(clustalw::userParameters->getQuickPairAlign()) { slowOrQuick->setCurrentIndex(1); } else { slowOrQuick->setCurrentIndex(0); } mainLayout = new QVBoxLayout; slowPairwiseTab = new QGroupBox; fastPairwiseTab = new QGroupBox; tabWidget = new QTabWidget; setUpSlowTab(); tabWidget->addTab(slowPairwiseTab, "Slow/Accurate Pairwise Parameters"); setUpFastTab(); tabWidget->addTab(fastPairwiseTab, "Fast/Approx Pairwise Parameters"); okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); mainLayout->addWidget(okButton); mainLayout->addWidget(slowOrQuick); mainLayout->addWidget(tabWidget); setLayout(mainLayout); } void PairwiseParams::showEvent(QShowEvent* event) { if(clustalw::userParameters->getQuickPairAlign()) { slowOrQuick->setCurrentIndex(1); } else { slowOrQuick->setCurrentIndex(0); } // Each time the dialog is shown reload the values from the userParameters object!!! 
setInitialProteinMat(); setInitialDNAMat(); setSlowInitialTextParams(); setFastInitialTextParams(); } void PairwiseParams::userProteinClicked() { if(!proteinMatLoaded) { slowBrowseProtein(); } } void PairwiseParams::userDNAClicked() { if(!dnaMatLoaded) { slowBrowseDNA(); } } void PairwiseParams::setUpSlowTab() { setUpSlowTextAreas(); setUpSlowProteinWeightMat(); setUpSlowDNAWeightMat(); loadProteinGroup = new QGroupBox; horizLayoutAA = new QHBoxLayout; userProteinMatrix = new QLabel(""); slowLoadProtMat = new QPushButton(tr("Load protein matrix:")); slowLoadProtMat->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); horizLayoutAA->addWidget(slowLoadProtMat); horizLayoutAA->addWidget(userProteinMatrix); loadProteinGroup->setLayout(horizLayoutAA); connect(slowLoadProtMat, SIGNAL(clicked()), this, SLOT(slowBrowseProtein())); loadDNAGroup = new QGroupBox; horizLayoutDNA = new QHBoxLayout; userDNAMatrix = new QLabel(""); slowLoadDNAMat = new QPushButton(tr("Load DNA matrix:")); slowLoadDNAMat->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); horizLayoutDNA->addWidget(slowLoadDNAMat); horizLayoutDNA->addWidget(userDNAMatrix); loadDNAGroup->setLayout(horizLayoutDNA); connect(slowLoadDNAMat, SIGNAL(clicked()), this, SLOT(slowBrowseDNA())); slowTabLayout = new QVBoxLayout; slowTabLayout->addWidget(slowGapParams); slowTabLayout->addWidget(slowProtMatGroup); slowTabLayout->addWidget(loadProteinGroup); slowTabLayout->addWidget(slowDNAMatGroup); slowTabLayout->addWidget(loadDNAGroup); slowPairwiseTab->setLayout(slowTabLayout); } void PairwiseParams::setUpSlowTextAreas() { slowGapParams = new QGroupBox; slowGapOpenLabel = new QLabel(tr("Gap Opening [0-100]: ")); slowGapExtendLabel = new QLabel(tr("Gap Extend [0-100]: ")); slowGapOpen = new QLineEdit; slowGapExtend = new QLineEdit; slowGapLayout = new QGridLayout; slowGapLayout->addWidget(slowGapOpenLabel, 0, 0); slowGapLayout->addWidget(slowGapOpen, 0, 1); slowGapLayout->addWidget(slowGapExtendLabel, 1, 0); 
slowGapLayout->addWidget(slowGapExtend, 1, 1); slowGapParams->setLayout(slowGapLayout); } void PairwiseParams::setSlowInitialTextParams() { QString num; num = num.setNum(clustalw::userParameters->getPWGapOpen()); slowGapOpen->setText(num); num = num.setNum(clustalw::userParameters->getPWGapExtend()); slowGapExtend->setText(num); } void PairwiseParams::setUpSlowProteinWeightMat() { slowProtMatGroup = new QGroupBox(tr("Protein Weight Matrix")); slowProteinWeightMat = new QButtonGroup; slowProteinWeightMat->setExclusive(true); slowProtBlosum30 = new QRadioButton("BLOSUM 30"); slowProtPAM350 = new QRadioButton("PAM 350"); slowProtGonnet250 = new QRadioButton("Gonnet 250"); slowProtIdentity = new QRadioButton("Identity matrix"); slowProtAAUserMat = new QRadioButton("User defined"); connect(slowProtAAUserMat, SIGNAL(clicked()), this, SLOT(userProteinClicked())); slowProteinWeightMat->addButton(slowProtBlosum30); slowProteinWeightMat->addButton(slowProtPAM350); slowProteinWeightMat->addButton(slowProtGonnet250); slowProteinWeightMat->addButton(slowProtIdentity); slowProteinWeightMat->addButton(slowProtAAUserMat); protMatLayout = new QGridLayout; protMatLayout->addWidget(slowProtBlosum30, 0, 0); protMatLayout->addWidget(slowProtPAM350, 0, 1); protMatLayout->addWidget(slowProtGonnet250, 0, 2); protMatLayout->addWidget(slowProtIdentity, 1, 0); protMatLayout->addWidget(slowProtAAUserMat, 1, 1); slowProtMatGroup->setLayout(protMatLayout); } void PairwiseParams::setInitialProteinMat() { if((clustalw::subMatrix->getPWMatrixNum() - 1) == clustalw::PWAABLOSUM) { slowProtBlosum30->setChecked(true); } else if((clustalw::subMatrix->getPWMatrixNum() - 1) == clustalw::PWAAPAM) { slowProtPAM350->setChecked(true); } else if((clustalw::subMatrix->getPWMatrixNum() - 1) == clustalw::PWAAGONNET) { slowProtGonnet250->setChecked(true); } else if((clustalw::subMatrix->getPWMatrixNum() - 1) == clustalw::PWAAIDENTITY) { slowProtIdentity->setChecked(true); } else 
if((clustalw::subMatrix->getPWMatrixNum() - 1) == clustalw::PWAAUSER) { slowProtAAUserMat->setChecked(true); } } void PairwiseParams::setUpSlowDNAWeightMat() { slowDNAMatGroup = new QGroupBox(tr("DNA Weight Matrix")); slowDNAWeightMat = new QButtonGroup; slowDNAWeightMat->setExclusive(true); slowDNAIUB = new QRadioButton("IUB"); slowDNAClustalw = new QRadioButton("CLUSTALW(1.6)"); slowDNAUser = new QRadioButton("User defined"); connect(slowDNAUser, SIGNAL(clicked()), this, SLOT(userDNAClicked())); slowDNAWeightMat->addButton(slowDNAIUB); slowDNAWeightMat->addButton(slowDNAClustalw); slowDNAWeightMat->addButton(slowDNAUser); dnaMatLayout = new QGridLayout; dnaMatLayout->addWidget(slowDNAIUB, 0, 0); dnaMatLayout->addWidget(slowDNAClustalw, 0, 1); dnaMatLayout->addWidget(slowDNAUser, 0, 2); slowDNAMatGroup->setLayout(dnaMatLayout); } void PairwiseParams::setInitialDNAMat() { if((clustalw::subMatrix->getPWDNAMatrixNum() - 1) == clustalw::DNAIUB) { slowDNAIUB->setChecked(true); } else if((clustalw::subMatrix->getPWDNAMatrixNum() - 1) == clustalw::DNACLUSTALW) { slowDNAClustalw->setChecked(true); } else if((clustalw::subMatrix->getPWDNAMatrixNum() - 1) == clustalw::DNAUSERDEFINED) { slowDNAUser->setChecked(true); } } void PairwiseParams::setUpFastTab() { fastGapPenaltyLabel = new QLabel(tr("Gap Penalty [1-500]: ")); fastGapPenalty = new QLineEdit; fastKTupSizeLabel = new QLabel(tr("K-Tuple Size [1-2]: ")); fastKTupSize = new QLineEdit; fastTopDiagonalsLabel = new QLabel(tr("Top Diagonals [1-50]: ")); fastTopDiagonals = new QLineEdit; fastWindowSizeLabel = new QLabel(tr("Window Size [1-50]: ")); fastWindowSize = new QLineEdit; fastTabLayout = new QGridLayout; fastTabLayout->addWidget(fastGapPenaltyLabel, 0, 0); fastTabLayout->addWidget(fastGapPenalty, 0, 1); fastTabLayout->addWidget(fastKTupSizeLabel, 1, 0); fastTabLayout->addWidget(fastKTupSize, 1, 1); fastTabLayout->addWidget(fastTopDiagonalsLabel, 2, 0); fastTabLayout->addWidget(fastTopDiagonals, 2, 1); 
fastTabLayout->addWidget(fastWindowSizeLabel, 3, 0); fastTabLayout->addWidget(fastWindowSize, 3, 1); fastPairwiseTab->setLayout(fastTabLayout); } void PairwiseParams::setFastInitialTextParams() { QString num; num = num.setNum(clustalw::userParameters->getWindowGap()); fastGapPenalty->setText(num); num = num.setNum(clustalw::userParameters->getKtup()); fastKTupSize->setText(num); num = num.setNum(clustalw::userParameters->getSignif()); fastTopDiagonals->setText(num); num = num.setNum(clustalw::userParameters->getWindow()); fastWindowSize->setText(num); } void PairwiseParams::radioSetUsingUserProtein() { slowProtAAUserMat->setChecked(true); } void PairwiseParams::slowBrowseProtein() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; slowAAMatrixPath = fd.getOpenFileName(this, tr("Find Protein Matrix")); string path = slowAAMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getUserMatFromFile(userFile, clustalw::Protein, clustalw::Pairwise)) { aaMatrixFile = string(userFile); proteinMatLoaded = true; userProteinMatrix->setText(aaMatrixFile.c_str()); radioSetUsingUserProtein(); } else { aaMatrixFile = ""; proteinMatLoaded = false; userProteinMatrix->setText(aaMatrixFile.c_str()); setInitialProteinMat(); } delete []userFile; } else // pressed cancel { if(!proteinMatLoaded) { setInitialProteinMat(); } } } void PairwiseParams::slowBrowseDNA() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; slowDNAMatrixPath = fd.getOpenFileName(this, tr("Find DNA Matrix")); string path = slowDNAMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getUserMatFromFile(userFile, clustalw::DNA, clustalw::Pairwise)) { dnaMatrixFile = string(userFile); dnaMatLoaded = true; 
userDNAMatrix->setText(dnaMatrixFile.c_str()); slowDNAUser->setChecked(true); } else { dnaMatrixFile = ""; dnaMatLoaded = false; setInitialDNAMat(); userDNAMatrix->setText(dnaMatrixFile.c_str()); } delete []userFile; } else // pressed cancel { if(!dnaMatLoaded) { setInitialDNAMat(); } } } void PairwiseParams::accept() { if(slowOrQuick->currentIndex() == 1) { clustalw::userParameters->setQuickPairAlign(true); } else { clustalw::userParameters->setQuickPairAlign(false); } // Protein matrices if(slowProtBlosum30->isChecked()) { //slowProtBlosum30->setChecked(true); clustalw::subMatrix->setCurrentNameAndNum("blosum", 1, clustalw::Protein, clustalw::Pairwise); } else if(slowProtPAM350->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("pam", 2, clustalw::Protein, clustalw::Pairwise); } else if(slowProtGonnet250->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("gonnet", 3, clustalw::Protein, clustalw::Pairwise); } else if(slowProtIdentity->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("id", 4, clustalw::Protein, clustalw::Pairwise); } else if(slowProtAAUserMat->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum(aaMatrixFile, 5, clustalw::Protein, clustalw::Pairwise); } // DNA Matrix if(slowDNAIUB->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("iub", 1, clustalw::DNA, clustalw::Pairwise); } else if(slowDNAClustalw->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("clustalw", 2, clustalw::DNA, clustalw::Pairwise); } else if(slowDNAUser->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum(dnaMatrixFile, 3, clustalw::DNA, clustalw::Pairwise); } // Check slowGapOpen 0-100 bool ok; QString sGapOpen = slowGapOpen->text(); float sgapOpen; sgapOpen = sGapOpen.toFloat(&ok); if(ok && sgapOpen >= 0.0 && sgapOpen <= 100.0) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAPWGapOpenPenalty(sgapOpen); } else { clustalw::userParameters->setAAPWGapOpen(sgapOpen); } } // Check slowGapExtend 0-100 QString 
sGapExtend = slowGapExtend->text(); float sgapExtend; sgapExtend = sGapExtend.toFloat(&ok); if(ok && sgapExtend >= 0.0 && sgapExtend <= 100.0) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAPWGapExtendPenalty(sgapExtend); } else { clustalw::userParameters->setAAPWGapExtend(sgapExtend); } } // Check fastGapPenalty 1-500 QString fastGapPenString = fastGapPenalty->text(); int fastGPen; fastGPen = fastGapPenString.toInt(&ok); if(ok && fastGPen >= 1 && fastGPen <= 500) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAWindowGap(fastGPen); } else { clustalw::userParameters->setAAWindowGap(fastGPen); } } // Check fastKTupSize 1-2 QString fastKTupString = fastKTupSize->text(); int fastKTup; fastKTup = fastKTupString.toInt(&ok); if(ok && fastKTup >= 1 && fastKTup <= 2) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAKtup(fastKTup); } else { clustalw::userParameters->setAAKtup(fastKTup); } } // Check fastTopDiagonals 1-50 QString fastTopDiagonalsString = fastTopDiagonals->text(); int fastTopDiag; fastTopDiag = fastTopDiagonalsString.toInt(&ok); if(ok && fastTopDiag >= 1 && fastTopDiag <= 50) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNASignif(fastTopDiag); } else { clustalw::userParameters->setAASignif(fastTopDiag); } } // Check fastWindowSize 1-50 QString fastWindowSizeString = fastWindowSize->text(); int fastWinSize; fastWinSize = fastWindowSizeString.toInt(&ok); if(ok && fastWinSize >= 1 && fastWinSize <= 50) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAWindow(fastWinSize); } else { clustalw::userParameters->setAAWindow(fastWinSize); } } // NOTE it is important to set the parameters here. 
if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAParams(); } else { clustalw::userParameters->setProtParams(); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/PSPrinter.h0000644000175000017500000000367211470725216015065 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #ifndef PSPRINTER_H #define PSPRINTER_H #include #include #include #include #include "ColorParameters.h" #include "ClustalQtParams.h" #include "PostscriptFileParams.h" class PSPrinter : public QWidget { Q_OBJECT public: PSPrinter(psPrintParams* printParams); void writePSFile(PostscriptFileParams* postscriptParams); private: void printPSInfo(ofstream *psOutFile, int pageSize); void printPageHeader(ofstream *psOutFile, int psRotation,int maxX,int maxY, int page, int numPages, bool header, QString strTime, QString psFile, int psXTrans, int psYTrans, float psScale); void printHeaderLine(ofstream *psOutFile, int ix,int fr,int lr); void printFooterLine(ofstream *psOutFile, int fr,int lr); void printQualityCurve(ofstream *psOutFile, int fr,int lr,int scoreHeight); void printSeqLine(ofstream *psOutFile, int row,int seq,int fr,int lr,int resNumber); void makeRuler(); static const int LEFTMARGIN = 20; static const int SEPARATION = 2; static const int CHARHEIGHT = 10; static const int CHARWIDTH = 6; static const int A4X = 564; static const int A4Y = 800; static const int A3X = 832; static const int A3Y = 1159; static const int USLETTERX = 564; static const int USLETTERY = 750; static const int SCOREY = 3; static const int HEADER = 7; static const int NOHEADER = 0; static const int MAXRESNO = 6; float ConstConvertRGB; ColorParameters printerColors; psPrintParams* data; int blockHeight, blockLeft, blockTop; int headerTop, seqTop, footerTop, curveTop; std::string ruler; // Will be made by makeRuler. 
}; #endif clustalx-2.1/PSPrinter.cpp0000644000175000017500000006406511470725216015423 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * Changes: * * Mark 22-1-2007 The postscript files had spaces instead of gaps '-'. I made a * change to the printSeqLine function to fix this. * * 30-01-07,Nigel Brown(EMBL): Added 'statusdict begin a4 end' for A4 * paper. Added CLU_LONG_REVISION_STRING in place of hardwired clustalw * revision level, included from "clustalW/versionw.h". * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. ****************************************************************************/ #include #include #include #include #include #include #include "PSPrinter.h" #include "ColorFileXmlParser.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "clustalW/clustalw_version.h" PSPrinter::PSPrinter(psPrintParams* printParams) : ConstConvertRGB(255.0), blockHeight(0), blockLeft(0), blockTop(0), headerTop(0), seqTop(0), footerTop(0), curveTop(0) { data = printParams; } void PSPrinter::writePSFile(PostscriptFileParams* postscriptParams) { QString psFile = postscriptParams->getWriteSeqsToFile(); QString parFile = postscriptParams->getPSColorsFile(); int pageSize = postscriptParams->getPageSize(); int orientation = postscriptParams->getOrientation(); bool header = postscriptParams->getPrintHeader(); bool ruler = postscriptParams->getPrintRuler(); bool resNo = postscriptParams->getPrintResidueNumber(); bool resize = postscriptParams->getResizeToFitPage(); int firstPrintRes = postscriptParams->getPrintFromPos(); int lastPrintRes = postscriptParams->getPrintToPos(); int bLength = postscriptParams->getUseBlockLen(); bool showCurve = postscriptParams->getPrintQualityCurve(); int i, j, bn, seq, numSeqs; int err; int blockLen, numPages; int fr, lr; int page, row; int psRotation = 0, psXTrans = 0, psYTrans = 0; float psScale, hScale, wScale; int maxSeq; int maxX = 0, maxY = 
0; int scoreHeight = 0; int mainHeader = 0; int numELines, numECols; int nHead, nFoot; int pagePixWidth; // width of the page in pixels int pageCharHeight; // height of the page in chars for sequences int pagePixHeight; // height of the page in pixels for sequences int blocksPerPage, numBlocks; std::vector resNumber; const clustalw::SeqArray* seqArray = data->alignPtr->getSeqArray(); int gapPos1 = clustalw::userParameters->getGapPos1(); // Mark: 22-1-2007 new local int gapPos2 = clustalw::userParameters->getGapPos2(); // Mark: 22-1-2007 new local time_t *tptr = 0, ttime; QString strTime; ofstream psOutFile; std::string fileName = psFile.toStdString(); psOutFile.open(fileName.c_str()); if(!psOutFile.is_open()) { QMessageBox::information(this, "PS file", "Cannot open PS file."); return; } // get the page size parameters if (pageSize == A4) { if (orientation == PORTRAIT) { maxX = A4X; maxY = A4Y; psRotation = 0; } else { maxX = A4Y; maxY = A4X; psRotation = -90; } } else if (pageSize == A3) { if (orientation == PORTRAIT) { maxX = A3X; maxY = A3Y; psRotation = 0; } else { maxX = A3Y; maxY = A3X; psRotation = -90; } } else if (pageSize == USLETTER) { if (orientation == PORTRAIT) { maxX = USLETTERX; maxY = USLETTERY; psRotation = 0; } else { maxX = USLETTERY; maxY = USLETTERX; psRotation = -90; } } if(showCurve) { scoreHeight = SCOREY; } if(header) { mainHeader = HEADER; } else { mainHeader = NOHEADER; } pagePixWidth = maxX - LEFTMARGIN * 2; pagePixHeight = maxY - mainHeader * CHARHEIGHT; numSeqs = data->nSeqs; nHead = data->seqHeader.size(); if(ruler) { nFoot = 1; } else { nFoot = 0; } numELines = nHead + nFoot + scoreHeight + SEPARATION; int nCols = data->alignPtr->getLengthLongestSequence(data->firstSeq, data->lastSeq); // check the block length, residue range parameters if(firstPrintRes <= 0) { firstPrintRes = 1; } if((lastPrintRes <= 0) || (lastPrintRes > nCols)) { lastPrintRes = nCols; } if(firstPrintRes > lastPrintRes) { QMessageBox::information(this, "PS file", 
"Bad residue range - cannot write postscript"); return; } if (bLength==0 || lastPrintRes - firstPrintRes + 1 < bLength) { blockLen = lastPrintRes - firstPrintRes + 1; } else { blockLen = bLength; } QChar res; resNumber.resize(data->nSeqs + 1, 0); // NOTE need to be careful with this. We want to start at firstSeq not 0!!!! for(i = data->firstSeq; i < data->nSeqs + data->firstSeq; i++) { for(j = 1; j < firstPrintRes - 1; j++) { if(j < data->alignPtr->getSeqLength(i)) { res = QChar((*seqArray)[i][j+1]); if(res.isLetter() && resNumber.size() > i - data->firstSeq) { resNumber[i - data->firstSeq]++; } } } } int maxNames = data->alignPtr->getMaxNames(); if(resNo) { numECols = MAXRESNO + 1 + maxNames; } else { numECols = 1 + maxNames; } // print out the PS revision level etc. ttime = time(tptr); strTime = QString(ctime(&ttime)); printPSInfo(&psOutFile, pageSize); if(ruler) { makeRuler(); } char cRes; // calculate scaling factors, block sizes to fit the page etc. if (resize == false || blockLen == lastPrintRes - firstPrintRes + 1) { // split the alignment into blocks of sequences. If the blocks are too long // for the page - tough! if(resize == false) { psScale = 1.0; } else { psScale = static_cast(pagePixWidth) / static_cast((blockLen + numECols) * CHARWIDTH); } psXTrans = LEFTMARGIN * (1 - psScale); psYTrans = pagePixHeight * (1 - psScale); if (pageSize != A3 && orientation == LANDSCAPE) { psXTrans -= LEFTMARGIN; } if(psScale == 0.0) { return; } pageCharHeight = ((maxY / CHARHEIGHT) - mainHeader) / psScale; maxSeq = pageCharHeight - numELines; if(maxSeq == 0) { return; // NOTE this is to fix an arithmetic exception I was getting. // I will need to figure out why this was. 
} blockHeight = (maxSeq + numELines) * CHARHEIGHT; numPages = (numSeqs / maxSeq) + 1; seq = 0; for (page = 0; page < numPages; page++) { // print the top of page header printPageHeader(&psOutFile, psRotation, maxX, maxY, page, numPages, header, strTime, psFile, psXTrans, psYTrans, psScale); blockTop = maxY - mainHeader * CHARHEIGHT; blockLeft = LEFTMARGIN + (1 + maxNames) * CHARWIDTH; headerTop = blockTop; fr = firstPrintRes - 1; lr = lastPrintRes - 1; // show the header lines for (i = 0; i < nHead; i++) { printHeaderLine(&psOutFile, i, fr, lr); } seqTop = blockTop - nHead * CHARHEIGHT; int resCode; // New local variable to improve readability // show the sequence lines for (row = 0; row < maxSeq; row++) { if(resNo) { for(i = fr; i <= lr; i++) { if(i < data->alignPtr->getSeqLength(seq + data->firstSeq)) { resCode = (*seqArray)[seq + data->firstSeq][i+1]; cRes = clustalw::userParameters->getAminoAcidCode(resCode); res = QChar(cRes); if(res.isLetter() && resNumber.size() > seq) { resNumber[seq]++; } } } } printSeqLine(&psOutFile, row, seq, fr, lr, resNumber[seq]); seq++; if(seq >= numSeqs) { row++; break; } } footerTop = seqTop - row * CHARHEIGHT; // show the footer lines if(nFoot != 0) { printFooterLine(&psOutFile, fr, lr); } curveTop = footerTop - nFoot * CHARHEIGHT; // show the quality curve if(showCurve) { printQualityCurve(&psOutFile, fr, lr, scoreHeight); } psOutFile << "\nshowpage\n"; psOutFile << "restore\n"; } } else { // split the alignment into blocks of residues, and scale the blocks to fit the page maxSeq = pagePixHeight / CHARHEIGHT - numELines - mainHeader; hScale = static_cast(maxSeq) / static_cast(numSeqs); wScale = static_cast(pagePixWidth) / static_cast((blockLen+numECols)*CHARWIDTH); psScale = clustalw::utilityObject->MIN(hScale, wScale); psXTrans = LEFTMARGIN * (1 - psScale); psYTrans = pagePixHeight * (1 - psScale); if (pageSize != A3 && orientation == LANDSCAPE) { psXTrans -= LEFTMARGIN; } pageCharHeight = ((maxY / CHARHEIGHT) - mainHeader) / 
psScale; maxSeq = pageCharHeight - numELines; blockHeight = (numSeqs + numELines) * CHARHEIGHT; blocksPerPage = pageCharHeight / (numSeqs + numELines); if (blocksPerPage == 0) { clustalw::utilityObject->error("illegal combination of print parameters"); return; } numBlocks = (lastPrintRes - firstPrintRes) / blockLen + 1; if (numBlocks % blocksPerPage == 0) { numPages = numBlocks / blocksPerPage; } else { numPages = numBlocks / blocksPerPage + 1; } for (bn = 0; bn < numBlocks; bn++) { page = bn / blocksPerPage; // print the top of page header if (bn % blocksPerPage == 0) { printPageHeader(&psOutFile, psRotation, maxX, maxY, page, numPages, header, strTime, psFile, psXTrans, psYTrans, psScale); } blockTop = maxY - mainHeader * CHARHEIGHT - blockHeight * (bn % blocksPerPage); blockLeft = LEFTMARGIN + (1 + maxNames) * CHARWIDTH; headerTop = blockTop; seqTop = blockTop - nHead * CHARHEIGHT; footerTop = blockTop - (nHead + numSeqs) * CHARHEIGHT; curveTop = blockTop - (nHead + numSeqs + nFoot) * CHARHEIGHT; fr = firstPrintRes - 1 + blockLen * bn; lr = fr + blockLen - 1; if(lr >= lastPrintRes) { lr = lastPrintRes - 1; } // show the header lines for (i = 0; i < nHead; i++) { printHeaderLine(&psOutFile, i, fr, lr); } // show the sequence lines for (i = 0; i < numSeqs; i++) { row = i % maxSeq; if(resNo) { for(j = fr; j <= lr; j++) { if(j < data->alignPtr->getSeqLength(i + data->firstSeq)) { cRes = clustalw::userParameters->getAminoAcidCode((*seqArray)[i + data->firstSeq][j+1]); res = QChar(cRes); if(res.isLetter() && resNumber.size() > i) { resNumber[i]++; } } } } printSeqLine(&psOutFile, row, i, fr, lr, resNumber[i]); } if(nFoot != 0) { printFooterLine(&psOutFile, fr, lr); } // show the quality curve if(showCurve) { printQualityCurve(&psOutFile, fr, lr, scoreHeight); } if ((bn == (numBlocks-1)) || ((bn % blocksPerPage == blocksPerPage-1))) { psOutFile << "\nshowpage\n"; psOutFile << "restore\n"; } } } psOutFile.close(); return; } void PSPrinter::printPageHeader(ofstream 
*psOutFile, int psRotation,int maxX,int maxY, int page, int numPages, bool header, QString strTime, QString psFile, int psXTrans, int psYTrans, float psScale) { int psX,psY; QString tempStr; // NOTE this is in clustalw!!!!!! //QString revisionLevel = "ClustalW 2.1"; //nige // QString revisionLevel = CLU_LONG_VERSION_STRING; // // Andreas: changes according to autogenerated config.h in // clustalw. Why is ClustalW instead of X needed here? // QString revisionLevel = CLUSTALW_VERSION; (*psOutFile) << "%%Page: " << page << " " << page << "\n"; (*psOutFile) << "save\n\n"; if (psRotation == -90) { (*psOutFile) << "0 " << maxX << " translate\n"; (*psOutFile) << psRotation << " rotate\n"; } if (header) { tempStr = "CLUSTAL " + revisionLevel + " MULTIPLE SEQUENCE ALIGNMENT"; psX = (maxX - tempStr.length() * 10) / 2; psY = maxY - 2 * CHARHEIGHT; (*psOutFile) << psX << " "<< psY << " moveto\n"; (*psOutFile) << "/Times-Bold findfont 14 scalefont setfont\n"; (*psOutFile) << "(" << tempStr.toStdString() << ") show\n\n"; psX = 20; psY = maxY - 4 * CHARHEIGHT; (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << "(File: " << psFile.toStdString() << ") show\n\n"; tempStr = "Date: " + strTime; psX = maxX - tempStr.length() * 8 - 20; psY = maxY - 4 * CHARHEIGHT; (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << "(" << tempStr.toStdString() << ") show\n\n"; QString message = "Page "; QString num; num.setNum(page+1); message += num + " of "; num.setNum(numPages); message += num; psX = 20; psY = maxY - 5 * CHARHEIGHT - 4; (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << "(" << message.toStdString() << ") show\n\n"; } (*psOutFile) << psXTrans << " " << psYTrans << " translate\n"; (*psOutFile) << std::setprecision(2) << psScale << " " << std::setprecision(2) << psScale << " scale\n"; (*psOutFile) << "/Courier-Bold findfont 10 scalefont setfont\n"; } void PSPrinter::printPSInfo(ofstream *psOutFile, int pageSize) { (*psOutFile) << 
"%!PS-Adobe-1.0\n"; (*psOutFile) << "%%Creator: Julie Thompson\n"; (*psOutFile) << "%%Title:ClustalX Alignment\n"; (*psOutFile) << "%%EndComments\n"; (*psOutFile) << "/box { newpath\n"; (*psOutFile) << "\t-0 -3 moveto\n"; (*psOutFile) << "\t-0 " << CHARHEIGHT-3 << " lineto\n"; (*psOutFile) << "\t" << CHARWIDTH << " " << CHARHEIGHT-3 << " lineto\n"; (*psOutFile) << "\t" << CHARWIDTH << " -3 lineto\n"; (*psOutFile) << "\tclosepath\n"; (*psOutFile) << " } def\n\n"; (*psOutFile) << "/color_char { gsave\n"; (*psOutFile) << "\tsetrgbcolor\n"; (*psOutFile) << "\tmoveto\n"; (*psOutFile) << "\tshow\n"; (*psOutFile) << "\tgrestore\n"; (*psOutFile) << " } def\n\n"; (*psOutFile) << "/cbox { gsave\n"; (*psOutFile) << "\ttranslate\n"; (*psOutFile) << "\tnewpath\n"; (*psOutFile) << "\t0 0 moveto\n"; (*psOutFile) << "\tlineto\n"; (*psOutFile) << "\tlineto\n"; (*psOutFile) << "\tlineto\n"; (*psOutFile) << "\tclosepath\n"; (*psOutFile) << "\tfill\n"; (*psOutFile) << "\tgrestore\n"; (*psOutFile) << " } def\n\n"; (*psOutFile) << "/color_inv { gsave\n"; (*psOutFile) << "\tsetrgbcolor\n"; (*psOutFile) << "\ttranslate\n"; (*psOutFile) << "\tbox fill\n"; (*psOutFile) << "\tgrestore\n"; (*psOutFile) << "\tmoveto\n"; (*psOutFile) << "\tshow\n"; (*psOutFile) << " } def\n\n"; (*psOutFile) << "/white_inv { gsave\n"; (*psOutFile) << "\tsetrgbcolor\n"; (*psOutFile) << "\ttranslate\n"; (*psOutFile) << "\tbox fill\n"; (*psOutFile) << "\tgrestore\n"; (*psOutFile) << "\tgsave\n"; (*psOutFile) << "\tsetrgbcolor\n"; (*psOutFile) << "\tmoveto\n"; (*psOutFile) << "\tshow\n"; (*psOutFile) << "\tgrestore\n"; (*psOutFile) << " } def\n\n"; if (pageSize==A3) (*psOutFile) <<"statusdict begin a3 end\n\n"; else if (pageSize==A4) (*psOutFile) <<"statusdict begin a4 end\n\n"; /* For canon color printer, use a3tray instead of a3!! 
*/ } void PSPrinter::printHeaderLine(ofstream *psOutFile, int ix, int fr, int lr) { int i; int psX, psY; int maxNames = data->alignPtr->getMaxNames(); psX = LEFTMARGIN; psY = headerTop - (ix * CHARHEIGHT); (*psOutFile) << psX << " " << psY << " moveto\n"; if(data->nameHeader.size() > ix) { (*psOutFile) << "( "<< setw(maxNames) << data->nameHeader[ix].toStdString() << " ) show\n"; } for(i = fr; i <= lr && i < data->seqHeader[ix].size(); i++) { psX = blockLeft + (i - fr) * CHARWIDTH; (*psOutFile) << "("; (*psOutFile) << data->seqHeader[ix][i].toAscii(); (*psOutFile) << ") "; (*psOutFile) << psX << " " << psY << " " << psX << " " << psY << " 1.0 1.0 1.0 color_inv\n"; } (*psOutFile) << "\n"; } /** * Mark: Change 22-1-2007. I made some changes to this function as the gaps were being * printed out as spaces instead of '-' * */ void PSPrinter::printSeqLine(ofstream *psOutFile, int row, int seq, int fr, int lr, int resNumber) { if(seq + 1 <= data->alignPtr->getNumSeqs()) { int i; int psX, psY; float red, green, blue; int maxNames = data->alignPtr->getMaxNames(); psX = LEFTMARGIN; psY = seqTop - (row * CHARHEIGHT); int firstSeq = data->firstSeq; (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << setw(maxNames) << "(" << data->alignPtr->getName(seq + firstSeq) << " ) show\n"; QColor color; clustalw::Array2D* colorMask = data->colorMask; const clustalw::SeqArray* seqArray = data->alignPtr->getSeqArray(); int resCode = 0; // Mark Change 22-1-2007 added local variable int gapPos1 = clustalw::userParameters->getGapPos1(); // Added local int gapPos2 = clustalw::userParameters->getGapPos2(); // Added local char resToPrint = ' '; for(i = fr; i <= lr; i++) { if(seq < colorMask->getRowSize() && i < colorMask->getColSize() && i < data->alignPtr->getSeqLength(seq + firstSeq)) { color = (*colorMask)[seq][i]; red = static_cast(color.red()) / ConstConvertRGB; green = static_cast(color.green()) / ConstConvertRGB; blue = static_cast(color.blue()) / ConstConvertRGB; 
if(!data->backGroundColors && red == 1 && green == 1 && blue == 1) { red = blue = green = 0; } psX = blockLeft + (i - fr) * CHARWIDTH; resCode = (*seqArray)[seq + firstSeq][i + 1]; /** * Mark change 22-1-2007 * Put in the change here to print out '-' instead of gaps. */ if(resCode == gapPos1 || resCode == gapPos2) { resToPrint = '-'; } else { resToPrint = clustalw::userParameters->getAminoAcidCode(resCode); } (*psOutFile) << "("; (*psOutFile) << resToPrint; (*psOutFile) << ") "; // I think segment exception is the low scoring segs // NOTE make sure that we check if we are in range!!!!!!!! if(data->showLowScoreRes && data->lowScoreRes.getRowSize() > seq && data->lowScoreRes.getColSize() > i && data->lowScoreRes[seq][i] > 0) { (*psOutFile) << psX << " " << psY << " " << setprecision(1) << 1.0 << " " << setprecision(1) << 1.0 << " " << setprecision(1) << 1.0 << " " << psX << " " << psY << " " << setprecision(1) << 0.1 << " " << setprecision(1) << 0.1 << " " << setprecision(1) << 0.1 << " white_inv\n"; } else if(data->showExRes && data->exRes.getRowSize() > seq && data->exRes.getColSize() > i && data->exRes[seq][i] == 1) { (*psOutFile) << psX << " " << psY << " " << setprecision(1) << 1.0 << " " << setprecision(1) << 1.0 << " " << setprecision(1) << 1.0 << " " << psX << " " << psY << " " << setprecision(1) << 0.4 << " " << setprecision(1) << 0.4 << " " << setprecision(1) << 0.4 << " white_inv\n"; } else { if(data->backGroundColors) { (*psOutFile) << psX << " " << psY << " " << psX << " " << psY << " " << setprecision(1) << red << " " << setprecision(1) << green << " " << setprecision(1) << blue << " color_inv\n"; } else { (*psOutFile) << psX << " " << psY << " " << setprecision(1) << red << " " << setprecision(1) << green << " " << setprecision(1) << blue << " color_char\n"; } } } } if(resNumber > 0) { psX = blockLeft + (lr - fr + 1) * CHARWIDTH; psY = seqTop - (row * CHARHEIGHT); (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << "(" << setw(MAXRESNO) 
<< resNumber << ") show\n"; } (*psOutFile) << "\n"; } } void PSPrinter::printFooterLine(ofstream *psOutFile, int fr,int lr) { int i; int psX, psY; psX = LEFTMARGIN; psY = footerTop; //- (ix * CHARHEIGHT); (*psOutFile) << psX << " " << psY << " moveto\n"; (*psOutFile) << "(" << setw(data->alignPtr->getMaxNames()) << " ) show\n"; for(i = fr; i <= lr && i < ruler.length(); i++) { psX = blockLeft + (i - fr) * CHARWIDTH; (*psOutFile) << "("; (*psOutFile) << ruler[i]; (*psOutFile) << ") "; (*psOutFile) << psX << " " << psY << " " << psX << " " << psY << " 1.0 1.0 1.0 color_inv\n"; } (*psOutFile) << "\n"; } void PSPrinter::makeRuler() { int i,j; string marker; int markerLen; int length = data->alignPtr->getLengthLongestSequence(data->firstSeq, data->lastSeq); ruler = ""; ruler.resize(length + 1, ' '); ruler[0] = '1'; for (i = 1; i < length; i++) { if ((i + 1) % 10 > 0) { ruler[i] = '.'; } else { marker = ""; std::stringstream ss; ss << ((i + 1) / 10) * 10; ss >> marker; markerLen = marker.length(); for (j = 0; j < markerLen && (i + 1 + j - markerLen) < length; j++) { ruler[i + 1 + j - markerLen] = marker[j]; } } } cout << ruler << "\n"; } void PSPrinter::printQualityCurve(ofstream *psOutFile, int fr, int lr, int scoreHeight) { int i, w, h; int psX, psY, curveBottom; w = CHARWIDTH; psX = blockLeft + CHARWIDTH; curveBottom = curveTop - scoreHeight * CHARHEIGHT; (*psOutFile) << "0.3 0.3 0.3 setrgbcolor\n"; for(i = fr + 1; i <= lr && i < data->histogram->size(); i++) { (*psOutFile) << psX << " " << curveBottom << " moveto\n"; h = scoreHeight * CHARHEIGHT * ((float)(*data->histogram)[i] / 100.0); if(h < 1) { h = 1; } (*psOutFile) << w << " 0 " << w << " " << h << " 0 " << h << " " << psX << " " << curveBottom << " cbox\n"; psX += CHARWIDTH; } (*psOutFile) << "0.0 0.0 0.0 setrgbcolor\n"; } clustalx-2.1/LowScoringSegParams.h0000644000175000017500000000403311470725216017060 0ustar ddineenddineen#ifndef LOWSCORINGSEGPARAMS_H #define LOWSCORINGSEGPARAMS_H #include class 
/**
 * Dialog for the low-scoring-segment parameters: minimum segment length,
 * DNA marking scale and the protein / DNA weight matrices used for marking.
 * The current values are re-read from the clustalw globals every time the
 * dialog is shown.
 */
class LowScoringSegParams : public QDialog
{
    Q_OBJECT

public:
    /** Creates all widgets and assembles the dialog layout. */
    LowScoringSegParams();

protected:
    /** Refreshes the radio buttons and sliders from the current settings. */
    void showEvent(QShowEvent* event);

private slots:
    /** Connected to the OK button. */
    void accept();
    /** Asks for a protein matrix file and tries to load it. */
    void browseProtein();
    /** Asks for a DNA matrix file and tries to load it. */
    void browseDNA();
    /** "User defined" protein button: browse if no user matrix is loaded yet. */
    void userProteinClicked();
    /** "User defined" DNA button: browse if no user matrix is loaded yet. */
    void userDNAClicked();

private:
    // Widget construction helpers, called from the constructor via setUpLayout().
    void setUpLayout();
    void setUpProteinMatRadioButtons();
    // Select the radio button / slider position matching the current settings.
    void setInitialProteinMat();
    void setInitialDNAMat();
    void setInitialParamValues();
    void setUpDNAMatRadioButtons();
    void setUpOtherParams();
    /** Checks the "User defined" protein radio button. */
    void radioSetUsingUserProtein();

    // Layouts.
    QVBoxLayout* mainLayout;     // top-level layout of the dialog
    QHBoxLayout* horizLayoutAA;  // "load protein matrix" button + file label
    QHBoxLayout* horizLayoutDNA; // "load DNA matrix" button + file label
    QGridLayout* grid;           // slider labels, LCDs and sliders
    QGridLayout* protMatLayout;  // protein matrix radio buttons
    QGridLayout* dnaMatLayout;   // DNA matrix radio buttons

    // Push buttons.
    QPushButton* okButton;
    QPushButton* loadProtMat;
    QPushButton* loadDNAMat;

    // Group boxes holding the layouts above.
    QGroupBox* sliderGridBox;
    QGroupBox* proteinMatGridBox;
    QGroupBox* dnaMatGridBox;
    QGroupBox* loadDNAGroup;
    QGroupBox* loadProteinGroup;

    // Protein weight matrix choice.
    QButtonGroup* proteinWeightMat;
    QRadioButton* gonnetPAM80;
    QRadioButton* gonnetPAM120;
    QRadioButton* gonnetPAM250;
    QRadioButton* gonnetPAM350;
    QRadioButton* aaUserMat;

    // DNA weight matrix choice.
    QButtonGroup* DNAWeightMat;
    QRadioButton* IUB;
    QRadioButton* clustal;
    QRadioButton* dnaUserMat;

    // Sliders with their captions and LCD read-outs.
    QLabel* minLenSegsLabel;
    QLabel* dnaMarkingScaleLabel;
    QSlider* minLenSegs;      // range 1-20
    QSlider* dnaMarkingScale; // range 1-10
    QLCDNumber* minLenSegsLCD;
    QLCDNumber* dnaMarkingScaleLCD;

    // Labels showing the file name of the loaded user matrices.
    QLabel* userProteinMatrix;
    QLabel* userDNAMatrix;

    // Paths as returned by the file dialog.
    QString aaMatrixPath;
    QString dnaMatrixPath;

    // File names of the successfully loaded user matrices ("" if none).
    std::string aaMatrixFile;
    std::string dnaMatrixFile;

    // True once a user matrix of the respective kind has been loaded.
    bool proteinMatLoaded;
    bool dnaMatLoaded;
};
* * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "LowScoringSegParams.h" #include "clustalW/substitutionMatrix/globalmatrix.h" #include "FileDialog.h" LowScoringSegParams::LowScoringSegParams() { proteinMatLoaded = false; dnaMatLoaded = false; sliderGridBox = new QGroupBox; proteinMatGridBox = new QGroupBox(tr("Protein Weight Matrix")); dnaMatGridBox = new QGroupBox(tr("DNA Weight Matrix")); mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(sliderGridBox); mainLayout->addWidget(proteinMatGridBox); mainLayout->addWidget(loadProteinGroup); mainLayout->addWidget(dnaMatGridBox); mainLayout->addWidget(loadDNAGroup); setLayout(mainLayout); } void LowScoringSegParams::showEvent(QShowEvent* event) { setInitialProteinMat(); setInitialParamValues(); setInitialDNAMat(); } void LowScoringSegParams::setUpLayout() { setUpOtherParams(); grid = new QGridLayout; grid->addWidget(minLenSegsLabel, 0, 0); grid->addWidget(minLenSegsLCD, 0, 1); grid->addWidget(minLenSegs, 0, 2); grid->addWidget(dnaMarkingScaleLabel, 1, 0); grid->addWidget(dnaMarkingScaleLCD, 1, 1); grid->addWidget(dnaMarkingScale, 1, 2); sliderGridBox->setLayout(grid); setUpProteinMatRadioButtons(); protMatLayout = new QGridLayout; protMatLayout->addWidget(gonnetPAM80, 0, 0); protMatLayout->addWidget(gonnetPAM120, 0, 1); protMatLayout->addWidget(gonnetPAM250, 0, 2); protMatLayout->addWidget(gonnetPAM350, 1, 0); protMatLayout->addWidget(aaUserMat, 1, 1); proteinMatGridBox->setLayout(protMatLayout); setUpDNAMatRadioButtons(); dnaMatLayout = new QGridLayout; dnaMatLayout->addWidget(IUB, 0, 0); dnaMatLayout->addWidget(clustal, 0, 1); dnaMatLayout->addWidget(dnaUserMat, 1, 0); dnaMatGridBox->setLayout(dnaMatLayout); } void 
LowScoringSegParams::userProteinClicked() { if(!proteinMatLoaded) { browseProtein(); } } void LowScoringSegParams::userDNAClicked() { if(!dnaMatLoaded) { browseDNA(); } } void LowScoringSegParams::setUpProteinMatRadioButtons() { proteinWeightMat = new QButtonGroup; gonnetPAM80 = new QRadioButton("Gonnet PAM 80"); gonnetPAM120 = new QRadioButton("Gonnet PAM 120"); gonnetPAM250 = new QRadioButton("Gonnet PAM 250"); gonnetPAM350 = new QRadioButton("Gonnet PAM 350"); aaUserMat = new QRadioButton("User defined"); connect(aaUserMat, SIGNAL(clicked()), this, SLOT(userProteinClicked())); proteinWeightMat->addButton(gonnetPAM80); proteinWeightMat->addButton(gonnetPAM120); proteinWeightMat->addButton(gonnetPAM250); proteinWeightMat->addButton(gonnetPAM350); proteinWeightMat->addButton(aaUserMat); } void LowScoringSegParams::setInitialProteinMat() { if(clustalw::subMatrix->getQTsegmentAAMatNum() == clustalw::QTAASEGUSER) { aaUserMat->setChecked(true); } else if(clustalw::subMatrix->getQTsegmentAAMatNum() == clustalw::QTAASEGGONNETPAM80) { gonnetPAM80->setChecked(true); } else if(clustalw::subMatrix->getQTsegmentAAMatNum() == clustalw::QTAASEGGONNETPAM120) { gonnetPAM120->setChecked(true); } else if(clustalw::subMatrix->getQTsegmentAAMatNum() == clustalw::QTAASEGGONNETPAM250) { gonnetPAM250->setChecked(true); } else if(clustalw::subMatrix->getQTsegmentAAMatNum() == clustalw::QTAASEGGONNETPAM350) { gonnetPAM350->setChecked(true); } } void LowScoringSegParams::setUpDNAMatRadioButtons() { DNAWeightMat = new QButtonGroup; IUB = new QRadioButton("IUB"); clustal = new QRadioButton("CLUSTALW(1.6)"); dnaUserMat = new QRadioButton("User defined"); connect(dnaUserMat, SIGNAL(clicked()), this, SLOT(userDNAClicked())); DNAWeightMat->addButton(IUB); DNAWeightMat->addButton(clustal); DNAWeightMat->addButton(dnaUserMat); } void LowScoringSegParams::setInitialDNAMat() { if(clustalw::subMatrix->getQTsegmentDNAMatNum() == clustalw::DNAIUB) { IUB->setChecked(true); } else 
if(clustalw::subMatrix->getQTsegmentDNAMatNum() == clustalw::DNACLUSTALW) { clustal->setChecked(true); } else if(clustalw::subMatrix->getQTsegmentDNAMatNum() == clustalw::DNAUSERDEFINED) { dnaUserMat->setChecked(true); } } void LowScoringSegParams::setUpOtherParams() { minLenSegs = new QSlider(Qt::Horizontal); minLenSegs->setRange(1, 20); minLenSegsLabel = new QLabel(tr("Minimum Length of Segments ")); minLenSegsLCD = new QLCDNumber; connect(minLenSegs, SIGNAL(valueChanged(int)), minLenSegsLCD, SLOT(display(int))); dnaMarkingScale = new QSlider(Qt::Horizontal); dnaMarkingScale->setRange(1, 10); dnaMarkingScaleLabel = new QLabel(tr("DNA Marking Scale ")); dnaMarkingScaleLCD = new QLCDNumber; connect(dnaMarkingScale, SIGNAL(valueChanged(int)), dnaMarkingScaleLCD, SLOT(display(int))); loadProtMat = new QPushButton(tr("Load protein matrix:")); loadProtMat->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(loadProtMat, SIGNAL(clicked()), this, SLOT(browseProtein())); loadProteinGroup = new QGroupBox; horizLayoutAA = new QHBoxLayout; userProteinMatrix = new QLabel(""); horizLayoutAA->addWidget(loadProtMat); horizLayoutAA->addWidget(userProteinMatrix); loadProteinGroup->setLayout(horizLayoutAA); loadDNAMat = new QPushButton(tr("Load DNA matrix:")); loadDNAMat->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(loadDNAMat, SIGNAL(clicked()), this, SLOT(browseDNA())); loadDNAGroup = new QGroupBox; horizLayoutDNA = new QHBoxLayout; userDNAMatrix = new QLabel(""); horizLayoutDNA->addWidget(loadDNAMat); horizLayoutDNA->addWidget(userDNAMatrix); loadDNAGroup->setLayout(horizLayoutDNA); } void LowScoringSegParams::setInitialParamValues() { minLenSegs->setValue(clustalw::userParameters->getQTminLenLowScoreSegment()); minLenSegsLCD->display(minLenSegs->value()); dnaMarkingScale->setValue(clustalw::userParameters->getQTlowScoreDNAMarkingScale()); dnaMarkingScaleLCD->display(dnaMarkingScale->value()); } void 
LowScoringSegParams::radioSetUsingUserProtein() { aaUserMat->setChecked(true); } void LowScoringSegParams::browseProtein() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; aaMatrixPath = fd.getOpenFileName(this, tr("Find Protein Matrix")); string path = aaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getQTLowScoreMatFromFile(userFile, false)) { aaMatrixFile = string(userFile); proteinMatLoaded = true; userProteinMatrix->setText(aaMatrixFile.c_str()); radioSetUsingUserProtein(); } else { aaMatrixFile = ""; proteinMatLoaded = false; userProteinMatrix->setText(aaMatrixFile.c_str()); setInitialProteinMat(); } delete []userFile; } else // pressed cancel { if(!proteinMatLoaded) { setInitialProteinMat(); } } } void LowScoringSegParams::browseDNA() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; dnaMatrixPath = fd.getOpenFileName(this, tr("Find DNA Matrix")); string path = dnaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getQTLowScoreMatFromFile(userFile, true)) { dnaMatrixFile = string(userFile); dnaMatLoaded = true; userDNAMatrix->setText(dnaMatrixFile.c_str()); dnaUserMat->setChecked(true); } else { dnaMatrixFile = ""; dnaMatLoaded = false; setInitialDNAMat(); userDNAMatrix->setText(dnaMatrixFile.c_str()); } delete []userFile; } else // pressed cancel { if(!dnaMatLoaded) { setInitialDNAMat(); } } } void LowScoringSegParams::accept() { clustalw::userParameters->setQTminLenLowScoreSegment(minLenSegs->value()); clustalw::userParameters->setQTlowScoreDNAMarkingScale(dnaMarkingScale->value()); if(aaUserMat->isChecked()) { clustalw::subMatrix->setQTsegmentAAMatNum(clustalw::QTAASEGUSER); } else if(gonnetPAM80->isChecked()) { 
clustalw::subMatrix->setQTsegmentAAMatNum(clustalw::QTAASEGGONNETPAM80); } else if(gonnetPAM120->isChecked()) { clustalw::subMatrix->setQTsegmentAAMatNum(clustalw::QTAASEGGONNETPAM120); } else if(gonnetPAM250->isChecked()) { clustalw::subMatrix->setQTsegmentAAMatNum(clustalw::QTAASEGGONNETPAM250); } else if(gonnetPAM350->isChecked()) { clustalw::subMatrix->setQTsegmentAAMatNum(clustalw::QTAASEGGONNETPAM350); } if(IUB->isChecked()) { clustalw::subMatrix->setQTsegmentDNAMatNum(clustalw::DNAIUB); } else if(clustal->isChecked()) { clustalw::subMatrix->setQTsegmentDNAMatNum(clustalw::DNACLUSTALW); } else if(dnaUserMat->isChecked()) { clustalw::subMatrix->setQTsegmentDNAMatNum(clustalw::DNAUSERDEFINED); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/KeyController.h0000644000175000017500000000231711470725216015766 0ustar ddineenddineen/** * Implements a singleton that maintains keyboard state. The single instance * is (re)instantiated on demand like: * KeyController *kbd = KeyController::Instance(); * * 25-04-07,Nigel Brown(EMBL): created. 
*/
#ifndef KEYCONTROLLER_H
#define KEYCONTROLLER_H

#include
#include
#include

/*
 * Records whether the Shift and Control keys are currently considered
 * pressed. Callers feed key codes in through consumeKeyPress() and
 * consumeKeyRelease() and read the state back through the testers.
 * Construction is restricted; use Instance() to get the object.
 */
class KeyController : public QObject
{
public:
    /* return the KeyController singleton */
    static KeyController *Instance();

    /* resetters */
    void clear() { keyShift = keyCtrl = false; };   /* forget both keys */
    void clearShift() { keyShift = false; };
    void clearCtrl() { keyCtrl = false; };

    /* setters */
    /* Both return true when 'key' is Qt::Key_Shift or Qt::Key_Control (and
       update the matching flag), false for any other key. */
    bool consumeKeyPress(int key);
    bool consumeKeyRelease(int key);

    /* testers */
    bool shiftPressed() { return keyShift; }
    bool shiftReleased() { return !keyShift; }
    bool ctrlPressed() { return keyCtrl; }
    bool ctrlReleased() { return !keyCtrl; }

protected:
    /* hide the constructors */
    KeyController();
    KeyController(const KeyController&);
    KeyController& operator= (const KeyController&);
    //- bool eventFilter(QObject *o, QEvent *e);

private:
    /* keyboard state */
    bool keyShift;   /* true while Shift is recorded as down */
    bool keyCtrl;    /* true while Control is recorded as down */
};

#endif //KEYCONTROLLER_H
clustalx-2.1/KeyController.cpp0000644000175000017500000000454011470725216016321 0ustar ddineenddineen/**
 * Implements a singleton that maintains keyboard state. The single instance
 * is (re)instantiated on demand like:
 * KeyController *kbd = KeyController::Instance();
 *
 * 25-04-07,Nigel Brown(EMBL): created.
*/ #include "KeyController.h" #include #include #include #include using namespace std; //return the sole instance KeyController *KeyController::Instance() { static KeyController instance; return &instance; } KeyController::KeyController() : keyShift(false), keyCtrl(false) { //- qApp->installEventFilter(this); } bool KeyController::consumeKeyPress(int key) { switch(key) { case Qt::Key_Shift: //printf("KeyController: shift down\n"); return keyShift = true; case Qt::Key_Control: //printf("KeyController: ctrl down\n"); return keyCtrl = true; } return false; } bool KeyController::consumeKeyRelease(int key) { switch(key) { case Qt::Key_Shift: //printf("KeyController: shift up\n"); return !(keyShift = false); case Qt::Key_Control: //printf("KeyController: ctrl up\n"); return !(keyCtrl = false); } return false; } /* nige: another approach - overkill! */ //- //- bool KeyController::eventFilter(QObject *, QEvent *e) //- { //- //printf("KeyController::eventFilter(%0x, %d)\n", (int)this, e->type()); //- //- /* nige: detect shift and ctrl keys BEFORE this widget gets focus */ //- if (e->type() == QEvent::KeyPress) { //- QKeyEvent *keyEvent = static_cast(e); //- if (keyEvent->key() == Qt::Key_Shift) { //- cout << "shift pressed\n"; //- keyShift = true; //- } //- if (keyEvent->key() == Qt::Key_Control) { //- cout << "ctrl pressed\n"; //- keyCtrl = true; //- } //- } //- //- /* nige: detect shift and ctrl keys BEFORE this widget gets focus */ //- if (e->type() == QEvent::KeyRelease) { //- QKeyEvent *keyEvent = static_cast(e); //- if (keyEvent->key() == Qt::Key_Shift) { //- cout << "shift released\n"; //- keyShift = false; //- } //- if (keyEvent->key() == Qt::Key_Control) { //- cout << "ctrl released\n"; //- keyCtrl = false; //- } //- } //- //- return false; //- } clustalx-2.1/HistogramWidget.h0000644000175000017500000000357511470725216016302 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. 
mark.larkin@ucd.ie
 *
 * Changes:
 *
 * 16-01-07,Nigel Brown(EMBL): added drawContents() called by paintEvent().
 *
 * 20-02-07,Nigel Brown(EMBL): constructor and updateBoxSize() parameters
 * reordered.
 *
 * 12-4-07, Mark Larkin : Changed destructor. We don't need to delete QObjects.
 *
 * 16-04-07,Nigel Brown(EMBL): removed destructor; arrayOfHeights not
 * allocated here.
 ****************************************************************************/
#ifndef HISTOGRAMWIDGET_H
#define HISTOGRAMWIDGET_H

#include
class QImage;
//class QPixmap;
#include // Mark 20-4-2007
class QPoint;
class QSize;
class QString;
class QWidget;
#include
#include

/**
 * The HistogramWidget class displays a graphical histogram. It also displays a ruler.
 */
class HistogramWidget : public QWidget
{
    Q_OBJECT

public:
    HistogramWidget(int boxWidth, int boxSize, QWidget *parent = 0);
    QSize sizeHint() const;
    // Stores the pointer (no copy is made) and rebuilds the bar pixmaps.
    void addHistogramInfo(std::vector* histogramValues);
    // Drops the score pointer and shrinks the widget to zero columns.
    void clearHistogram();
    // Returns the pointer last given to addHistogramInfo(), or 0 after a clear.
    const std::vector* getColumnScores(){return arrayOfHeights;}

public slots:
    void updateBoxSize(int boxWidth, int boxSize);
    void updateSizeHint();

signals:
    void characterSelected(const QString &character);

protected:
    void paintEvent(QPaintEvent *event);
    void drawContents(QPainter *p, int cx, int cy, int cw, int ch); //nige
    //void mousePressEvent(QMouseEvent *event); //nige test

private:
    // Pre-renders one pixmap per score value into histogramBars.
    void calculateHistogramBars();

    QFont rulerFont;
    int alignLength;               // number of columns, i.e. size of *arrayOfHeights
    int heightAllowed;
    int boxSize;
    int boxWidth;                  // pixel width of one column
    float heightOfBox;
    std::vector* arrayOfHeights;   // not owned: supplied by addHistogramInfo()
    QVector histogramBars;         // indexed by score value
    int displayStartPos;
    int offSetFromTop;
    int extraWhiteSpace;
    QColor *rulerColor;            // NOTE(review): allocated with new in the constructor; no delete is visible in this file
    int lengthSmallBar;            // tick length for ordinary columns
    int lengthLargeBar;            // tick length for every 10th column
    bool isSet;                    // true once histogram data has been supplied
};

#endif
clustalx-2.1/HistogramWidget.cpp0000644000175000017500000002342411470725216016630 0ustar ddineenddineen/**
 * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie
 *
 * Changes:
 *
 * 16-01-07,Nigel Brown(EMBL): added drawContents() called by paintEvent().
 *
 * 31-01-07,Nigel Brown(EMBL): changed && to || in updateBoxSize().
Changed * drawContents() to paint histogram 'bars' before writing ruler text as this * was effectively clipped by the histogram bar on the next pass. * * 06-02-07,Nigel Brown(EMBL): darker histogram bars for Mac. * * 20-02-07,Nigel Brown(EMBL): constructor and updateBoxSize() parameters * reordered. * * 12-4-07, Mark Larkin : Changed destructor. We dont need to delete QObjects. * * 16-04-07,Nigel Brown(EMBL): removed destructor; arrayOfHeights not * allocated here. ****************************************************************************/ #include "HistogramWidget.h" #include #include //#include #include #include #include #include #include // next two only needed for cerr/debugging //#include //using namespace std; /** * This is the constructor for a HistogramWidget. * @param histogramValues[] The array of histogram values to be displayed. * @param length The length of the alignment. * @param boxSize The width of the histogram bars. * @param *parent The parent of this widget. This parameter is optional. */ HistogramWidget::HistogramWidget(int _boxWidth, int _boxSize, QWidget *parent) : QWidget(parent) { /* * Set up the initial values. */ alignLength = 0; arrayOfHeights = 0; isSet = false; boxSize = _boxSize; boxWidth = _boxWidth; heightOfBox = 1; //histogramImage = 0; displayStartPos = 0; extraWhiteSpace = 5; lengthSmallBar = 1; lengthLargeBar = 3; setBackgroundRole(QPalette::Base); /* * Create a color to display the ruler in. */ rulerColor = new QColor; rulerColor->setRgb(192,215,233); offSetFromTop = 10; heightAllowed = 50; updateSizeHint(); } /** * This function overrides the parents sizeHint function. It gives an idea of * the size this widget needs. 
*/ QSize HistogramWidget::sizeHint() const { //(void) printf("HistogramWidget::sizeHint(w=%d,h=%d) <- [ha=%d, hob=%d, ews=%d]\n", // alignLength * boxWidth, (heightAllowed * heightOfBox) + extraWhiteSpace, // heightAllowed, heightOfBox, extraWhiteSpace); return QSize(alignLength * boxWidth, (heightAllowed * heightOfBox) + extraWhiteSpace); } /** * This function will be called every time the alignment may have changed size. */ void HistogramWidget::updateSizeHint() { resize(alignLength * boxWidth, (heightAllowed * heightOfBox) + extraWhiteSpace); update(); } void HistogramWidget::addHistogramInfo(std::vector* histogramValues) { arrayOfHeights = histogramValues; alignLength = arrayOfHeights->size(); /* bug 143: we re-use these scores for the column quality, i.e. don't reduce to 99 HERE 99 seems to be needed, otherwise histogram display is messed up // Check each element is <= 100 for(int i = 0; i < alignLength; i++) { if((*arrayOfHeights)[i] >= 100) { (*arrayOfHeights)[i] = 99; // Make sure 99 is the maximum value. } } */ isSet = true; updateSizeHint(); calculateHistogramBars(); } void HistogramWidget::clearHistogram() { isSet = false; arrayOfHeights = 0; alignLength = 0; updateSizeHint(); } /** * This function is an event handler. It is called automatically when the widget * needs to be repainted. * It draws the histogram and the ruler from the appropriate start and end positions. * @param *event A paint event object. 
*/ void HistogramWidget::paintEvent(QPaintEvent *event) { QPainter painter(this); QRect rect = event->rect(); int cx = rect.x(); int cy = rect.y(); int cw = rect.width(); int ch = rect.height(); //(void) printf("HistogramWidget::paintEvent(%d,%d, %d,%d)\n", cx,cy, cw,ch); drawContents(&painter, cx, cy, cw, ch); } void HistogramWidget::drawContents(QPainter *painter, int cx, int cy, int cw, int ch) { painter->fillRect(cx, cy, cw, ch, QBrush(Qt::blue)); painter->setBackgroundMode(Qt::OpaqueMode); //painter->setBackground(QBrush(Qt::white)); // Andreas Wilm (UCD):ff this is too small especially on high // resolutions. Calculate dynamically rulerFont.setPixelSize(8); painter->setFont(rulerFont); QFontMetrics fontMetrics(rulerFont); int offSetForRuler = fontMetrics.height() - 1; // Set up the begin and end columns int beginColumn = cx / boxWidth; int endColumn = (cx+cw-1) / boxWidth; int index = 0; int rulerNum; int xPos; int yPos = 0; //(void) printf("HistogramWidget::drawContents(x=%d,y=%d, w=%d,h=%d)(bw=%d,bh=%d) -> [bc=%d,ec=%d]\n", // cx,cy, cw,ch, boxWidth,boxSize, beginColumn,endColumn); if(isSet && arrayOfHeights) { //31-01-07,nige: the histogram vertical stripes must be painted first //otherwise the current stripe may paint over the last ruler text. 
for (int column = beginColumn; column <= endColumn; ++column) { if(column >= 0 && column < arrayOfHeights->size()) { index = (*arrayOfHeights)[column]; // bug 143: dynamically set to 99 here (see above) // otherwise histogram is messed up if (index>=100) index=99; if(index >= 0 && index < histogramBars.size() && !histogramBars[index].isNull()) { painter->drawPixmap((column) * boxWidth, 0, histogramBars[index]); } } } for (int column = beginColumn; column <= endColumn; ++column) { if(column >= 0 && column < arrayOfHeights->size()) { rulerNum = column + 1; if((rulerNum % 10) == 0) { painter->drawLine(column * boxWidth + (boxWidth / 2), offSetForRuler, column * boxWidth + (boxWidth / 2), offSetForRuler + lengthLargeBar); xPos = column * boxWidth + (boxWidth / 2) - (fontMetrics.width(QString::number(rulerNum))) / 2; if(rulerNum >= 10000) { painter->drawText(xPos, yPos, 7 * boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } else if(rulerNum >= 1000 ) { painter->drawText(xPos, yPos, 6 * boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } else if(rulerNum >= 100 ) { painter->drawText(xPos, yPos, 5 * boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } else if(rulerNum >= 10 ) { painter->drawText(xPos, yPos, 3 * boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } else { painter->drawText(xPos, yPos, boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } } else { if(rulerNum == 1) { painter->drawText((boxWidth / 2) - (fontMetrics.width(QString::number(0))) / 2, yPos, boxWidth, boxSize, Qt::TextSingleLine, QString::number(rulerNum)); } painter->drawLine(column * boxWidth + (boxWidth / 2), offSetForRuler, column * boxWidth + (boxWidth / 2), offSetForRuler + lengthSmallBar); } } } } } void HistogramWidget::updateBoxSize(int _boxWidth, int _boxSize) { if (boxWidth != _boxWidth || boxSize != _boxSize) { boxWidth = _boxWidth; boxSize = _boxSize; resize(alignLength * boxWidth, (heightAllowed * 
heightOfBox) + extraWhiteSpace); calculateHistogramBars(); update(); } } void HistogramWidget::calculateHistogramBars() { histogramBars.clear(); histogramBars.resize(100); int heightOfBar; for(int i = 0; i < histogramBars.size(); i++) { QPoint origin(0, height()); histogramBars[i] = QPixmap(boxWidth, height()); heightOfBar = (i * heightOfBox) / 3; QPainter painter(&histogramBars[i]); painter.fillRect(0, 0, boxWidth, height(), QBrush(Qt::white)); painter.translate(origin); #if OS_MAC painter.fillRect(0, 0, boxWidth, -heightOfBar, QBrush(Qt::gray)); #else painter.fillRect(0, 0, boxWidth, -heightOfBar, QBrush(Qt::lightGray)); #endif painter.fillRect(0, -height(), boxWidth, 11, QBrush(rulerColor->rgb())); } } /* TEST void HistogramWidget::mousePressEvent(QMouseEvent *event) //nige { if (event->button() == Qt::LeftButton) { QSize size = sizeHint(); printf("SIZEHINT: w=%d, h=%d\n", size.width(), size.height()); } else { QWidget::mousePressEvent(event); } } TEST end */ clustalx-2.1/HelpDisplayWidget.h0000644000175000017500000000206111470725216016550 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. Also removed * setAllPtrsToNull function. * * 24-05-07,Nigel Brown(EMBL): simplified file searching mechanism; removed * filePath, executablePath members; made helpFileName static; removed * executablePath argument to constructor. * * 06-06-07,Nigel Brown(EMBL): loadHelpInformation() now returns string *. 
*/
#ifndef HELPDISPLAYWIDGET_H
#define HELPDISPLAYWIDGET_H

#include
#include
#include "clustalW/general/clustalw.h"
#include "ClustalQtParams.h"

class QPushButton;
class QTextEdit;
class QVBoxLayout;

/**
 * Dialog that shows one section of the help file in a read-only text view
 * with an Ok button.
 */
class HelpDisplayWidget : public QDialog
{
    Q_OBJECT

public:
    /** @param helpPointer Single-character key of the help section to show. */
    HelpDisplayWidget(char helpPointer);

protected:

private slots:
    void accept();

private:
    /**
     * Reads the section keyed by helpPointer from the help file.
     * Returns a heap-allocated string the caller must delete, or 0 on failure.
     */
    string *loadHelpInformation(char helpPointer);

    QVBoxLayout* mainLayout;
    QTextEdit *helpText;
    QPushButton* okButton;
    QString title;

    static const std::string helpFileName;      // name of the help file to search for
    static const int MinLineEditWidth = 250;
};

#endif //HELPDISPLAYWIDGET_H
clustalx-2.1/HelpDisplayWidget.cpp0000644000175000017500000001130511470725216017104 0ustar ddineenddineen/**
 * Changes:
 * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. Also removed
 * setAllPtrsToNull function.
 *
 * 24-05-07,Nigel Brown(EMBL): simplified file searching mechanism; removed
 * filePath, executablePath members; made helpFileName static; removed
 * executablePath argument to constructor. Added Resources class to handle
 * file search.
 *
 * 06-06-07,Nigel Brown(EMBL): loadHelpInformation() now returns 'string *'
 * instead of 'void'; constructor tests this value to see if a help string was
 * successfully loaded then typesets it as HTML and scrolls to the top.
*/ #include "HelpDisplayWidget.h" #include #include #include #include #include "Resources.h" const string HelpDisplayWidget::helpFileName = "clustalx.hlp"; HelpDisplayWidget::HelpDisplayWidget(char helpPointer) { setWindowTitle("Help"); helpText = new QTextEdit; helpText->setReadOnly(true); helpText->setMinimumWidth(500); helpText->setMinimumHeight(400); setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("Ok")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Minimum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); mainLayout->addWidget(helpText); mainLayout->addWidget(okButton); setLayout(mainLayout); /* nige */ string *buff = loadHelpInformation(helpPointer); if (buff) { //cout << buff->c_str() << endl; helpText->setHtml(QString(buff->c_str())); //nige: scroll to top of text QTextCursor tc = helpText->textCursor(); tc.movePosition(QTextCursor::Start); helpText->setTextCursor(tc); delete buff; } } string *HelpDisplayWidget::loadHelpInformation(char helpPointer) { ifstream helpFile; int i, number, nlines; bool foundHelp; char templine[MAXLINE + 1]; // To be used with get! char token = '\0'; string digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; string helpMarker = ">>HELP"; Resources *res = Resources::Instance(); string file = res->searchPathsForFile(helpFileName); //cout << "helpfile: " << file.c_str() << endl; //nige string *buff = new string(""); try { helpFile.open(file.c_str(), ifstream::in); if(!helpFile.is_open()) { clustalw::utilityObject->error("Cannot open File file [%s]\n", helpFileName.c_str()); return 0; } nlines = 0; number = -1; foundHelp = false; string tempS; while(!helpFile.eof()) { if(!helpFile.getline(templine, MAXLINE + 1)) { if(!foundHelp) { clustalw::utilityObject->error("No help found in help file\n"); } helpFile.close(); return 0; } tempS = string(templine); if(tempS.find(helpMarker, 0) != string::npos) // If found helpMarker! 
{ token = ' '; for(i = helpMarker.length(); i < 8; i++) // MAGIC 8?????? { if(digits.find(tempS[i], 0) != string::npos) { token = tempS[i]; break; } } } if(token == helpPointer) { foundHelp = true; while(helpFile.getline(templine, MAXLINE + 1)) { tempS = string(templine); if(tempS.find(helpMarker, 0) != string::npos) // If found another help { //nige: section terminated by next section anchor helpFile.close(); *buff += "\n"; return buff; } //replace trailing newline on non-html if (tempS[0] != '<') { tempS += '\n'; } //cout << tempS.c_str() << endl; *buff += tempS; ++nlines; } //nige: last section terminated by EOF helpFile.close(); *buff += "\n"; return buff; } } } catch(ifstream::failure e) { return 0; } if(helpFile.is_open()) { helpFile.close(); } return 0; } void HelpDisplayWidget::accept() { setResult(QDialog::Accepted); hide(); } clustalx-2.1/HardCodedColorScheme.h0000644000175000017500000000064011470725216017130 0ustar ddineenddineen#ifndef HARDCODEDCOLORSCHEME_H #define HARDCODEDCOLORSCHEME_H #include #include #include #include #include "ClustalQtParams.h" class HardCodedColorScheme { public: HardCodedColorScheme(); ~HardCodedColorScheme(); const std::vector* getHardCodedScheme(){return colors;} private: std::vector* colors; }; #endif clustalx-2.1/HardCodedColorScheme.cpp0000644000175000017500000000304311470725216017463 0ustar ddineenddineen#include "HardCodedColorScheme.h" HardCodedColorScheme::HardCodedColorScheme() { // Set up the colors! 
colors = new std::vector; colors->resize(8); QColor colorToAdd; QString name; ColorNamePair pairToAdd; colorToAdd.setRgb(229, 25, 25); name = "RED"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[0] = pairToAdd; colorToAdd.setRgb(25, 25, 178); name = "BLUE"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[1] = pairToAdd; colorToAdd.setRgb(25, 229, 25); name = "GREEN"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[2] = pairToAdd; colorToAdd.setRgb(229, 153, 76); name = "ORANGE"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[3] = pairToAdd; colorToAdd.setRgb(25, 229, 229); name = "CYAN"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[4] = pairToAdd; colorToAdd.setRgb(229, 127, 127); name = "PINK"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[5] = pairToAdd; colorToAdd.setRgb(229, 25, 229); name = "MAGENTA"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[6] = pairToAdd; colorToAdd.setRgb(229, 229, 0); name = "YELLOW"; pairToAdd.color = colorToAdd; pairToAdd.name = name; (*colors)[7] = pairToAdd; } HardCodedColorScheme::~HardCodedColorScheme() { delete colors; } clustalx-2.1/FileDialog.h0000644000175000017500000000370611470725216015174 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * This widget was created to get around the problem that the QFileDialog cannot * remember the last directory visited. The QFileDialog doesnt know the last * directory it has been to. So I developed a wrapper class to make it easier. * It uses a global string defined in globalLastDir.h * * Changes: * * 03-02-07,Nigel Brown(EMBL): copied getOpenFileName() into two versions * getOpenDataFileName() and getOpenParamFileName() and changed lastDir to * lastDataDir and lastParamDir. These maintain two different working * directories for data files and for parameter files, respectively. 
*
 * 03-02-07,Nigel Brown(EMBL): changed getSaveFileName() to
 * getSaveDataFileName() which now takes a file suffix as a hint to the file
 * to be opened. Added lastBaseName to hold last data file basename (ie., no
 * suffix). Removed 'globalLastDir.h' dependency by making lastDataDir,
 * lastParamDir, lastBaseName static members.
 ****************************************************************************/
#ifndef FILEDIALOG_H
#define FILEDIALOG_H

#include
#include

/**
 * QFileDialog wrapper that remembers, in static members shared by all
 * instances, the directory last used for data files and for parameter files.
 */
class FileDialog : public QFileDialog
{
    Q_OBJECT

public:
    /** Open-file dialog starting in the last data directory; returns "" on cancel. */
    QString getOpenDataFileName(QWidget *parent = 0,
                                const QString &caption = QString(),
                                const QString &filter = QString(),
                                QString *selectedFilter = 0);

    /** Open-file dialog starting in the last parameter directory; returns "" on cancel. */
    QString getOpenParamFileName(QWidget *parent = 0,
                                 const QString &caption = QString(),
                                 const QString &filter = QString(),
                                 QString *selectedFilter = 0);

    /**
     * Save-file dialog pre-filled from the last data directory, the last
     * base name and the given suffix; returns "" on cancel.
     */
    QString getSaveDataFileName(const QString &suffix = QString(),
                                QWidget *parent = 0,
                                const QString &caption = QString(),
                                const QString &filter = QString(),
                                QString *selectedFilter = 0);

protected:

private slots:

private:
    static QString lastDataDir;    // directory of the last data file chosen
    static QString lastParamDir;   // directory of the last parameter file chosen
    static QString lastBaseName;   // base name (no suffix) of the last data file chosen
};

#endif //FILEDIALOG_H
clustalx-2.1/FileDialog.cpp0000644000175000017500000000664111470725216015530 0ustar ddineenddineen/**
 * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie
 *
 * This widget was created to get around the problem that the QFileDialog cannot
 * remember the last directory visited. The QFileDialog doesnt know the last
 * directory it has been to. So I developed a wrapper class to make it easier.
 * It uses a global string defined in globalLastDir.h
 *
 * Changes:
 *
 * 03-02-07,Nigel Brown(EMBL): copied getOpenFileName() into two versions
 * getOpenDataFileName() and getOpenParamFileName() and changed lastDir to
 * lastDataDir and lastParamDir. These maintain two different working
 * directories for data files and for parameter files, respectively.
* * 03-02-07,Nigel Brown(EMBL): changed getSaveFileName() to * getSaveDataFileName() which now takes a file suffix as a hint to the file * to be opened. Added lastBaseName to hold last data file basename (ie., no * suffix). Removed 'globalLastDir.h'dependency by making lastDataDir, * lastParamDir, lastBaseName static members. ****************************************************************************/ #include "FileDialog.h" #include #include //#include //using namespace std; /* static initializations */ QString FileDialog::lastDataDir = QString(QDir::currentPath()); QString FileDialog::lastParamDir = QString(QDir::currentPath()); QString FileDialog::lastBaseName; /* static initializations end */ QString FileDialog::getOpenDataFileName(QWidget *parent, const QString &caption, const QString &filter, QString *selectedFilter) { if (lastDataDir == "") lastDataDir = QDir::currentPath(); QString filePath = QFileDialog::getOpenFileName(parent, caption, lastDataDir, filter, selectedFilter); if (filePath != "") { QFileInfo info(filePath); lastDataDir = info.absolutePath(); lastBaseName = info.completeBaseName(); //cout << "lastDataDir: " << lastDataDir.toStdString() << // " lastBaseName: " << lastBaseName.toStdString() << endl; } return filePath; } QString FileDialog::getOpenParamFileName(QWidget *parent, const QString &caption, const QString &filter, QString *selectedFilter) { if (lastParamDir == "") lastParamDir = QDir::currentPath(); QString filePath = QFileDialog::getOpenFileName(parent, caption, lastParamDir, filter, selectedFilter); if (filePath != "") { QFileInfo info(filePath); lastParamDir = info.absolutePath(); //cout << "lastParamDir: " << lastParamDir.toStdString() << endl; } return filePath; } QString FileDialog::getSaveDataFileName(const QString &suffix, QWidget *parent, const QString &caption, const QString &filter, QString *selectedFilter) { QString filePath; if (lastDataDir == "") lastDataDir = QDir::currentPath(); filePath.append(lastDataDir); if 
(lastBaseName != "") { filePath.append("/"); filePath.append(lastBaseName); } if (suffix != "") { filePath.append("."); filePath.append(suffix); } filePath = QFileDialog::getSaveFileName(parent, caption, filePath, filter, selectedFilter); if(filePath != "") { QFileInfo info(filePath); lastDataDir = info.absolutePath(); lastBaseName = info.completeBaseName(); //cout << "lastDataDir: " << lastDataDir.toStdString() << // " lastBaseName: " << lastBaseName.toStdString() << endl; } return filePath; } clustalx-2.1/ColumnScoreParams.h0000644000175000017500000000403011470725216016561 0ustar ddineenddineen#ifndef COLUMNSCOREPARAMS_H #define COLUMNSCOREPARAMS_H #include class QPushButton; class QGroupBox; class QRadioButton; class QComboBox; class QLabel; class QButtonGroup; class QSlider; class QLCDNumber; class QShowEvent; class QHBoxLayout; class QGridLayout; class ColumnScoreParams : public QDialog { Q_OBJECT public: ColumnScoreParams(); protected: void showEvent(QShowEvent* event); private slots: void accept(); void browseProtein(); void browseDNA(); void userProteinClicked(); void userDNAClicked(); private: void setUpLayout(); void setUpProteinMatRadioButtons(); void setUpDNAMatRadioButtons(); void setUpOtherParams(); void setUpLoadButtons(); void setInitialProteinMat(); void setInitialDNAMat(); void setInitialParamValues(); void radioSetUsingUserProtein(); QPushButton* okButton; QPushButton* loadProtMat; QPushButton* loadDNAMat; QGroupBox* sliderGridBox; QGroupBox* proteinMatGridBox; QGroupBox* dnaMatGridBox; QGroupBox* loadDNAGroup; QGroupBox* loadProteinGroup; QHBoxLayout* horizLayoutAA; QHBoxLayout* horizLayoutDNA; QGridLayout* grid; QGridLayout* protMatLayout; QGridLayout* dnaMatLayout; QButtonGroup* proteinWeightMat; QRadioButton* identity; QRadioButton* gonnetPAM80; QRadioButton* gonnetPAM120; QRadioButton* gonnetPAM250; QRadioButton* gonnetPAM350; QRadioButton* aaUserMat; QButtonGroup* DNAWeightMat; QRadioButton* IUB; QRadioButton* clustal; QRadioButton* 
dnaUserMat; QLabel* scorePlotScale; QLabel* residueExceptionCutoff; QSlider* plotScale; QSlider* cutOff; QLCDNumber* plotValue; QLCDNumber* cutOffValue; QString aaMatrixPath; QString dnaMatrixPath; QLabel* userProteinMatrix; QLabel* userDNAMatrix; std::string aaMatrixFile; std::string dnaMatrixFile; bool proteinMatLoaded; bool dnaMatLoaded; }; #endif clustalx-2.1/ColumnScoreParams.cpp0000644000175000017500000002542411470725216017126 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ColumnScoreParams.h" #include "clustalW/general/userparams.h" #include "clustalW/substitutionMatrix/globalmatrix.h" #include "FileDialog.h" ColumnScoreParams::ColumnScoreParams() { proteinMatLoaded = false; dnaMatLoaded = false; sliderGridBox = new QGroupBox; proteinMatGridBox = new QGroupBox(tr("Protein Weight Matrix")); dnaMatGridBox = new QGroupBox(tr("DNA Weight Matrix")); QVBoxLayout* mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(sliderGridBox); mainLayout->addWidget(proteinMatGridBox); mainLayout->addWidget(loadProteinGroup); mainLayout->addWidget(dnaMatGridBox); mainLayout->addWidget(loadDNAGroup); setLayout(mainLayout); } void ColumnScoreParams::showEvent(QShowEvent* event) { setInitialProteinMat(); setInitialParamValues(); setInitialDNAMat(); } void ColumnScoreParams::setUpLayout() { setUpLoadButtons(); setUpOtherParams(); grid = new QGridLayout; grid->addWidget(scorePlotScale, 0, 0); grid->addWidget(plotValue, 0, 1); grid->addWidget(plotScale, 0, 2); grid->addWidget(residueExceptionCutoff, 1, 0); grid->addWidget(cutOffValue, 1, 1); grid->addWidget(cutOff, 1, 2); 
sliderGridBox->setLayout(grid); setUpProteinMatRadioButtons(); protMatLayout = new QGridLayout; protMatLayout->addWidget(identity, 0, 0); protMatLayout->addWidget(gonnetPAM80, 0, 1); protMatLayout->addWidget(gonnetPAM120, 0, 2); protMatLayout->addWidget(gonnetPAM250, 1, 0); protMatLayout->addWidget(gonnetPAM350, 1, 1); protMatLayout->addWidget(aaUserMat, 1, 2); proteinMatGridBox->setLayout(protMatLayout); setUpDNAMatRadioButtons(); dnaMatLayout = new QGridLayout; dnaMatLayout->addWidget(IUB, 0, 0); dnaMatLayout->addWidget(clustal, 0, 1); dnaMatLayout->addWidget(dnaUserMat, 1, 0); dnaMatGridBox->setLayout(dnaMatLayout); } void ColumnScoreParams::setUpProteinMatRadioButtons() { proteinWeightMat = new QButtonGroup; identity = new QRadioButton("Identity"); gonnetPAM80 = new QRadioButton("Gonnet PAM 80"); gonnetPAM120 = new QRadioButton("Gonnet PAM 120"); gonnetPAM250 = new QRadioButton("Gonnet PAM 250"); gonnetPAM350 = new QRadioButton("Gonnet PAM 350"); aaUserMat = new QRadioButton("User defined"); connect(aaUserMat, SIGNAL(clicked()), this, SLOT(userProteinClicked())); proteinWeightMat->addButton(identity); proteinWeightMat->addButton(gonnetPAM80); proteinWeightMat->addButton(gonnetPAM120); proteinWeightMat->addButton(gonnetPAM250); proteinWeightMat->addButton(gonnetPAM350); proteinWeightMat->addButton(aaUserMat); } void ColumnScoreParams::userProteinClicked() { if(!proteinMatLoaded) { browseProtein(); } } void ColumnScoreParams::setInitialProteinMat() { if(clustalw::subMatrix->getQTAAHistMatNum() == clustalw::AAHISTIDENTITY) { identity->setChecked(true); } else if(clustalw::subMatrix->getQTAAHistMatNum() == clustalw::AAHISTGONNETPAM80) { gonnetPAM80->setChecked(true); } else if(clustalw::subMatrix->getQTAAHistMatNum() == clustalw::AAHISTGONNETPAM120) { gonnetPAM120->setChecked(true); } else if(clustalw::subMatrix->getQTAAHistMatNum() == clustalw::AAHISTGONNETPAM250) { gonnetPAM250->setChecked(true); } else if(clustalw::subMatrix->getQTAAHistMatNum() == 
clustalw::AAHISTGONNETPAM350) { gonnetPAM350->setChecked(true); } else if(clustalw::subMatrix->getQTAAHistMatNum() == clustalw::AAHISTUSER) { aaUserMat->setChecked(true); } } void ColumnScoreParams::setUpDNAMatRadioButtons() { DNAWeightMat = new QButtonGroup; IUB = new QRadioButton("IUB"); clustal = new QRadioButton("CLUSTALW(1.6)"); dnaUserMat = new QRadioButton("User defined"); connect(dnaUserMat, SIGNAL(clicked()), this, SLOT(userDNAClicked())); DNAWeightMat->addButton(IUB); DNAWeightMat->addButton(clustal); DNAWeightMat->addButton(dnaUserMat); } void ColumnScoreParams::userDNAClicked() { if(!dnaMatLoaded) { browseDNA(); } } void ColumnScoreParams::setInitialDNAMat() { if(clustalw::subMatrix->getQTDNAHistMatNum() == clustalw::DNAIUB) { IUB->setChecked(true); } else if(clustalw::subMatrix->getQTDNAHistMatNum() == clustalw::DNACLUSTALW) { clustal->setChecked(true); } else if(clustalw::subMatrix->getQTDNAHistMatNum() == clustalw::DNAUSERDEFINED) { dnaUserMat->setChecked(true); } } void ColumnScoreParams::setUpOtherParams() { plotScale = new QSlider(Qt::Horizontal); plotScale->setRange(1, 10); scorePlotScale = new QLabel(tr("Score Plot Scale ")); plotValue = new QLCDNumber; connect(plotScale, SIGNAL(valueChanged(int)), plotValue, SLOT(display(int))); cutOff = new QSlider(Qt::Horizontal); cutOff->setRange(1, 10); residueExceptionCutoff = new QLabel(tr("Residue Exception Cutoff ")); cutOffValue = new QLCDNumber; connect(cutOff, SIGNAL(valueChanged(int)), cutOffValue, SLOT(display(int))); } void ColumnScoreParams::setUpLoadButtons() { loadProtMat = new QPushButton(tr("Load protein matrix:")); loadProtMat->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(loadProtMat, SIGNAL(clicked()), this, SLOT(browseProtein())); loadProteinGroup = new QGroupBox; horizLayoutAA = new QHBoxLayout; userProteinMatrix = new QLabel(""); horizLayoutAA->addWidget(loadProtMat); horizLayoutAA->addWidget(userProteinMatrix); loadProteinGroup->setLayout(horizLayoutAA); loadDNAMat = 
new QPushButton(tr("Load DNA matrix:")); loadDNAMat->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(loadDNAMat, SIGNAL(clicked()), this, SLOT(browseDNA())); loadDNAGroup = new QGroupBox; horizLayoutDNA = new QHBoxLayout; userDNAMatrix = new QLabel(""); horizLayoutDNA->addWidget(loadDNAMat); horizLayoutDNA->addWidget(userDNAMatrix); loadDNAGroup->setLayout(horizLayoutDNA); } void ColumnScoreParams::setInitialParamValues() { plotScale->setValue(clustalw::userParameters->getQTScorePlotScale()); plotValue->display(plotScale->value()); cutOff->setValue(clustalw::userParameters->getQTResExceptionCutOff()); cutOffValue->display(cutOff->value()); } void ColumnScoreParams::browseProtein() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; aaMatrixPath = fd.getOpenFileName(this, tr("Find Protein Matrix")); string path = aaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getAAScoreMatFromFile(userFile)) { aaMatrixFile = string(userFile); proteinMatLoaded = true; userProteinMatrix->setText(aaMatrixFile.c_str()); radioSetUsingUserProtein(); } else { aaMatrixFile = ""; proteinMatLoaded = false; userProteinMatrix->setText(aaMatrixFile.c_str()); setInitialProteinMat(); } delete []userFile; } else // pressed cancel { if(!proteinMatLoaded) { setInitialProteinMat(); } } } void ColumnScoreParams::browseDNA() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; dnaMatrixPath = fd.getOpenFileName(this, tr("Find DNA Matrix")); string path = dnaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getDNAScoreMatFromFile(userFile)) { dnaMatrixFile = string(userFile); dnaMatLoaded = true; userDNAMatrix->setText(dnaMatrixFile.c_str()); dnaUserMat->setChecked(true); } else { dnaMatrixFile 
/**
 * Slot run when OK is pressed: copies the dialog state into the clustalw
 * settings objects and hides the dialog with result QDialog::Accepted.
 *
 * The two slider values are always written. For each matrix group, the
 * first checked radio button in the chain decides which matrix id is
 * stored; if no button in a group is checked, that setting is left as is.
 */
void ColumnScoreParams::accept()
{
    clustalw::userParameters->setQTScorePlotScale(plotScale->value());
    clustalw::userParameters->setQTResExceptionCutOff(cutOff->value());

    // Store the amino acid histogram matrix selected by the radio buttons.
    if(identity->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTIDENTITY);
    }
    else if(gonnetPAM80->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTGONNETPAM80);
    }
    else if(gonnetPAM120->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTGONNETPAM120);
    }
    else if(gonnetPAM250->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTGONNETPAM250);
    }
    else if(gonnetPAM350->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTGONNETPAM350);
    }
    else if(aaUserMat->isChecked())
    {
        clustalw::subMatrix->setQTAAHistMatNum(clustalw::AAHISTUSER);
    }

    // Store the DNA histogram matrix selected by the radio buttons.
    if(IUB->isChecked())
    {
        clustalw::subMatrix->setQTDNAHistMatNum(clustalw::DNAIUB);
    }
    else if(clustal->isChecked())
    {
        clustalw::subMatrix->setQTDNAHistMatNum(clustalw::DNACLUSTALW);
    }
    else if(dnaUserMat->isChecked())
    {
        clustalw::subMatrix->setQTDNAHistMatNum(clustalw::DNAUSERDEFINED);
    }

    // NOTE(review): this sets the result and hides directly instead of
    // calling QDialog::accept(); confirm that no caller relies on the
    // accepted()/finished() signals.
    setResult(QDialog::Accepted);
    hide();
}
colors.size();}; void useHardCodedColors(); void useDefaultColorRules(); QColor getResidueColor(QChar res, QChar consensus); int getResidueColorNum(QChar res, QChar consensus); QColor getColorFromNum(int colorNum); QColor getDefaultColor(){return defaultColor;} int getNumConsensusParams(); int getDefaultIndex(){return 0;} std::vector* getConPar(){return &conPar;} std::vector getColorRules(){return colorPar;} void addAllRulesTogether(std::vector colPar); void addConsensusParams(std::vector _conPar){conPar = _conPar;} void printColorInfo(); private: int getIndexOfColor(QColor col); //std::map colors; // Stores the colors from @rgbindex OR hard coded!! std::vector >colors; std::vector colorPar; std::vector conPar; QColor defaultColor; }; #endif clustalx-2.1/ColorParameters.cpp0000644000175000017500000001554111470725216016632 0ustar ddineenddineen#include "ColorParameters.h" #include "ClustalQtParams.h" #include #include #include #include "clustalW/general/userparams.h" ColorParameters::ColorParameters() { defaultColor = QColor(Qt::white); } void ColorParameters::useDefaultColorRules() { bool isDNA = clustalw::userParameters->getDNAFlag(); // Clear the colorPar map colorPar.clear(); QString redRes; QString orangeRes; QString blueRes; QString greenRes; if(!isDNA) { // Default simple protein colors!!!!! redRes = "krh"; orangeRes = "gpst"; blueRes = "fwy"; greenRes = "ilmv"; } else { // Default simple DNA colors!!! redRes = "a"; orangeRes = "c"; blueRes = "tu"; greenRes = "g"; } // Now add the rules!!!!!!!!!!!! 
for(int i = 0; i < redRes.length(); i++) { addColorRule(redRes[i], "RED", ""); } for(int i = 0; i < orangeRes.length(); i++) { addColorRule(orangeRes[i], "ORANGE", ""); } for(int i = 0; i < blueRes.length(); i++) { addColorRule(blueRes[i], "BLUE", ""); } for(int i = 0; i < greenRes.length(); i++) { addColorRule(greenRes[i], "GREEN", ""); } } void ColorParameters::addColor(QString colorName, int red, int green, int blue) { QColor colorToAdd(red, green, blue); colors.push_back(std::pair(colorName, colorToAdd)); } void ColorParameters::addConsensusCondition(QChar name, int cutOff, QString res) { consensusParams paramToAdd; paramToAdd.consensusName = name; paramToAdd.cutOffPercent = cutOff; paramToAdd.cutOffList = res; paramToAdd.length = res.length(); conPar.push_back(paramToAdd); } void ColorParameters::addColorRule(QChar res, QString colorName, QString conditions) { residueColors paramToAdd; paramToAdd.residue = res; paramToAdd.consensusConditionList = conditions; paramToAdd.length = conditions.length(); paramToAdd.colorName = colorName; if(paramToAdd.length > 0) { paramToAdd.type = Compound; } else { paramToAdd.type = Simple; } //std::map::iterator colorIterator; std::vector >::iterator colorIterator; colorIterator = colors.begin();//colors.find(colorName); while(colorIterator != colors.end()) { if(colorIterator->first == colorName) { break; } colorIterator++; } if(colorIterator == colors.end()) { // If it didnt find the color name in the map!!!! 
paramToAdd.color = QColor(defaultColor); } else { paramToAdd.color = QColor(colorIterator->second); } colorPar.push_back(paramToAdd); } void ColorParameters::addAllRulesTogether(std::vector colPar) { // Clear old rules colorPar.clear(); // add each color std::vector::iterator begin = colPar.begin(); std::vector::iterator end = colPar.end(); for(; begin != end; begin++) { addColorRule(begin->residue, begin->colorName, begin->consensusConditionList); } } /** * If we cannot find the @rgbindex tag in the file, we use the default colors. * These colors are specified in the HardCodedColorScheme class. */ void ColorParameters::useHardCodedColors() { HardCodedColorScheme hardCodedColors; colors.clear(); const std::vector* colorScheme = hardCodedColors.getHardCodedScheme(); int numOfColors = colorScheme->size(); for(int i = 0; i< numOfColors; i++) { colors.push_back(std::pair(QString((*colorScheme)[i].name), QColor((*colorScheme)[i].color))); } } QColor ColorParameters::getResidueColor(QChar res, QChar consensus) { int i, j; QChar colorParRes; char showRes = res.toAscii(); char showCon = consensus.toAscii(); for(i = 0; i < colorPar.size(); i++) { colorParRes = colorPar[i].residue; if(res.toLower() == colorParRes.toLower()) { if(colorPar[i].type == Simple) { return colorPar[i].color; } else if(colorPar[i].type == Compound) { for(j = 0; j < colorPar[i].consensusConditionList.size(); j++) { char temp = colorPar[i].consensusConditionList[j].toAscii(); if(consensus == colorPar[i].consensusConditionList[j]) { return colorPar[i].color; } } } else { return defaultColor; } } } return defaultColor; } int ColorParameters::getResidueColorNum(QChar res, QChar consensus) { int i, j; QChar colorParRes; char showRes = res.toAscii(); char showCon = consensus.toAscii(); for(i = 0; i < colorPar.size(); i++) { colorParRes = colorPar[i].residue; if(res.toLower() == colorParRes.toLower()) { if(colorPar[i].type == Simple) { return getIndexOfColor(colorPar[i].color); } else if(colorPar[i].type == 
Compound) { for(j = 0; j < colorPar[i].consensusConditionList.size(); j++) { char temp = colorPar[i].consensusConditionList[j].toAscii(); if(consensus == colorPar[i].consensusConditionList[j]) { return getIndexOfColor(colorPar[i].color); } } } else { return 0; } } } return 0; } void ColorParameters::printColorInfo() { for(int i = 0; i < colors.size(); i++) { cout << "Color index = " << i + 1 << " Color name is " << colors[i].first.toStdString() << "\n"; } } int ColorParameters::getIndexOfColor(QColor col) { for(int i = 0; i < colors.size(); i++) { if(colors[i].second == col) { //cout << "the color is " << colors[i].first.toStdString() << "\n"; return i + 1; } } return 0; // Not found } QColor ColorParameters::getColorFromNum(int colorNum) { if(colorNum > 0 && colorNum - 1 < colorPar.size()) { //cout << "color " << colorNum << " is " // << colors[colorNum - 1].second return colors[colorNum - 1].second; } else { return defaultColor; } } int ColorParameters::getNumConsensusParams() { return conPar.size(); } void ColorParameters::clear() { colors.clear(); colorPar.clear(); conPar.clear(); } clustalx-2.1/ColorFileXmlParser.h0000644000175000017500000000154711470725216016712 0ustar ddineenddineen#ifndef COLORFILEXMLPARSER_H #define COLORFILEXMLPARSER_H #include #include #include #include #include "ColorParameters.h" class ColorFileXmlParser : public QWidget { Q_OBJECT public: ColorFileXmlParser(bool showMess = true); bool read(QIODevice *device); ColorParameters* getColorParametersObj(){return &colorParams;} private slots: private: int parseRGBIndex(const QDomElement &element, QTreeWidgetItem *parentItem = 0); void parseConsensus(const QDomElement &element, QTreeWidgetItem *parentItem = 0); int parseColorRule(const QDomElement &element, QTreeWidgetItem *parentItem = 0); QDomDocument domDocument; ColorParameters colorParams; bool showMessages; }; #endif clustalx-2.1/ColorFileXmlParser.cpp0000644000175000017500000001432011470725216017236 0ustar ddineenddineen#include 
#include "ColorFileXmlParser.h" #include "ClustalQtParams.h" ColorFileXmlParser::ColorFileXmlParser(bool showMess) { showMessages = showMess; } bool ColorFileXmlParser::read(QIODevice *device) { colorParams.clear(); QString errorStr; int errorLine; int errorColumn; if (!domDocument.setContent(device, true, &errorStr, &errorLine, &errorColumn)) { if(showMessages) { QMessageBox::information(window(), tr("Color File Parser"), tr("Parse error at line %1, column %2:\n%3") .arg(errorLine) .arg(errorColumn) .arg(errorStr)); } return false; } QDomElement root = domDocument.documentElement(); if (root.tagName() != "colorparam") { if(showMessages) { QMessageBox::information(window(), tr("Color File"), tr("The file is not a color parameter file.")); } return false; } int numColors = 0; QDomElement rgbIndex = root.firstChildElement("rgbindex"); if (!rgbIndex.isNull()) { numColors = parseRGBIndex(rgbIndex); } if(numColors == 0) { // We use the hard coded color index!!!!! colorParams.useHardCodedColors(); } QDomElement consensus = root.firstChildElement("consensus"); if (!consensus.isNull()) // NOTE optional! { parseConsensus(consensus); } QDomElement colorRules = root.firstChildElement("colorrules"); int numOfRules = 0; if (!colorRules.isNull()) { numOfRules = parseColorRule(colorRules); } if(numOfRules == 0) { // So we need to use the default colors specified!!!!!!!!!! if(showMessages) { //QMessageBox::warning(this, tr("Warning"), // tr("colorrules not found in the parameter file, using\ // defaults!\n")); } colorParams.useDefaultColorRules(); } return true; } int ColorFileXmlParser::parseRGBIndex(const QDomElement &element, QTreeWidgetItem *parentItem) { int numColors = 0; QString colorName; QString redString, greenString, blueString; int red, green, blue; bool ok1, ok2, ok3; int numColorsSoFar = 0; // parse the colors!!!!!!! 
QDomElement color = element.firstChildElement("color"); while (!color.isNull()) { numColorsSoFar = colorParams.getNumColors(); if(numColorsSoFar == MaxColors) { if(showMessages) { QMessageBox::warning(this, tr("Warning"), tr("Max number of colors is %1. Not using the rest!\n") .arg(MaxColors)); } return numColors; } // get each colors information!!!!!!! if(color.hasAttribute("name") && color.hasAttribute("red") && color.hasAttribute("green") && color.hasAttribute("blue")) { colorName = color.attribute("name"); redString = color.attribute("red"); greenString = color.attribute("green"); blueString = color.attribute("blue"); if(colorName.length() > 0 && redString.length() > 0 && greenString.length() > 0 && blueString.length() > 0) { red = redString.toInt(&ok1); green = greenString.toInt(&ok2); blue = blueString.toInt(&ok3); if(ok1 && ok2 && ok3) { colorParams.addColor(colorName, red, green, blue); numColors++; } } } color = color.nextSiblingElement("color"); } return numColors; } void ColorFileXmlParser::parseConsensus(const QDomElement &element, QTreeWidgetItem *parentItem) { QString conditionNameString; QChar name; int cutOffPercent; QString residues; QString temp; bool ok; QDomElement condition = element.firstChildElement("condition"); while (!condition.isNull()) { // get each colors information!!!!!!! 
if(condition.hasAttribute("name") && condition.hasAttribute("cutoffpercent") && condition.hasAttribute("residues")) { conditionNameString = condition.attribute("name"); temp = condition.attribute("cutoffpercent"); residues = condition.attribute("residues"); if(conditionNameString.length() > 0 && temp.length() > 0 && residues.length() > 0) { name = conditionNameString[0]; cutOffPercent = temp.toInt(&ok); if(ok) { colorParams.addConsensusCondition(name, cutOffPercent, residues); } } } condition = condition.nextSiblingElement("condition"); } } int ColorFileXmlParser::parseColorRule(const QDomElement &element, QTreeWidgetItem *parentItem) { int numOfRules = 0; QString colorName; QChar res; QString conditions; QString temp; QDomElement rule = element.firstChildElement("resrule"); while (!rule.isNull()) { // get each colors information!!!!!!! if(rule.hasAttribute("residue") && rule.hasAttribute("colorname") && rule.hasAttribute("conditions")) { colorName = rule.attribute("colorname"); temp = rule.attribute("residue"); conditions = rule.attribute("conditions"); if(colorName.length() > 0 && temp.length() > 0) { res = temp[0]; colorParams.addColorRule(res, colorName, conditions); numOfRules++; } } rule = rule.nextSiblingElement("resrule"); } return numOfRules; }clustalx-2.1/ClustalQtParams.h0000644000175000017500000000431411470725216016251 0ustar ddineenddineen/** * Changes: * * 30-03-07,Nigel Brown(EMBL): renamed variable 'AlignProf1ToProf2WithTree' to * 'AlignProf2ToProf1WithTree' for consistency with clustalx and user * interface menu naming. 
/**
 * One residue colouring rule, as built by ColorParameters::addColorRule().
 */
struct residueColors
{
    int type;                       // Simple when the rule has no conditions, Compound otherwise
    QChar residue;                  // residue character the rule applies to
    QString colorName;              // name of the colour as given in the rule
    int length;                     // number of characters in consensusConditionList
    QColor color;                   // colour resolved from colorName (the default colour if the name is unknown)
    QString consensusConditionList; // consensus characters under which a Compound rule applies
};
/**
 * Dialog that collects the bootstrap settings (random seed, number of
 * trials) and the output file names for the tree formats that are enabled
 * in the clustalw user parameters.
 *
 * The chosen file names are available through getNames() once the dialog
 * has been accepted.
 */
class BootstrapTreeDialog : public QDialog
{
    Q_OBJECT

public:
    // seqFileName is used to derive the default path for the output files.
    BootstrapTreeDialog(QString seqFileName);
    // Returns the file names stored by accept().
    clustalw::TreeNames getNames();
protected:

private slots:
    void accept();          // validates and stores the settings, then hides the dialog
    void cancel();          // hides the dialog with result Rejected
    void browseClustal();   // file chooser for the Clustal tree name
    void browseNexus();     // file chooser for the Nexus tree name
    void browsePhylip();    // file chooser for the Phylip tree name

private:
    void browse(QLineEdit* lineEditName);   // shared implementation of the browse slots
    void setUpLayout();
    // Zeroes every widget pointer; accept() relies on null to detect the
    // line edits that were not created.
    void setAllPtrsToNull();
    QString getPathFromFileName(QString fileName);

    QVBoxLayout* mainLayout;
    QGridLayout* fileLayout;
    QPushButton* okButton;
    QPushButton* bootstrapCancelButton;
    // The per-format browse buttons, labels and line edits below exist only
    // when the corresponding output format is enabled; otherwise they stay null.
    QPushButton* clustalTreeBrowseButton;
    QPushButton* phylipTreeBrowseButton;
    QPushButton* nexusTreeBrowseButton;
    QGroupBox* fileBox;

    QLabel* randomNumberGenSeedLabel;
    QLabel* numBootstrapTrialsLabel;
    QLabel* clustalTreeNameLabel;
    QLabel* phylipTreeNameLabel;
    QLabel* nexusTreeNameLabel;

    QLineEdit* randomNumberGenSeed;
    QLineEdit* numBootstrapTrials;
    QLineEdit* clustalTreeName;
    QLineEdit* phylipTreeName;
    QLineEdit* nexusTreeName;

    QString title;
    QString filePath;   // default output path prefix; the format suffix is appended to it
    // NOTE(review): the template argument of auto_ptr appears to be missing
    // in this copy of the file (presumably clustalw::TreeNames) - confirm
    // against the original header.
    auto_ptr treeFileNames;
    static const int MinLineEditWidth = 250;    // minimum width given to the file-name line edits
};
*/ #include #include #include #include #include #include #include #include #include #include #include "BootstrapTreeDialog.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "ClustalQtParams.h" #include "FileDialog.h" BootstrapTreeDialog::BootstrapTreeDialog(QString seqFileName) { setAllPtrsToNull(); title = "Draw Tree"; setWindowTitle(title); filePath = getPathFromFileName(seqFileName); mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); bootstrapCancelButton = new QPushButton(tr("Cancel")); bootstrapCancelButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(bootstrapCancelButton, SIGNAL(clicked()), this, SLOT(cancel())); fileBox = new QGroupBox(); setUpLayout(); mainLayout->addWidget(fileBox); setLayout(mainLayout); } QString BootstrapTreeDialog::getPathFromFileName(QString fileName) { QString filePath; if(fileName.size() > 0) { std::string path = ""; clustalw::utilityObject->getPath(fileName.toStdString(), &path); if(path.size() > 0) { filePath = QString(path.c_str()); } else { filePath = QDir::currentPath() + QString(QDir::separator()) + "temp"; } } return filePath; } void BootstrapTreeDialog::setUpLayout() { int row = 0; fileLayout = new QGridLayout; QString numStr; int num; randomNumberGenSeedLabel = new QLabel(tr("Random number generator seed[1-1000]: ")); randomNumberGenSeed = new QLineEdit(); num = clustalw::userParameters->getBootRanSeed(); numStr.setNum(num); randomNumberGenSeed->setText(numStr); fileLayout->addWidget(randomNumberGenSeedLabel, row, 0); fileLayout->addWidget(randomNumberGenSeed, row, 1); row++; numBootstrapTrialsLabel = new QLabel(tr("Number of bootstrap trials[1-10000]: ")); numBootstrapTrials = new QLineEdit(); num = clustalw::userParameters->getBootNumTrials(); numStr.setNum(num); numBootstrapTrials->setText(numStr); 
/**
 * Slot for the Clustal tree "Browse" button: lets the user choose the file
 * name and puts it into the Clustal tree line edit (see browse()).
 */
void BootstrapTreeDialog::browseClustal()
{
    browse(clustalTreeName);
}
} void BootstrapTreeDialog::browsePhylip() { browse(phylipTreeName); } void BootstrapTreeDialog::accept() { bool ok; int num; QString randomNumSeed = randomNumberGenSeed->text(); num = randomNumSeed.toInt(&ok); if(ok) { if(num >= 1 && num <= 1000) { clustalw::userParameters->setBootRanSeed(num); } } QString numTrials = numBootstrapTrials->text(); num = numTrials.toInt(&ok); if(ok) { if(num >= 1 && num <= 10000) { clustalw::userParameters->setBootNumTrials(num); } } treeFileNames.reset(new clustalw::TreeNames); if(clustalTreeName != 0) { if(clustalTreeName->text() == "") { treeFileNames->clustalName = filePath.toStdString() + "njb"; } else { QString file = clustalTreeName->text(); treeFileNames->clustalName = file.toStdString(); } } if(phylipTreeName != 0) { if(phylipTreeName->text() == "") { treeFileNames->phylipName = filePath.toStdString() + "phb"; } else { QString file = phylipTreeName->text(); treeFileNames->phylipName = file.toStdString(); } } if(nexusTreeName != 0) { if(nexusTreeName->text() == "") { treeFileNames->nexusName = filePath.toStdString() + "treb"; } else { QString file = nexusTreeName->text(); treeFileNames->nexusName = file.toStdString(); } } setResult(QDialog::Accepted); hide(); } void BootstrapTreeDialog::cancel() { setResult(QDialog::Rejected); hide(); } void BootstrapTreeDialog::browse(QLineEdit* lineEditName) { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getSaveFileName(this, tr("Save as")); if(myFile != "") { lineEditName->setText(myFile); } } clustalw::TreeNames BootstrapTreeDialog::getNames() { return *(treeFileNames.get()); } void BootstrapTreeDialog::setAllPtrsToNull() { nexusTreeBrowseButton = 0; nexusTreeName = 0; nexusTreeNameLabel = 0; phylipTreeBrowseButton = 0; phylipTreeName = 0; phylipTreeNameLabel = 0; clustalTreeBrowseButton = 0; clustalTreeName = 0; clustalTreeNameLabel = 0; numBootstrapTrials = 0; numBootstrapTrialsLabel = 0; randomNumberGenSeed = 0; 
randomNumberGenSeedLabel = 0; bootstrapCancelButton = 0; okButton = 0; fileLayout = 0; fileBox = 0; mainLayout = 0; } clustalx-2.1/AlignmentWidget.h0000644000175000017500000001351711470725216016260 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * * Changes: * * 16-01-07,Nigel Brown(EMBL): added drawContents() called by paintEvent(). * * 20-02-07,Nigel Brown(EMBL): constructor and changeBoxSizeAndFontSize() * parameters reordered. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. * * 25-04-07,Nigel Brown(EMBL): Removed keyPressEvent(), keyReleaseEvent(), * shiftPressed. Keyboard events handled by KeyController singleton. Fixed * problem in shift-selection of columns which did not extend selection * backwards by replacing with new procedure selectColumns(). * * 04-05-07,Nigel Brown(EMBL): moved getFirstSelectedCol() and * getSecondSelectedCol() into AlignmentWidget.cpp. * * 09-05-07,Nigel Brown(EMBL): Added 'mousePressed', mouseMoveEvent() * and mouseReleaseEvent(); Changed 'rowSelected' to 'firstRow' and * added 'secondRow' store row range information. * * 09-05-07,Nigel Brown(EMBL): Added 'scrollArea' and registerScrollArea() * allowing the instance to know its own containing QScrollArea (This is not * the same as parentWidget()). ****************************************************************************/ #ifndef ALIGNMENTWIDGET_H #define ALIGNMENTWIDGET_H #include #include #include #include #include #include #include #include #include "clustalW/alignment/Alignment.h" #include "ColorParameters.h" #include #include class QMouseEvent; class QPaintEvent; class QMessageBox; class QKeyEvent; class QScrollArea; //- nige /** * The AlignmentWidget is used to display a multiple alignment. It takes sequences * in the form of QString objects. The conservation information is displayed, * if it is available. 
*/ class AlignmentWidget : public QWidget { Q_OBJECT public: AlignmentWidget(int boxWidth, int boxSize, QFont* font, QWidget *parent = 0); QSize sizeHint() const; void changeBoxSizeAndFontSize(int _boxWidth, int _boxSize, QFont* fontPtr); void updateSizeHint(); void setAlignment(const clustalw::SeqArray* align, clustalw::Alignment* _alignObjPtr); void updateDisplay(int firstSeq, int lastSeq, clustalw::Array2D* exRes, int nhead, QString secStruct); void updateLowScoreSeg(); void showExceptionalResidues(bool show); bool getShowExceptionalResidues(){return showExceptionalRes;} void showLowScoreSeg(bool show, clustalw::Array2D* _lowScoreRes); bool getShowLowScoreSeg(){return showLowScoreRes;} void setColorScheme(ColorParameters* colors); void setPrintoutColorScheme(ColorParameters colors); void setBlackAndWhite(); void setBackGroundColoring(bool backgroundColor); bool getBackGroundColor(){return backGroundColor;} void clearRangeSelection(); QString getConservationInfo(){return QString(conservationInfo.c_str());} QString getSecStructInfo(){return secStructInfo;} clustalw::Array2D getPrintoutColorMask(); ColorParameters* getPtrToCurrentColorScheme(){return currentColorScheme;} int getFirstSelectedCol(); int getSecondSelectedCol(); void registerScrollArea(QScrollArea *s) { scrollArea = s; }; //- nige public slots: signals: void characterSelected(const QString &character); protected: void mousePressEvent(QMouseEvent *event); void mouseMoveEvent(QMouseEvent *event); void mouseReleaseEvent(QMouseEvent *event); void paintEvent(QPaintEvent *event); void selectColumns(int column); //nige void drawContents(QPainter *p, int cx, int cy, int cw, int ch); //nige private: char getAminoAcidCode(int i); void setUpWeakAndStrongGroups(); void calcConserveInfo(); void calculatePixMaps(); void makeColorMask(clustalw::Array2D* colMask, ColorParameters* colorScheme); void makeColorMask(clustalw::Array2D* colMask, ColorParameters* colorScheme); void generatePixMaps(); void 
generateSecStructurePixmaps(); QString initConsensus(); int getExtraRowNum(); int firstSeq, lastSeq; QFont displayFont; int firstColumnSelected; int secondColumn; int boxSize; int boxWidth; int pixelsize; int alignLength; int numSequencesSoFar; int offSetFromTop; int displayStartPos; QPixmap defaultPixMap; QPixmap defaultGreyPixMap; QString conserveInfo; QColor *selectedColor; // This will stay here!!!!! Others will go!!! QColor *lightGrey; QColor *slateGrey; QColor *backGroundGrey; bool conserveDisplay; bool mousePressed; const clustalw::SeqArray* alignment; clustalw::Alignment* alignObjPtr; bool alignmentSet; bool showAlignment; std::vector strongGroup; std::vector weakGroup; std::string conservationInfo; clustalw::Array2D* exceptionalRes; bool exResLoaded; bool showExceptionalRes; std::string conservationChars; bool lowScoreResLoaded; bool showLowScoreRes; int colorScheme; clustalw::Array2D* lowScoreRes; clustalw::Array2D* colorAlignment; clustalw::Array2D colorMask; clustalw::Array2D colMask; QColor defaultColor; clustalw::Array2D coloredLetters; std::vector* selectedColoredRes; std::vector* lowScoreColoredRes; std::vector* exceptionalColoredRes; std::vector* conservationPixmaps; std::vector* secStructPixmaps; // Color scheme stuff!!! ColorParameters* currentColorScheme; ColorParameters printoutColorScheme; bool blackAndWhite; bool backGroundColor; int secStruct; QString secStructInfo; int defaultIndex; QScrollArea *scrollArea; //-nige }; #endif clustalx-2.1/AlignmentWidget.cpp0000644000175000017500000011164011470725216016607 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * The AlignmentWidget is used to display a multiple alignment. It takes sequences * in the form of QString objects. The conservation information is displayed, * if it is available. 
* * Changes: * * 18-12-06,Nigel Brown(EMBL): calcConserveInfo(): removed decrement by 2 * of maxAA, since this is already done by the owner class; simplified a * few l-value expressions. * * 16-01-07,Nigel Brown(EMBL): added drawContents() called by paintEvent(). * * 31-01-07,Nigel Brown(EMBL): changed && to || in changeBoxSizeAndFontSize(). * * 03-02-07,Nigel Brown(EMBL): changed pencolor to white for * lowScoreColoredRes and exceptionalColoredRes in generatePixMaps() * in keeping with 'clustalX'. * * 06-02-07,Nigel Brown(EMBL): darker low scoring and exceptional residues for * Mac. * * 20-02-07,Nigel Brown(EMBL): constructor and changeBoxSizeAndFontSize() * parameters reordered. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. * * 25-04-07,Nigel Brown(EMBL): Removed keyPressEvent(), keyReleaseEvent(), * shiftPressed. Keyboard events handled by KeyController singleton. Fixed * problem in shift-selection of columns which did not extend selection * backwards by replacing with new procedure selectColumns(). * * 04-05-07,Nigel Brown(EMBL): moved getFirstSelectedCol() and * getSecondSelectedCol() in from include file; modified them to ensure they * return low-to-high ordered values, regardless of the actual mouse selection * ordering. * * 09-05-07,Nigel Brown(EMBL): added ensureVisible() call on new * 'scrollArea' member inside mouseMoveEvent() to scroll when column * range selection extends beyond visible. * * 2007-12-03, Andreas Wilm (UCD): removed nigels comments from * include line to avoid compiler warnings * ****************************************************************************/ #include #include "AlignmentWidget.h" #include "clustalW/general/userparams.h" #include "ClustalQtParams.h" #include "KeyController.h" // KeyController.h - nige //AW not needed anymore#include "compile.h" //#include //using namespace std; /** * This is the constructor for an AlignmentWidget. 
* @param num The number of sequences to be displayed in the AlignmentWidget. * @param length The length of the alignment. * @param boxsize The size of the box to be used in displaying one residue. * @param *parent The parent widget of this widget, an optional parameter. */ AlignmentWidget::AlignmentWidget(int boxwidth, int boxsize, QFont* font, QWidget *parent) : QWidget(parent), blackAndWhite(false), selectedColoredRes(0), lowScoreColoredRes(0), exceptionalColoredRes(0), conservationPixmaps(0), secStructPixmaps(0), backGroundColor(true), secStruct(0), defaultIndex(0) { setEnabled(true); setFocusPolicy(Qt::StrongFocus); defaultPixMap.fill(Qt::white); exResLoaded = false; showExceptionalRes = false; lowScoreResLoaded = false; showLowScoreRes = false; defaultColor = QColor(Qt::white); secStructInfo = ""; alignment = 0; firstSeq = 0; lastSeq = 0; exceptionalRes = 0; lowScoreRes = 0; colorAlignment = 0; /* * Set up initial values */ firstColumnSelected = -1; secondColumn = -1; setMouseTracking(true); boxSize = boxsize; boxWidth = boxwidth; defaultPixMap = QPixmap(boxWidth, boxSize); defaultPixMap.fill(); offSetFromTop = 2; displayStartPos = 0; conserveDisplay = false; mousePressed = false; alignmentSet = false; showAlignment = false; displayFont = *font; alignLength = 0; numSequencesSoFar = 0; /* * Set up the colors to be used in displaying the alignment */ selectedColor = new QColor; lightGrey = new QColor; slateGrey = new QColor; backGroundGrey = new QColor; selectedColor->setRgb(197, 197, 226); #if OS_MAC lightGrey->setRgb(112, 112, 112); slateGrey->setRgb(60, 60, 60); #else lightGrey->setRgb(211, 211, 211); slateGrey->setRgb(112, 128, 144); #endif backGroundGrey->setRgb(221, 221, 221); setUpWeakAndStrongGroups(); conservationChars = " .:*"; } void AlignmentWidget::setAlignment(const clustalw::SeqArray* align, clustalw::Alignment* _alignObjPtr) { alignment = align; alignObjPtr = _alignObjPtr; alignmentSet = true; firstSeq = 0; lastSeq = 0; numSequencesSoFar = 0; 
colorAlignment = new clustalw::Array2D; selectedColoredRes = new std::vector; lowScoreColoredRes = new std::vector; exceptionalColoredRes = new std::vector; conservationPixmaps = new std::vector; secStructPixmaps = new std::vector; calcConserveInfo(); updateSizeHint(); } void AlignmentWidget::updateDisplay(int fSeq, int lSeq, clustalw::Array2D* exRes, int nhead, QString secStructString) { if(fSeq != 0 && lSeq != 0) { lowScoreResLoaded = false; showLowScoreRes = false; alignmentSet = true; showAlignment = true; exResLoaded = true; firstSeq = fSeq; lastSeq = lSeq; numSequencesSoFar = lastSeq - firstSeq + 1; secStruct = nhead; secStructInfo = secStructString; updateSizeHint(); calcConserveInfo(); exceptionalRes = exRes; calculatePixMaps(); firstColumnSelected = -1; // dont show previous selection secondColumn = -1; } else { firstSeq = 0; lastSeq = 0; alignmentSet = false; showAlignment = false; lowScoreResLoaded = false; showLowScoreRes = false; exResLoaded = false; secStruct = 0; secStructInfo = ""; colorAlignment->clearArray(); //selectedColoredRes->clear(); numSequencesSoFar = 0; firstColumnSelected = -1; secondColumn = -1; updateSizeHint(); } update(); } void AlignmentWidget::calculatePixMaps() { int lengthLongestSeq = alignObjPtr->getLengthLongestSequence(firstSeq,lastSeq); std::map::iterator colorIterator; QFontMetrics fontMetrics(displayFont); colorAlignment->clearArray(); colorAlignment->ResizeRect(numSequencesSoFar + getExtraRowNum(), lengthLongestSeq + 1); makeColorMask(&colMask, currentColorScheme); int extraRow = getExtraRowNum(); int currentSeq; char currentChar; int codeForAA; int colorRow; int codeForSpace = clustalw::userParameters->resIndex( clustalw::userParameters->getAminoAcidCodes(), '-'); if(secStruct == 1) { generateSecStructurePixmaps(); } for(int i = 0; i < colorAlignment->getRowSize(); i++) { currentSeq = firstSeq + i - extraRow; for(unsigned int j = 0; j < lengthLongestSeq; j++) { (*colorAlignment)[i][j] = &defaultPixMap; if(i == 0) { // 
Conservation information. currentChar = conservationInfo[j]; int code = clustalw::userParameters->resIndex(conservationChars, currentChar); if(code >= 0 && code < conservationPixmaps->size()) { (*colorAlignment)[i][j] = &(*conservationPixmaps)[code]; } else { (*colorAlignment)[i][j] = &defaultPixMap; } } else if(i == 1 && secStruct == 1) { (*colorAlignment)[i][j] = &(*secStructPixmaps)[j]; } else { if((*alignment)[currentSeq].size() > j + 1) { codeForAA = (*alignment)[currentSeq][j + 1]; if(codeForAA == 31 || codeForAA == 30) { char res = '-'; codeForAA = clustalw::userParameters->resIndex( clustalw::userParameters->getAminoAcidCodes(), res); } } else { codeForAA = -1; } if(lowScoreResLoaded && showLowScoreRes && (*lowScoreRes)[currentSeq - firstSeq][j] == 1) { if(codeForAA >=0 && codeForAA < lowScoreColoredRes->size()) { (*colorAlignment)[i][j] = &(*lowScoreColoredRes)[codeForAA]; } else { (*colorAlignment)[i][j] = &defaultPixMap; } } else if(showExceptionalRes && exResLoaded && (*exceptionalRes)[currentSeq - firstSeq][j] == 1) { if(codeForAA >=0 && codeForAA < exceptionalColoredRes->size()) { (*colorAlignment)[i][j] = &(*exceptionalColoredRes)[codeForAA]; } else { (*colorAlignment)[i][j] = &defaultPixMap; } } else { if(colMask.getRowSize() > (i - extraRow) && colMask.getColSize() > j) { if(blackAndWhite) { colorRow = 0; } else { colorRow = colMask[i - extraRow][j]; } } else { colorRow = 0; // default } if(colorRow >= 0 && colorRow < coloredLetters.getRowSize() && codeForAA >=0 && codeForAA < coloredLetters.getColSize()) { (*colorAlignment)[i][j] = &coloredLetters[colorRow][codeForAA]; } else { (*colorAlignment)[i][j] = &defaultPixMap; } } } } } update(); } void AlignmentWidget::generateSecStructurePixmaps() { QFontMetrics fontMetrics(displayFont); int lengthLongestSeq = alignObjPtr->getLengthLongestSequence(firstSeq,lastSeq); secStructPixmaps->clear(); secStructPixmaps->resize(lengthLongestSeq + 1); defaultGreyPixMap = QPixmap(boxWidth, boxSize); 
defaultGreyPixMap.fill(backGroundGrey->rgb()); for(int i = 0; i < secStructInfo.size(); i++) { if(i < secStructPixmaps->size() && i < secStructInfo.size()) { displayFont.setBold(true); // secondary structure info (*secStructPixmaps)[i] = defaultGreyPixMap; (*secStructPixmaps)[i].fill(backGroundGrey->rgb()); QPainter paintChar(&(*secStructPixmaps)[i]); paintChar.setFont(displayFont); paintChar.drawText((boxWidth / 2) - fontMetrics.width(secStructInfo[i])/2, offSetFromTop + fontMetrics.ascent(), QString(secStructInfo[i])); } } } void AlignmentWidget::showLowScoreSeg(bool show, clustalw::Array2D* _lowScoreRes) { int numSeqs = lastSeq - firstSeq + 1; if(_lowScoreRes != 0) { if((_lowScoreRes->getRowSize() == numSeqs + 1) && (_lowScoreRes->getColSize() == alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq) + 1)) { lowScoreRes = _lowScoreRes; lowScoreResLoaded = true; showLowScoreRes = show; } } else { lowScoreResLoaded = false; showLowScoreRes = false; } calculatePixMaps(); update(); } /** * The function sizeHint is overloaded from the QWidget class. It gives a recomended size for * the widget when resizing is performed. */ QSize AlignmentWidget::sizeHint() const { int extraRow; if(secStruct == 1) { extraRow = 2; } else { extraRow = 1; } return QSize(alignLength * boxWidth, (numSequencesSoFar + extraRow) * boxSize); } void AlignmentWidget::showExceptionalResidues(bool show) { showExceptionalRes = show; calculatePixMaps(); update(); } /** * This function is an event handler. It finds the column in the alignment * that has been clicked. * @param *event A mouse event. 
*/ void AlignmentWidget::mousePressEvent(QMouseEvent *event) //nige { KeyController *kbd = KeyController::Instance(); if (event->button() == Qt::LeftButton) { mousePressed = true; int column = (event->x() / boxWidth); if (kbd->shiftPressed()) { selectColumns(column); } else { firstColumnSelected = column; secondColumn = -1; } update(); //nige return; } QWidget::mousePressEvent(event); } /** * This function is an event handler. It finds the column in the alignment that the mouse * is at when the mouse button has been released. * @param *event A mouse event. */ void AlignmentWidget::mouseReleaseEvent(QMouseEvent *event) //nige { if (event->button() == Qt::LeftButton) { mousePressed = false; update(); return; } QWidget::mouseReleaseEvent(event); } void AlignmentWidget::mouseMoveEvent(QMouseEvent *event) { if (mousePressed == true) { //scroll if necessary to continue selection scrollArea->ensureVisible(event->x(), event->y()); int column = (event->x() / boxWidth); secondColumn = column; update(); return; } QWidget::mouseMoveEvent(event); } void AlignmentWidget::selectColumns(int column) { //cout << "setCols(IN " << firstColumnSelected << "," << secondColumn << ")\n"; bool growright = true; if (secondColumn == -1) { if (column < firstColumnSelected) growright = false; secondColumn = firstColumnSelected; } else if (firstColumnSelected > secondColumn) { growright = false; } if (growright) { if (column < firstColumnSelected) { //reverse direction // c < f <= s ---> f<=>s then c < f firstColumnSelected = secondColumn; secondColumn = column; } else if (column > secondColumn) { // f <= s < c ---> f < s secondColumn = column; } else { // f (c) s ---> f < c secondColumn = column; } } else {//growleft if (column < secondColumn) { // c < s < f ---> c < f secondColumn = column; } else if (column > firstColumnSelected) { //reverse direction // s < f < c ---> f<=>s then f < c firstColumnSelected = secondColumn; secondColumn = column; } else { // s (c) f ---> c < f secondColumn = 
column; } } //cout << "setCols(OUT " << firstColumnSelected << "," << secondColumn << ")\n"; } /** * This function will be called every time sequences are changed. So for example after * an alignment, after we add more sequences etc */ void AlignmentWidget::updateSizeHint() { int lengthLongest = alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq); int extraRow = getExtraRowNum(); resize(lengthLongest * boxWidth, (numSequencesSoFar + extraRow) * boxSize); } /** * This function is an event handler. It repaints the widget whenever it needs * to be repainted. It is called automatically. * It also calculates the number of columns showing and colors in the alignment. * @param *event A resize event. */ void AlignmentWidget::paintEvent(QPaintEvent *event) { QPainter painter(this); QRect rect = event->rect(); int cx = rect.x(); int cy = rect.y(); int cw = rect.width(); int ch = rect.height(); //(void) printf("paintEvent(%d,%d, %d,%d)\n", cx,cy, cw,ch); drawContents(&painter, cx, cy, cw, ch); } void AlignmentWidget::drawContents(QPainter *painter, int cx, int cy, int cw, int ch) { painter->fillRect(cx, cy, cw, ch, QBrush(Qt::green));//white)); painter->setBackgroundMode(Qt::OpaqueMode); // Set up the begin and end columns and rows. 
int beginRow = cy / boxSize; int beginColumn = cx / boxWidth; int endRow = (cy+ch-1) / boxSize; int endColumn = (cx+cw-1) / boxWidth; int selectedRes; conserveDisplay = true; int seqLength = 0; int extra = getExtraRowNum(); //(void) printf("drawContents(x=%d,y=%d, w=%d,h=%d)(bw=%d,bh=%d) -> [br=%d,er=%d, bc=%d,ec=%d]\n", //cx,cy, cw,ch, boxWidth, boxSize, beginRow,endRow, beginColumn,endColumn); if(firstSeq != 0 && alignmentSet && showAlignment) { for (int row = beginRow; row <= endRow; ++row) { for (int column = beginColumn; column <= endColumn; ++column) { //printf("draw[0](%d,%d)\n", row, column); if(firstSeq + row - extra >= 1) { seqLength = alignObjPtr->getSeqLength(firstSeq + row - extra); } else { seqLength = 0; } if (row > (extra - 1) && column < seqLength && (((column >= firstColumnSelected) && (column <= secondColumn)) || ((column >= secondColumn) && (column <= firstColumnSelected) && secondColumn != -1))) // Selected ???? { selectedRes = (*alignment)[firstSeq + row - extra][column + 1]; if(selectedRes >= selectedColoredRes->size()) { selectedRes = selectedColoredRes->size() - 1; // '-' } if(selectedRes >= 0 && !(*selectedColoredRes)[selectedRes].isNull()) { //printf("draw[1](%d,%d)\n", row, column); painter->drawPixmap((column) * boxWidth, row * boxSize, (*selectedColoredRes)[selectedRes]); } } else { // NOTE we need to have this check here. It was causing problems // when removing gaps sometimes. Seems to be ok now!!! 
if(row < colorAlignment->getRowSize() && row >= 0 && column < colorAlignment->getColSize() && column >= 0) { //printf("draw[2](%d,%d) [%d,%d]\n", row, column, // colorAlignment->getRowSize(), // colorAlignment->getColSize() // ); if(!(*colorAlignment)[row][column]->isNull()) { //printf("draw[3](%d,%d) [%d,%d]\n", row, column, // colorAlignment->getRowSize(), // colorAlignment->getColSize() // ); painter->drawPixmap((column) * boxWidth, row * boxSize,*(*colorAlignment)[row][column]); } } } } } } } void AlignmentWidget::changeBoxSizeAndFontSize(int _boxWidth, int _boxSize, QFont* fontPtr) { if (boxWidth != _boxWidth || boxSize != _boxSize) //nige: changed && to || { boxWidth = _boxWidth; boxSize = _boxSize; displayFont = *fontPtr; int lengthLongest = alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq); resize(lengthLongest * boxWidth, (numSequencesSoFar + 1) * boxSize); defaultPixMap = QPixmap(boxWidth, boxSize); defaultPixMap.fill(Qt::white); generatePixMaps(); calculatePixMaps(); repaint(); } } /** * This is a wrapper for the function in the clustalw code. 
*/ char AlignmentWidget::getAminoAcidCode(int i) { if(i < 0 || i >= clustalw::userParameters->getMaxAA()) { return '-'; } return clustalw::userParameters->getAminoAcidCode(i); } void AlignmentWidget::setUpWeakAndStrongGroups() { strongGroup.resize(9); strongGroup[0] = string("STA"); strongGroup[1] = string("NEQK"); strongGroup[2] = string("NHQK"); strongGroup[3] = string("NDEQ"); strongGroup[4] = string("QHRK"); strongGroup[5] = string("MILV"); strongGroup[6] = string("MILF"); strongGroup[7] = string("HY"); strongGroup[8] = string("FYW"); weakGroup.resize(11); weakGroup[0] = string("CSA"); weakGroup[1] = string("ATV"); weakGroup[2] = string("SAG"); weakGroup[3] = string("STNK"); weakGroup[4] = string("STPA"); weakGroup[5] = string("SGND"); weakGroup[6] = string("SNDEQK"); weakGroup[7] = string("NDEQHK"); weakGroup[8] = string("NEQHRK"); weakGroup[9] = string("FVLIM"); weakGroup[10] = string("HFY"); } void AlignmentWidget::calcConserveInfo() { if(firstSeq != 0 && lastSeq != 0) { int catident1[clustalw::NUMRES]; int catident2[clustalw::NUMRES]; int _fSeq, _lSeq; int _fRes, _lRes; _fSeq = firstSeq; _lSeq = lastSeq; _fRes = 1; _lRes = alignObjPtr->getLengthLongestSequence(_fSeq, _lSeq); conservationInfo.clear(); conservationInfo.resize(_lRes); int ident; char c; for(int i = _fRes; i <= _lRes; ++i) { conservationInfo[i - 1] = ' '; ident = 0; for(int j = 1; j <= strongGroup.size(); j++) { catident1[j - 1] = 0; } for(int j = 1; j <= weakGroup.size(); j++) { catident2[j - 1] = 0; } for(int j = _fSeq; j <= _lSeq; ++j) { if((i + _fRes - 1 < (*alignment)[j].size()) && (i + _fRes - 1 < (*alignment)[_fSeq].size())) { /* nige,18-12-06: maxAA already decremented in UserParameters; * introduce some locals for readability */ //if(((*alignment)[_fSeq][i + _fRes - 1] >= 0) && // ((*alignment)[_fSeq][i + _fRes - 1] // <= clustalw::userParameters->getMaxAA() - 2)) // Fowler, my ar*e... 
introduce some locals int thisRes = (*alignment)[_fSeq][i + _fRes - 1]; int maxAA = clustalw::userParameters->getMaxAA(); if (thisRes >= 0 && thisRes <= maxAA) { // Count how many are identical to the first sequence /* nige,18-12-06 */ //if((*alignment)[_fSeq][i + _fRes - 1] == // (*alignment)[j][i + _fRes - 1]) if(thisRes == (*alignment)[j][i + _fRes - 1]) /* nige,end */ { ++ident; } // Count how many are in the same category. for(int k = 1; k <= strongGroup.size(); k++) { for(int l = 0; (c = strongGroup[k - 1][l]); l++) { int resCode = (*alignment)[j][i + _fRes - 1]; if (getAminoAcidCode(resCode) == c) { catident1[k - 1]++; break; } } } for(int k = 1; k <= weakGroup.size(); k++) { for(int l = 0; (c = weakGroup[k - 1][l]); l++) { int resCode = (*alignment)[j][i + _fRes - 1]; if (getAminoAcidCode(resCode) == c) { catident2[k - 1]++; break; } } } } } } // Now do the conservation part for each block. if(ident == _lSeq - _fSeq + 1) { conservationInfo[i - 1] = '*'; // All residues the same! 
} else if (!clustalw::userParameters->getDNAFlag()) { for(int k = 1; k <= strongGroup.size(); k++) { if (catident1[k - 1] == _lSeq - _fSeq + 1) { // All residues member of the same category conservationInfo[i - 1] = ':'; break; } } if(conservationInfo[i - 1] == ' ') { for(int k = 1; k <= weakGroup.size(); k++) { if (catident2[k - 1] == _lSeq - _fSeq + 1) { // All residues member of the same category conservationInfo[i - 1] = '.'; break; } } } } } conserveDisplay = true; } } void AlignmentWidget::setColorScheme(ColorParameters* colors) { currentColorScheme = colors; blackAndWhite = false; generatePixMaps(); if(alignmentSet && showAlignment) { calculatePixMaps(); } } void AlignmentWidget::setPrintoutColorScheme(ColorParameters colors) { printoutColorScheme = colors; } void AlignmentWidget::setBlackAndWhite() { blackAndWhite = true; if(alignmentSet && showAlignment) { generatePixMaps(); calculatePixMaps(); } } void AlignmentWidget::setBackGroundColoring(bool _backGroundColor) { backGroundColor = _backGroundColor; if(alignmentSet && showAlignment) { generatePixMaps(); calculatePixMaps(); } } void AlignmentWidget::makeColorMask(clustalw::Array2D* colMask, ColorParameters* colorScheme) { QColor defaultColor = currentColorScheme->getDefaultColor(); int lengthLongest = alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq); colMask->clearArray(); colMask->ResizeRect(numSequencesSoFar, lengthLongest); clustalw::Array2D intColMask; makeColorMask(&intColMask, colorScheme); for(int i = 0; i < colMask->getRowSize(); i++) { for(int j = 0; j < colMask->getColSize(); j++) { if(i < intColMask.getRowSize() && j < intColMask.getColSize()) { (*colMask)[i][j] = colorScheme->getColorFromNum(intColMask[i][j]); } else { (*colMask)[i][j] = defaultColor; } } } } void AlignmentWidget::makeColorMask(clustalw::Array2D* colMask, ColorParameters* colorScheme) { int lengthLongest = alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq); colMask->clearArray(); 
colMask->ResizeRect(numSequencesSoFar, lengthLongest); for(int i = 0; i < colMask->getRowSize(); i++) { for(int j = 0; j < colMask->getColSize(); j++) { (*colMask)[i][j] = colorScheme->getDefaultIndex(); } } int currentSeq; int code; QString consensus; consensus = initConsensus(); std::string mystring = consensus.toStdString(); QChar alignRes; for(int seq = 0; seq < numSequencesSoFar; seq++) { currentSeq = firstSeq + seq; for(int res = 0; res < lengthLongest; res++) { if(res + 1 < (*alignment)[currentSeq].size() && (lastSeq >= currentSeq)) { code = (*alignment)[currentSeq][res + 1]; alignRes = QChar(getAminoAcidCode(code)); } else { alignRes = QChar('-'); } (*colMask)[seq][res] = colorScheme->getResidueColorNum(alignRes, consensus[res]); if((*colMask)[seq][res] > colorScheme->getNumColors()) { colorScheme->getResidueColorNum(alignRes, consensus[res]); } } } } QString AlignmentWidget::initConsensus() { QString consData; int numRes, seq, res, par, consTotal, i; QChar residue; int code; int numConsParams = currentColorScheme->getNumConsensusParams(); int currentSeq; int lengthLongest = alignObjPtr->getLengthLongestSequence(firstSeq, lastSeq); std::vector* consParams = currentColorScheme->getConPar(); consData.resize(lengthLongest + 1); for(int j = 0; j < consData.size(); j++) { consData[j] = ' '; } for(res = 0; res < lengthLongest; res++) { for(par = 0; par < numConsParams; par++) { consTotal = numRes = 0; for(seq = 0; seq < numSequencesSoFar; seq++) { currentSeq = firstSeq + seq; if(res + 1 < (*alignment)[currentSeq].size() && (lastSeq >= currentSeq)) { code = (*alignment)[currentSeq][res + 1]; residue = QChar(getAminoAcidCode(code)); residue = residue.toLower(); if(residue.isLetter()) { numRes++; } for(i = 0; i < (*consParams)[par].length; i++) { if(residue == (*consParams)[par].cutOffList[i]) { consTotal++; } } } } if(numRes != 0) { if(((consTotal * 100) / numRes) >= (*consParams)[par].cutOffPercent) { consData[res] = (*consParams)[par].consensusName; } } } } 
return consData; } void AlignmentWidget::clearRangeSelection() { firstColumnSelected = -1; secondColumn = -1; update(); } int AlignmentWidget::getFirstSelectedCol() { if (firstColumnSelected < secondColumn) return firstColumnSelected; else return secondColumn; } int AlignmentWidget::getSecondSelectedCol() { if (firstColumnSelected < secondColumn) return secondColumn; else return firstColumnSelected; } int AlignmentWidget::getExtraRowNum() { int extraRow = 1; if(secStruct == 1) { extraRow = 2; } else { extraRow = 1; } return extraRow; } void AlignmentWidget::generatePixMaps() { QFontMetrics fontMetrics(displayFont); // 0 has the default, colors then, selected = numColors+1, std::string aminoAcid = clustalw::userParameters->getAminoAcidCodes(); int numDiffRes = aminoAcid.size(); //code = userParameters->resIndex(userParameters->getAminoAcidCodes(), search[i]); int numColors = currentColorScheme->getNumColors(); // number of rows = numColors + white int numRows = numColors + 1; defaultIndex = 0; coloredLetters.clearArray(); coloredLetters.ResizeRect(numRows, numDiffRes); QColor currentColor; // For each of the colors, create the PixMaps QColor penColor; QBrush boxesBrush; char currentChar; for(int i = 0; i < numRows; i++) { if(i == 0) { currentColor = currentColorScheme->getColorFromNum(i); penColor = QColor(Qt::black); boxesBrush = QBrush(currentColor.rgb()); } else if(i <= numColors) { currentColor = currentColorScheme->getColorFromNum(i); if(backGroundColor) { penColor = QColor(Qt::black); boxesBrush = QBrush(currentColor.rgb()); } else { penColor = currentColor; boxesBrush = QColor(Qt::white); } } else { currentColor = currentColorScheme->getColorFromNum(0); // default penColor = QColor(Qt::black); boxesBrush = QBrush(currentColor.rgb()); } for(int j = 0; j < aminoAcid.size(); j++) { // get a pen color and a background color!!!!! 
coloredLetters[i][j] = QPixmap(boxWidth, boxSize); QPainter paintChar(&coloredLetters[i][j]); coloredLetters[i][j].fill(boxesBrush.color()); currentChar = aminoAcid[j]; paintChar.setFont(displayFont); paintChar.setPen(QPen(penColor)); paintChar.drawText((boxWidth / 2) - fontMetrics.width(currentChar)/2, offSetFromTop + fontMetrics.ascent(), QString(QChar(currentChar))); } } QBrush selectedBrush, exceptionalBrush, lowScoreBrush; selectedBrush = QBrush(selectedColor->rgb()); lowScoreBrush = QBrush(slateGrey->rgb()); exceptionalBrush = QBrush(lightGrey->rgb()); int length = aminoAcid.size(); selectedColoredRes->clear(); selectedColoredRes->resize(length); lowScoreColoredRes->clear(); lowScoreColoredRes->resize(length); exceptionalColoredRes->clear(); exceptionalColoredRes->resize(length); for(int i = 0; i < length; i++) { currentChar = aminoAcid[i]; (*selectedColoredRes)[i] = QPixmap(boxWidth, boxSize); (*selectedColoredRes)[i].fill(selectedBrush.color()); QPainter paintChar(&(*selectedColoredRes)[i]); paintChar.setFont(displayFont); paintChar.drawText((boxWidth / 2) - fontMetrics.width(currentChar)/2, offSetFromTop + fontMetrics.ascent(), QString(QChar(currentChar))); (*lowScoreColoredRes)[i] = QPixmap(boxWidth, boxSize); (*lowScoreColoredRes)[i].fill(lowScoreBrush.color()); QPainter paintChar2(&(*lowScoreColoredRes)[i]); paintChar2.setFont(displayFont); paintChar2.setPen(QPen(QColor(Qt::white))); //nige paintChar2.drawText((boxWidth / 2) - fontMetrics.width(currentChar)/2, offSetFromTop + fontMetrics.ascent(), QString(QChar(currentChar))); (*exceptionalColoredRes)[i] = QPixmap(boxWidth, boxSize); (*exceptionalColoredRes)[i].fill(exceptionalBrush.color()); QPainter paintChar3(&(*exceptionalColoredRes)[i]); paintChar3.setFont(displayFont); paintChar3.setPen(QPen(QColor(Qt::white))); //nige paintChar3.drawText((boxWidth / 2) - fontMetrics.width(currentChar)/2, offSetFromTop + fontMetrics.ascent(), QString(QChar(currentChar))); } QBrush conservationBrush; 
conservationBrush = QBrush(backGroundGrey->rgb()); int numConserveChars = conservationChars.size(); conservationPixmaps->resize(numConserveChars); displayFont.setBold(true); for(int i = 0; i < numConserveChars; i++) { (*conservationPixmaps)[i] = QPixmap(boxWidth, boxSize); (*conservationPixmaps)[i].fill(conservationBrush.color()); QPainter paintChar(&(*conservationPixmaps)[i]); currentChar = conservationChars[i]; paintChar.setFont(displayFont); paintChar.drawText((boxWidth / 2) - fontMetrics.width(currentChar)/2, offSetFromTop + fontMetrics.ascent(), QString(QChar(currentChar))); } } clustalw::Array2D AlignmentWidget::getPrintoutColorMask() { clustalw::Array2D printoutColorMask; makeColorMask(&printoutColorMask, &printoutColorScheme); return printoutColorMask; } clustalx-2.1/AlignmentViewerWidget.h0000644000175000017500000001157311470725216017442 0ustar ddineenddineen/** * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie * This widget contains an AlignmentWidget, HistogramWidget and a SeqNamesWidget. * This is the main widget for displaying the alignment * * Changes: * * 16-01-07,Nigel Brown(EMBL): Added Q_MOS_MAC preprocessor checks for Mac OS * X port, which uses AlignmentWidgetScroll in place of AlignmentWidget, and * HistogramWidgetScroll in place of HistogramWidget. This is to work around a * scrolling limitation in Qt4 QScrollArea. THESE CHANGES SHOULD BE REMOVED * WHEN QT4 QSCROLLAREA IS FIXED! * * Mark: Jan 16th 2007 * I have added an access function hasSeqNameWidgetFocus(). * * 31-01-07,Nigel Brown(EMBL): added defaultFontSize member. * * 12-4-07, Mark Larkin : Changed destructor. We dont need to delete QObjects. * * 04-05-07,Nigel Brown(EMBL): renamed hasSeqNameWidgetFocus() to hasFocus(). * * 27-06-07,Nigel Brown(EMBL): For screen layout balancing: added * recvNumSequencesChanged() and balanceNumSequences() slots, and * numSequencesChanged() signal. * * 20-7-07, Mark, I added in the resizeEvent and moveNamesScrollBarToRight function. 
****************************************************************************/ #ifndef ALIGNMENTVIEWERWIDGET_H #define ALIGNMENTVIEWERWIDGET_H //AW not needed anymore #include "compile.h" //nige #include #include #include "clustalW/alignment/Alignment.h" #include "AlignmentWidget.h" #include "HistogramWidget.h" #include "SeqNameWidget.h" #include "ColorParameters.h" #include "PostscriptFileParams.h" class QFont; class QScrollArea; class QScrollBar; class QGroupBox; class QGridLayout; class QVBoxLayout; /** * This class is an extension of the widget QMainWindow. It contains an * alignment widget, a histogram widget and a sequence name widget. * It is currently used as an alignment viewer, but it will be possible to extend its * functionality to an alignment editor. */ class AlignmentViewerWidget : public QWidget { Q_OBJECT public: AlignmentViewerWidget(QWidget *parent = 0); ~AlignmentViewerWidget(); void setAlignmentView(clustalw::Alignment* alignPtr, int fSeq, int lSeq); void updateView(int fSeq, int lSeq); void updateView(); void updateHistogram(int fSeq, int lSeq); void showExceptionalRes(bool show); void showLowScoringSegments(bool show, clustalw::Array2D* _lowScoreRes); int getFirstSeq(){return firstSeq;} int getLastSeq(){return lastSeq;} int getLengthLongestSeqInRange(); QScrollBar* getAlignmentScrollBar(); void setColorScheme(ColorParameters* colors); void setPrintoutColorScheme(ColorParameters colors); void setBlackAndWhite(); void setBackGroundColoring(bool backgroundColor); void clearSequenceSelection(); void clearRangeSelection(); void selectAllSequences(); bool outputColumnScores(); void saveSequences(QString inputFileName, int type); int getNumberOfSeqsSelected(); const std::vector* getSelectedRows(); void setProfileNum(int prfNum){profileNum = prfNum;} void writeToPSFile(PostscriptFileParams postscriptParams); bool seqsLoaded(); ColorParameters* getPtrToCurrentColorScheme() {return alignWidget->getPtrToCurrentColorScheme();} int 
getFirstSelectedCol(){return alignWidget->getFirstSelectedCol();} int getSecondSelectedCol(){return alignWidget->getSecondSelectedCol();} bool hasFocus(); //- nige protected: //void paintEvent(QPaintEvent *event); void resizeEvent ( QResizeEvent * event ); /* * slots are functions, they can be linked to signals */ public slots: void updateFontAndBoxes(const QString& font); void recvNumSequencesChanged(int count); //nige signals: void numSequencesChanged(int count); //nige private: void createGridBox(); void sendInfoToAlignWidget(); void sendInfoToNameWidget(); int makeGapPenaltyData(); int makeSecStructData(); void moveNamesScrollBarToRight(); int firstSeq; int lastSeq; QGroupBox *gridGroupBox; QGridLayout *layoutGridBox; /* * Widgets to display the alignent and the histogram. */ AlignmentWidget *alignWidget; QScrollArea *alignmentScroll; HistogramWidget *histWidget; QScrollArea *conservationScroll; QScrollArea *namesScroll; QVBoxLayout *mainlayout; /* * Widget to display the sequence names */ SeqNameWidget *seqNames; int defaultFontSize; //nige int defaultBoxSize; int defaultBoxWidth; int alignLength; /* * The main font for the application */ QFont* displayFont; clustalw::Alignment* alignmentToDisplay; std::vector histogram; // vector of column heights clustalw::Array2D* exRes; // Exceptional residues. clustalw::Array2D* lowScoreRes; // low scoring residues. int profileNum; QString secStructData; QString ssName; int nHead; }; #endif clustalx-2.1/AlignmentViewerWidget.cpp0000644000175000017500000006371411470725216020001 0ustar ddineenddineen/** * Changes: * * 16-01-07,Nigel Brown(EMBL): Added Q_MOS_MAC preprocessor checks for Mac OS * X port, which uses AlignmentWidgetScroll in place of AlignmentWidget, and * HistogramWidgetScroll in place of HistogramWidget. This is to work around a * scrolling limitation in Qt4 QScrollArea. THESE CHANGES SHOULD BE REMOVED * WHEN QT4 QSCROLLAREA IS FIXED! 
* * Andreas Wilm: fixed in Qt 4.3.0, see bug entry * http://lists.trolltech.com/qt-interest/2007-01/thread00541-0.html * and * http://trolltech.org/developer/task-tracker/index_html?method=entry&id=147640 * * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * Mark: 19-01-2007. Changed the width of the boxes to make them narrower. Also * added some larger font sizes. updateFontAndBoxes is the function I changed. * * 30-01-07,Nigel Brown(EMBL): changed sizePolicy settings for * CLU_AVOID_MAC_SCROLL_BUG compiled histWidget and alignWidget. Changed * setSizePolicy() on alignWidget and histWidget. * * 31-01-07,Nigel Brown(EMBL): added defaultFontSize member. Changed * updateFontAndBoxes() to use OS_MAC from compile.h to set OS-specific * box sizes: Mac boxes now much tighter vertically around characters (ugly - * not portable). Removed default case, setting locals at outset. * * 03-02-07,Nigel Brown(EMBL): changed outputColumnScores() to call * FileDialog::getSaveDataFileName() in place of * FileDialog::getSaveFileName(); the new version supplies a file suffix hint, * which is 'qscores' as used in clustalX. Disabled "Error: Bad name format" * message box in outputColumnScores() since this occurred on 'Cancel' * action. Added 'All Files (*)' selection. * * 16-02-07,Nigel Brown(EMBL): Changed font to Courier New; made boxwidth, * boxsize automatically derive from font size; added more font sizes. Changed * parameter order on AlignmentWidget::changeBoxSizeAndFontSize(). * * 20-02-07,Nigel Brown(EMBL): Changed SeqNames size policy to expanding; * installed event filter on namesScroll in createGridBox(); small changes to * createGridBox() layout; hasSeqNameWidgetFocus() tests both the QScrollarea * and, if necessary, the child SeqNameWidget for focus. The latter was needed * after changing SeqNameWidget resizing and event handling. * * 15-03-07,Nigel Brown(EMBL): added font sizes to include classical * typographic scalings. 
* * 12-4-07, Mark Larkin : Changed destructor. We dont need to delete QObjects. * * 04-05-07,Nigel Brown(EMBL): hasSeqNameWidgetFocus() now tests all children * for focus; renamed hasSeqNameWidgetFocus() to hasFocus(). * * 09-05-07,Nigel Brown(EMBL): Added calls in createGridBox() to new * AlignmentWidget::registerScrollArea() and * SeqNameWidget::registerScrollArea(), allowing each instance to know its own * containing QScrollArea (This is not the same as parentWidget()). * * 27-06-07,Nigel Brown(EMBL): For screen layout balancing: added * recvNumSequencesChanged() and balanceNumSequences() slots, and * numSequencesChanged() signal. *****************************************************************************/ #include #include #include #include "AlignmentViewerWidget.h" #include "SaveSeqFile.h" #include "clustalW/alignment/AlignmentOutput.h" #include "PSPrinter.h" #include "FileDialog.h" #include AlignmentViewerWidget::AlignmentViewerWidget(QWidget *parent) : QWidget(parent), namesScroll(0), profileNum(0), nHead(0) { firstSeq = 0; lastSeq = 0; alignmentToDisplay = 0; defaultFontSize = 10; /* * 9 out of 10 Goldilocks prefer 'Courier New' * * nige (with apologies to the three bears) **/ //displayFont = new QFont("Helvetica"); //too chunky //displayFont = new QFont("Courier"); //too heavy displayFont = new QFont("Courier New"); //just right if (! 
displayFont->exactMatch()) displayFont->setFamily("Courier"); //heavier displayFont->setPixelSize(defaultFontSize); QFontMetrics fontMetrics(*displayFont); QSize size = fontMetrics.size(Qt::TextSingleLine, QString("W")); defaultBoxWidth = size.width(); defaultBoxSize = size.height(); //cout << defaultFontSize << " @ " << defaultBoxWidth << " x " << defaultBoxSize << endl; exRes = new clustalw::Array2D(); alignWidget = new AlignmentWidget(defaultBoxWidth, defaultBoxSize, displayFont); alignWidget->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Expanding); histWidget = new HistogramWidget(defaultBoxWidth, defaultBoxSize); histWidget->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); seqNames = new SeqNameWidget(defaultBoxSize, displayFont); //seqNames->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Expanding); //- nige seqNames->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); createGridBox(); /* * create a layout for this widget. */ mainlayout = new QVBoxLayout; mainlayout->addWidget(gridGroupBox); setLayout(mainlayout); updateFontAndBoxes("10"); } AlignmentViewerWidget::~AlignmentViewerWidget() { delete exRes; // Mark 12-4-07, changed destructor. 
} void AlignmentViewerWidget::setAlignmentView(clustalw::Alignment* alignPtr, int fSeq, int lSeq) { alignmentToDisplay = alignPtr; firstSeq = fSeq; lastSeq = lSeq; sendInfoToAlignWidget(); sendInfoToNameWidget(); } void AlignmentViewerWidget::updateView() { nHead = makeSecStructData(); if(nHead == 0) { nHead = makeGapPenaltyData(); } seqNames->updateDisplay(firstSeq, lastSeq, nHead, ssName); updateHistogram(firstSeq, lastSeq); alignWidget->updateDisplay(firstSeq, lastSeq, exRes, nHead, secStructData); moveNamesScrollBarToRight();//Mark 20-07-07 } void AlignmentViewerWidget::updateView(int fSeq, int lSeq) { firstSeq = fSeq; lastSeq = lSeq; updateView(); } int AlignmentViewerWidget::getLengthLongestSeqInRange() { return alignmentToDisplay->getLengthLongestSequence(firstSeq, lastSeq); } void AlignmentViewerWidget::showLowScoringSegments(bool show, clustalw::Array2D* _lowScoreRes) { lowScoreRes = _lowScoreRes; alignWidget->showLowScoreSeg(show, lowScoreRes); } void AlignmentViewerWidget::showExceptionalRes(bool show) { alignWidget->showExceptionalResidues(show); } void AlignmentViewerWidget::updateHistogram(int fSeq, int lSeq) { int numSeqs = lastSeq - firstSeq + 1; if(fSeq != 0 && lSeq != 0) { exRes->clearArray(); exRes->ResizeRect(numSeqs, alignmentToDisplay->getLengthLongestSequence(firstSeq, lastSeq)); histogram = *(alignmentToDisplay->QTcalcHistColumnHeights(firstSeq - 1, numSeqs, exRes)); histWidget->addHistogramInfo(&histogram); } else { histWidget->clearHistogram(); } } void AlignmentViewerWidget::updateFontAndBoxes(const QString& font) { int newFontSize = font.toUInt(); switch (newFontSize) { case 72: //- case 60: //- case 48: //- case 36: //- case 30: case 28: case 26: case 24: case 22: case 21: //- case 20: case 18: case 16: case 14: case 12: case 11: case 10: case 9: case 8: break; default: newFontSize = defaultFontSize; } displayFont->setPixelSize(newFontSize); QFontMetrics fontMetrics(*displayFont); QSize size = fontMetrics.size(Qt::TextSingleLine, 
QString("W")); int newBoxWidth = size.width(); int newBoxSize = size.height(); //cout << newFontSize << " @ " << newBoxWidth << " x " << newBoxSize << endl; histWidget->updateBoxSize(newBoxWidth, newBoxSize); alignmentScroll->setVisible(false); alignWidget->changeBoxSizeAndFontSize(newBoxWidth, newBoxSize, displayFont); alignmentScroll->setVisible(true); seqNames->changeBoxSizeAndFontSize(newBoxSize, displayFont); repaint(); } void AlignmentViewerWidget::createGridBox() { /* * Create a grid group box for the alignment viewer part */ gridGroupBox = new QGroupBox(); layoutGridBox = new QGridLayout; alignmentScroll = new QScrollArea; alignmentScroll->setWidget(alignWidget); alignWidget->registerScrollArea(alignmentScroll); //- nige conservationScroll = new QScrollArea; conservationScroll->setWidget(histWidget); conservationScroll->setFixedHeight(histWidget->height()); alignmentScroll->setBackgroundRole(QPalette::Base); alignmentScroll->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOn); conservationScroll->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff); conservationScroll->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff); conservationScroll->setBackgroundRole(QPalette::Base); namesScroll = new QScrollArea; namesScroll->setWidget(seqNames); namesScroll->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff); namesScroll->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOn); namesScroll->setBackgroundRole(QPalette::Base); /** * 20-02-07,Nigel: child intercepts resize events to adjust own size. 
*/ namesScroll->viewport()->installEventFilter(seqNames); seqNames->registerScrollArea(namesScroll); //- nige QScrollBar* alignHorizontalScroll = alignmentScroll->horizontalScrollBar(); QScrollBar* conserveHorizontalScroll = conservationScroll->horizontalScrollBar(); QScrollBar* alignVerticalScroll = alignmentScroll->verticalScrollBar(); QScrollBar* namesVerticalScroll = namesScroll->verticalScrollBar(); connect(alignHorizontalScroll, SIGNAL(valueChanged(int)), conserveHorizontalScroll, SLOT(setValue(int))); connect(alignVerticalScroll, SIGNAL(valueChanged(int)), namesVerticalScroll, SLOT(setValue(int))); connect(namesVerticalScroll, SIGNAL(valueChanged(int)), alignVerticalScroll, SLOT(setValue(int))); /* * Set the layout of the widgets. */ layoutGridBox->addWidget(namesScroll, 0, 0, 1, 1); layoutGridBox->addWidget(alignmentScroll, 0, 1, 1, 5); layoutGridBox->addWidget(conservationScroll, 1, 1, 1, 5); /* * Set stretch and size policies. */ layoutGridBox->setRowStretch(0, 5); layoutGridBox->setRowStretch(1, 0); layoutGridBox->setColumnStretch(0, 1); layoutGridBox->setColumnStretch(1, 5); //layoutGridBox->setRowStretch(0, 20); //layoutGridBox->setRowStretch(2, 0); //layoutGridBox->setColumnStretch(0, 1); //layoutGridBox->setColumnStretch(2, 3); gridGroupBox->setLayout(layoutGridBox); gridGroupBox->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); } void AlignmentViewerWidget::setColorScheme(ColorParameters* colors) { alignWidget->setColorScheme(colors); } void AlignmentViewerWidget::setPrintoutColorScheme(ColorParameters colors) { alignWidget->setPrintoutColorScheme(colors); } void AlignmentViewerWidget::setBlackAndWhite() { alignWidget->setBlackAndWhite(); } void AlignmentViewerWidget::setBackGroundColoring(bool backgroundColor) { alignWidget->setBackGroundColoring(backgroundColor); } /** * The function getAlignmentScrollBar is used to return the horizontal scrollbar from * the alignment scroll area. 
This is to allow the scroll lock in the profile viewing * mode. * @return Pointer to the horizontal scroll bar in alignment scroll area. */ QScrollBar* AlignmentViewerWidget::getAlignmentScrollBar() { return alignmentScroll->horizontalScrollBar(); } void AlignmentViewerWidget::sendInfoToAlignWidget() { alignWidget->setAlignment(alignmentToDisplay->getSeqArray(), alignmentToDisplay); } void AlignmentViewerWidget::sendInfoToNameWidget() { seqNames->setNames(alignmentToDisplay); } void AlignmentViewerWidget::clearSequenceSelection() { seqNames->clearSequenceSelection(); } void AlignmentViewerWidget::clearRangeSelection() { alignWidget->clearRangeSelection(); } void AlignmentViewerWidget::selectAllSequences() { seqNames->selectAllSequences(); } void AlignmentViewerWidget::saveSequences(QString inputFileName, int type) { if(firstSeq > 0 && lastSeq >= firstSeq) { int lastRes = alignmentToDisplay->getLengthLongestSequence(firstSeq, lastSeq); rangeParams alignmentRange; alignmentRange.firstSeq = firstSeq; alignmentRange.lastSeq = lastSeq; alignmentRange.firstRes = 1; alignmentRange.lastRes = lastRes; int code; SaveSeqFile saveSeqs(inputFileName, type, alignmentRange); code = saveSeqs.exec(); if(code == QDialog::Accepted) { // save sequences as QString saveFileName = saveSeqs.getFileName(); if(saveFileName.size() > 0) { // Save the file!! clustalw::AlignmentOutput alignmentOutput; alignmentOutput.openAlignmentOutput(saveFileName.toStdString()); alignmentOutput.createAlignmentOutput(alignmentToDisplay, firstSeq, lastSeq); } else { QString tempFileName = QDir::currentPath() + "temp"; QString message = "File name cannot be blank. 
Using default:\n" + tempFileName; QMessageBox::information(this, "", message, QMessageBox::Ok); } } } else { QMessageBox::information(this, "", "No sequences loaded.", QMessageBox::Ok); } } int AlignmentViewerWidget::getNumberOfSeqsSelected() { return seqNames->getNumberOfSeqsSelected(); } const std::vector* AlignmentViewerWidget::getSelectedRows() { return seqNames->getSelectedRows(); } bool AlignmentViewerWidget::outputColumnScores() { int numSeqsSelected = seqNames->getNumberOfSeqsSelected(); if(numSeqsSelected == 0) { QMessageBox::information(this, "ClustalQt", "Select sequences to be written by clicking on the names.", QMessageBox::Ok); return false; } else { QString fileName; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; fileName = fd.getSaveDataFileName("qscores", this, tr("Choose a file name to save under."), "Score files (*.qscores);;All Files (*)"); if(fileName == "") { //QMessageBox::information(this, "ClustalQt", "Error: Bad name format.", //nige // QMessageBox::Ok); //nige return false; } else { // Open the file QFile columnScoreFile(fileName); if(!columnScoreFile.open(QIODevice::WriteOnly)) { QMessageBox::information(this, "ClustalQt", "Could not open save file.", QMessageBox::Ok); return false; } else { QTextStream out(&columnScoreFile); // Get the sequence numbers to be output from the names object const std::vector* selected = seqNames->getSelectedRows(); // Get the sequences from the alignmentToDisplay object!! const clustalw::SeqArray* seqArray = alignmentToDisplay->getSeqArray(); // Get the scores from the histogram object! 
const std::vector* columnScores = histWidget->getColumnScores(); int val; char res; bool gapCol; int colScore; int gapPos1 = clustalw::userParameters->getGapPos1(); int gapPos2 = clustalw::userParameters->getGapPos2(); // Get the maximum length of the sequences int maxLength = 0; int lengthCurrentSeq = 0; int numSeqs = alignmentToDisplay->getNumSeqs(); for(int i = 0; i < numSeqs; i++) { if((*selected)[i] == 1) { lengthCurrentSeq = alignmentToDisplay->getSeqLength(i + 1); if(lengthCurrentSeq > maxLength) { maxLength = lengthCurrentSeq; } } } for(int j = 1; j <= maxLength; j++) { // Check for a column of gaps! gapCol = true; for(int i = 1; i <= numSeqs; i++) { if((*selected)[i - 1] == 1) { if(j <= alignmentToDisplay->getSeqLength(i)) { val = (*seqArray)[i][j]; if(val != gapPos1 && val != gapPos2) { gapCol = false; break; } } } } if(gapCol == false) { for(int i = 1; i <= numSeqs; i++) { if((*selected)[i - 1] == 1) { if(j > alignmentToDisplay->getSeqLength(i)) { res = '-'; } else { val = (*seqArray)[i][j]; if(val == gapPos1 || val == gapPos2) { res = '-'; } else { res = clustalw::userParameters->getAminoAcidCode(val); } } // Now write to file! 
out << res << " "; } } // AW: problem: this comes from histogram data // which sets max values to 99 instead of 100 colScore = (*columnScores)[j - 1]; out << "\t" << colScore <<"\n"; } } columnScoreFile.close(); std::string name = fileName.toStdString(); clustalw::utilityObject->info("File %s saved", name.c_str()); return true; } } } } int AlignmentViewerWidget::makeGapPenaltyData() { int i,n = 0; if(firstSeq > 0 && lastSeq >= firstSeq) { char val; int len = alignmentToDisplay->getSeqLength(firstSeq); ssName = ""; secStructData = ""; bool ok = false; vector* gapMask; if (profileNum == 1) { secStructData += QString(len, ' '); if (clustalw::userParameters->getStructPenalties1() == clustalw::GMASK && clustalw::userParameters->getUseSS1() == true) { gapMask = alignmentToDisplay->getGapPenaltyMask1(); ok = true; } } else if (profileNum == 2) { if (clustalw::userParameters->getStructPenalties2() == clustalw::GMASK && clustalw::userParameters->getUseSS2() == true) { gapMask = alignmentToDisplay->getGapPenaltyMask2(); ok = true; } } if(ok) { n = 1; ssName = "Gap Penalties"; for(i = 0; i < len && i < gapMask->size(); i++) { val = (*gapMask)[i]; if (val == clustalw::userParameters->getGapPos1()) { secStructData[i] = '-'; } else { secStructData[i] = val; } } } } return(n); } int AlignmentViewerWidget::makeSecStructData() { int i,n = 0; char val; if(firstSeq > 0 && lastSeq >= firstSeq) { vector ssMask; int len = alignmentToDisplay->getSeqLength(firstSeq); ssName = ""; secStructData = ""; vector* _secStructMask; bool ok = false; if (profileNum == 1) { if (clustalw::userParameters->getStructPenalties1() == clustalw::SECST && clustalw::userParameters->getUseSS1() == true) { _secStructMask = alignmentToDisplay->getSecStructMask1(); ok = true; } } else if (profileNum == 2) { if (clustalw::userParameters->getStructPenalties2() == clustalw::SECST && clustalw::userParameters->getUseSS2() == true) { _secStructMask = alignmentToDisplay->getSecStructMask2(); ok = true; } } if(ok) { n = 
1; ssName = "Structures"; ssMask.assign(len + 10, 0); secStructData = ""; secStructData += QString(len, ' '); for (i = 0; i < len && i < _secStructMask->size();i++) { ssMask[i] = _secStructMask->at(i); } clustalw::AlignmentOutput out; out.printSecStructMask(len, _secStructMask, &ssMask); for(i = 0; i < len && i < ssMask.size(); i++) { val = ssMask[i]; if (val == clustalw::userParameters->getGapPos1()) { secStructData[i] = '-'; } else { secStructData[i] = val; } } } } return(n); } void AlignmentViewerWidget::writeToPSFile(PostscriptFileParams postscriptParams) { if(firstSeq != 0 && lastSeq != 0) { PostscriptFileParams params = postscriptParams; clustalw::Array2D colorMask; colorMask = alignWidget->getPrintoutColorMask(); psPrintParams printParams; printParams.alignPtr = alignmentToDisplay; printParams.colorMask = &colorMask; if(alignWidget->getShowExceptionalResidues() && exRes != 0) { printParams.showExRes = true; printParams.exRes = *exRes; } else { printParams.showExRes = false; } printParams.firstSeq = firstSeq; printParams.histogram = &histogram; printParams.lastSeq = lastSeq; if(alignWidget->getShowLowScoreSeg() && lowScoreRes != 0) { printParams.showLowScoreRes = true; printParams.lowScoreRes = *lowScoreRes; } else { printParams.showLowScoreRes = false; } if((profileNum == 1 && clustalw::userParameters->getStructPenalties1() != clustalw::NONE && clustalw::userParameters->getUseSS1() == true) || (profileNum == 2 && clustalw::userParameters->getStructPenalties2() != clustalw::NONE && clustalw::userParameters->getUseSS2() == true)) { printParams.nameHeader.resize(2); printParams.nameHeader[0] = ""; printParams.nameHeader[1] = seqNames->getNameSSHeader(); printParams.seqHeader.resize(2); printParams.seqHeader[0] = alignWidget->getConservationInfo(); printParams.seqHeader[1] = alignWidget->getSecStructInfo(); } else { printParams.nameHeader.resize(1); printParams.nameHeader[0] = ""; printParams.seqHeader.resize(1); printParams.seqHeader[0] = 
alignWidget->getConservationInfo(); } printParams.nSeqs = lastSeq - firstSeq + 1;; // Note need to do something with backGroundColors printParams.backGroundColors = alignWidget->getBackGroundColor(); PSPrinter myPrinter(&printParams); myPrinter.writePSFile(¶ms); QString qtName = params.getWriteSeqsToFile(); std::string name = qtName.toStdString(); clustalw::utilityObject->info("Postscript file %s written", name.c_str()); } else { QMessageBox::warning(this, tr("Error"), tr("Error: No file loaded.")); } } bool AlignmentViewerWidget::seqsLoaded() { if(firstSeq != 0 && lastSeq != 0) { return true; } else { return false; } } bool AlignmentViewerWidget::hasFocus() { int c = 0; // nige again c += namesScroll->hasFocus(); c += seqNames->hasFocus(); c += alignmentScroll->hasFocus(); c += alignWidget->hasFocus(); //these seem not to work: c += conservationScroll->hasFocus(); c += histWidget->hasFocus(); return (c > 0 ? true : false); } //nige void AlignmentViewerWidget::recvNumSequencesChanged(int count) { //printf("RECV/EMIT(AVW, recvNumSequencesChanged) %0x (%d)\n", (int)this, count); emit numSequencesChanged(count); } void AlignmentViewerWidget::resizeEvent ( QResizeEvent * event ) { moveNamesScrollBarToRight();//Mark 20-7-07 } /*Mark 20-7-07*/ void AlignmentViewerWidget::moveNamesScrollBarToRight() { if(namesScroll != 0) { QScrollBar* horizNameScrollBar = namesScroll->horizontalScrollBar(); int scrollBarWidth = horizNameScrollBar->maximumWidth(); horizNameScrollBar->setValue(scrollBarWidth); } } clustalx-2.1/AlignmentParameters.h0000644000175000017500000000425211470725216017134 0ustar ddineenddineen#ifndef ALIGNMENTPARAMETERS_H #define ALIGNMENTPARAMETERS_H #include class QPushButton; class QGroupBox; class QRadioButton; class QComboBox; class QLabel; class QButtonGroup; class QLineEdit; class QVBoxLayout; class QGridLayout; class QHBoxLayout; class QHBoxLayout; class QShowEvent; class AlignmentParameters : public QDialog { Q_OBJECT public: AlignmentParameters(); 
protected: void showEvent(QShowEvent* event); private slots: void accept(); void browseProtein(); void browseDNA(); void userProteinClicked(); void userDNAClicked(); private: void setUpLayout(); void setUpProteinMatRadioButtons(); void setUpDNAMatRadioButtons(); void setUpOtherParams(); void setUpLoadButtons(); void setInitialProteinMat(); void setInitialDNAMat(); void setInitialParams(); void radioSetUsingUserProtein(); QPushButton* okButton; QPushButton* loadProtMat; QPushButton* loadDNAMat; QGroupBox* paramGridBox; QGroupBox* proteinMatGridBox; QGroupBox* dnaMatGridBox; QGroupBox* verticalBox; QGroupBox* loadDNAGroup; QGroupBox* loadProteinGroup; QVBoxLayout* mainLayout; QGridLayout* grid; QGridLayout* protMatLayout; QGridLayout* dnaMatLayout; QVBoxLayout* multipleParamLayout; QHBoxLayout* horizLayoutAA; QHBoxLayout* horizLayoutDNA; QButtonGroup* proteinMat; QRadioButton* blosum; QRadioButton* pam; QRadioButton* gonnet; QRadioButton* identity; QRadioButton* userProt; QButtonGroup* DNAMat; QRadioButton* IUB; QRadioButton* clustal; QRadioButton* userDNA; QComboBox* useNegMat; QLineEdit* gapOpen; QLineEdit* gapExtend; QLineEdit* delayDivergent; QLineEdit* dnaTranWeight; QLabel* useNegMatLabel; QLabel* gapOpenLabel; QLabel* gapExtendLabel; QLabel* delayDivergentLabel; QLabel* dnaTranWeightLabel; QLabel* userProteinMatrix; QLabel* userDNAMatrix; QString aaMatrixPath; QString dnaMatrixPath; std::string aaMatrixFile; std::string dnaMatrixFile; bool proteinMatLoaded; bool dnaMatLoaded; }; #endif clustalx-2.1/AlignmentParameters.cpp0000644000175000017500000003463111470725216017473 0ustar ddineenddineen/** * Changes: * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. 
* * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "AlignmentParameters.h" #include "clustalW/general/userparams.h" #include "clustalW/substitutionMatrix/globalmatrix.h" #include "FileDialog.h" AlignmentParameters::AlignmentParameters() { setWindowTitle("Alignment Parameters"); proteinMatLoaded = false; dnaMatLoaded = false; paramGridBox = new QGroupBox; proteinMatGridBox = new QGroupBox(tr("Protein Weight Matrix")); dnaMatGridBox = new QGroupBox(tr("DNA Weight Matrix")); verticalBox = new QGroupBox(tr("Multiple Parameters")); mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Fixed); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(verticalBox); setLayout(mainLayout); } void AlignmentParameters::showEvent(QShowEvent* event) { setInitialProteinMat(); setInitialDNAMat(); setInitialParams(); } void AlignmentParameters::setUpLayout() { setUpOtherParams(); grid = new QGridLayout; grid->addWidget(gapOpenLabel, 0, 0); grid->addWidget(gapOpen, 0, 1); grid->addWidget(gapExtendLabel, 1, 0); grid->addWidget(gapExtend, 1, 1); grid->addWidget(delayDivergentLabel, 2, 0); grid->addWidget(delayDivergent, 2, 1); grid->addWidget(dnaTranWeightLabel, 3, 0); grid->addWidget(dnaTranWeight, 3, 1); grid->addWidget(useNegMatLabel, 4, 0); grid->addWidget(useNegMat, 4, 1); paramGridBox->setLayout(grid); setUpProteinMatRadioButtons(); protMatLayout = new QGridLayout; protMatLayout->addWidget(blosum, 0, 0); protMatLayout->addWidget(pam, 0, 1); protMatLayout->addWidget(gonnet, 1, 0); protMatLayout->addWidget(identity, 1, 1); protMatLayout->addWidget(userProt, 0, 2); proteinMatGridBox->setLayout(protMatLayout); setUpDNAMatRadioButtons(); dnaMatLayout = new QGridLayout; dnaMatLayout->addWidget(IUB, 0, 0); dnaMatLayout->addWidget(clustal, 0, 1); 
dnaMatLayout->addWidget(userDNA, 0, 2); dnaMatGridBox->setLayout(dnaMatLayout); setUpLoadButtons(); multipleParamLayout = new QVBoxLayout; multipleParamLayout->addWidget(paramGridBox); multipleParamLayout->addWidget(proteinMatGridBox); multipleParamLayout->addWidget(loadProteinGroup); multipleParamLayout->addWidget(dnaMatGridBox); multipleParamLayout->addWidget(loadDNAGroup); verticalBox->setLayout(multipleParamLayout); } void AlignmentParameters::setUpProteinMatRadioButtons() { proteinMat = new QButtonGroup; proteinMat->setExclusive(true); blosum = new QRadioButton("BLOSUM series"); pam = new QRadioButton("PAM series"); gonnet = new QRadioButton("Gonnet series"); identity = new QRadioButton("Identity matrix"); userProt = new QRadioButton("User defined"); connect(userProt, SIGNAL(clicked()), this, SLOT(userProteinClicked())); proteinMat->addButton(blosum); proteinMat->addButton(pam); proteinMat->addButton(gonnet); proteinMat->addButton(identity); proteinMat->addButton(userProt); } void AlignmentParameters::setInitialProteinMat() { if((clustalw::subMatrix->getMatrixNum() - 1) == clustalw::AABLOSUM) { blosum->setChecked(true); } else if((clustalw::subMatrix->getMatrixNum() - 1) == clustalw::AAPAM) { pam->setChecked(true); } else if((clustalw::subMatrix->getMatrixNum() - 1) == clustalw::AAGONNET) { gonnet->setChecked(true); } else if((clustalw::subMatrix->getMatrixNum() - 1) == clustalw::AAIDENTITY) { identity->setChecked(true); } else if((clustalw::subMatrix->getMatrixNum() - 1) == clustalw::AAUSERDEFINED) { userProt->setChecked(true); } } void AlignmentParameters::radioSetUsingUserProtein() { userProt->setChecked(true); blosum->setChecked(false); pam->setChecked(false); gonnet->setChecked(false); identity->setChecked(false); } void AlignmentParameters::userProteinClicked() { if(!proteinMatLoaded) { browseProtein(); } } void AlignmentParameters::setUpDNAMatRadioButtons() { DNAMat = new QButtonGroup; DNAMat->setExclusive(true); IUB = new QRadioButton("IUB"); clustal = 
new QRadioButton("CLUSTALW(1.6)"); userDNA = new QRadioButton("User defined"); connect(userDNA, SIGNAL(clicked()), this, SLOT(userDNAClicked())); DNAMat->addButton(IUB); DNAMat->addButton(clustal); DNAMat->addButton(userDNA); } void AlignmentParameters::setInitialDNAMat() { if((clustalw::subMatrix->getDNAMatrixNum() - 1) == clustalw::DNAIUB) { IUB->setChecked(true); } else if((clustalw::subMatrix->getDNAMatrixNum() - 1) == clustalw::DNACLUSTALW) { clustal->setChecked(true); } else if((clustalw::subMatrix->getDNAMatrixNum() - 1) == clustalw::DNAUSERDEFINED) { userDNA->setChecked(true); } } void AlignmentParameters::userDNAClicked() { if(!dnaMatLoaded) { browseDNA(); } } void AlignmentParameters::setUpOtherParams() { // Need to intialy set the parameters to be either DNA or protein. // Could be setting DNA if the alignment we read in is DNA. gapOpen = new QLineEdit; gapOpenLabel = new QLabel(tr("Gap Opening [0-100]: ")); gapExtend = new QLineEdit; gapExtendLabel = new QLabel(tr("Gap Extension [0-100]: ")); delayDivergent = new QLineEdit; delayDivergentLabel = new QLabel(tr("Delay Divergent Sequences(%): ")); dnaTranWeight = new QLineEdit; dnaTranWeightLabel = new QLabel(tr("DNA Transition Weight[0-1] ")); useNegMat = new QComboBox(); useNegMat->addItem("Off"); useNegMat->addItem("On"); useNegMatLabel = new QLabel(tr("Use Negative Matrix ")); loadProtMat = new QPushButton(tr("Load protein matrix:")); loadProtMat->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); connect(loadProtMat, SIGNAL(clicked()), this, SLOT(browseProtein())); loadDNAMat = new QPushButton(tr("Load DNA matrix:")); loadDNAMat->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); connect(loadDNAMat, SIGNAL(clicked()), this, SLOT(browseDNA())); } void AlignmentParameters::setInitialParams() { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAParams(); } else { clustalw::userParameters->setProtParams(); } QString num; num = 
num.setNum(clustalw::userParameters->getGapOpen()); gapOpen->setText(num); num = num.setNum(clustalw::userParameters->getGapExtend()); gapExtend->setText(num); num = num.setNum(clustalw::userParameters->getDivergenceCutoff()); delayDivergent->setText(num); num = num.setNum(clustalw::userParameters->getTransitionWeight()); dnaTranWeight->setText(num); if(clustalw::userParameters->getUseNegMatrix()) { useNegMat->setCurrentIndex(1); } else { useNegMat->setCurrentIndex(0); } } void AlignmentParameters::setUpLoadButtons() { loadProteinGroup = new QGroupBox; horizLayoutAA = new QHBoxLayout; userProteinMatrix = new QLabel(""); horizLayoutAA->addWidget(loadProtMat); horizLayoutAA->addWidget(userProteinMatrix); loadProteinGroup->setLayout(horizLayoutAA); loadDNAGroup = new QGroupBox; horizLayoutDNA = new QHBoxLayout; userDNAMatrix = new QLabel(""); horizLayoutDNA->addWidget(loadDNAMat); horizLayoutDNA->addWidget(userDNAMatrix); loadDNAGroup->setLayout(horizLayoutDNA); } void AlignmentParameters::browseProtein() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; aaMatrixPath = fd.getOpenFileName(this, tr("Find Protein Matrix")); string path = aaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getUserMatFromFile(userFile, clustalw::Protein, clustalw::MultipleAlign)) { aaMatrixFile = string(userFile); proteinMatLoaded = true; userProteinMatrix->setText(aaMatrixFile.c_str()); radioSetUsingUserProtein(); } else { aaMatrixFile = ""; proteinMatLoaded = false; userProteinMatrix->setText(aaMatrixFile.c_str()); setInitialProteinMat(); } delete []userFile; } else // we pressed cancel { if(!proteinMatLoaded) { setInitialProteinMat(); } } } void AlignmentParameters::browseDNA() { // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; dnaMatrixPath = fd.getOpenFileName(this, tr("Find DNA Matrix")); string path = 
dnaMatrixPath.toStdString(); if(path != "") { int lenOfString = path.length(); char* userFile = new char[lenOfString + 1]; strcpy(userFile, path.c_str()); if (clustalw::subMatrix->getUserMatFromFile(userFile, clustalw::DNA, clustalw::MultipleAlign)) { dnaMatrixFile = string(userFile); dnaMatLoaded = true; userDNAMatrix->setText(dnaMatrixFile.c_str()); userDNA->setChecked(true); } else { dnaMatrixFile = ""; dnaMatLoaded = false; setInitialDNAMat(); userDNAMatrix->setText(dnaMatrixFile.c_str()); } delete []userFile; } else // we pressed cancel { if(!dnaMatLoaded) { setInitialDNAMat(); } } } void AlignmentParameters::accept() { bool ok; // check _gapExtention QString gExtend = gapExtend->text(); float extend; extend = gExtend.toFloat(&ok); if(ok && extend >= 0.0 && extend <= 100.0) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAGapExtend(extend); } else { clustalw::userParameters->setAAGapExtend(extend); } } // check _gapOpen QString gOpen = gapOpen->text(); float open; open = gOpen.toFloat(&ok); if(ok && open >= 0.0 && open <= 100.0) { if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAGapOpen(open); } else { clustalw::userParameters->setAAGapOpen(open); } } // check _delayDivSeq QString divPc = delayDivergent->text(); int delay; delay = divPc.toInt(&ok); if(ok && delay >= 0 && delay <= 100) { clustalw::userParameters->setDivergenceCutoff(delay); } // check _DNATranWeight QString tWeight = dnaTranWeight->text(); float tranWeight; tranWeight = tWeight.toFloat(&ok); if(ok && tranWeight >= 0.0 && tranWeight <= 1.0) { clustalw::userParameters->setTransitionWeight(tranWeight); } // Protein Weight Matrix if(blosum->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("blosum", 1, clustalw::Protein, clustalw::MultipleAlign); } else if(pam->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("pam", 2, clustalw::Protein, clustalw::MultipleAlign); } else if(gonnet->isChecked()) { 
clustalw::subMatrix->setCurrentNameAndNum("gonnet", 3, clustalw::Protein, clustalw::MultipleAlign); } else if(identity->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("id", 4, clustalw::Protein, clustalw::MultipleAlign); } else if(userProt->isChecked()) { if(aaMatrixFile != "") { clustalw::subMatrix->setCurrentNameAndNum(aaMatrixFile, 5, clustalw::Protein, clustalw::MultipleAlign); } else // Set to a default if a problem with the { clustalw::subMatrix->setCurrentNameAndNum("gonnet", 3, clustalw::Protein, clustalw::MultipleAlign); } } // DNA matrix if(IUB->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("iub", 1, clustalw::DNA, clustalw::MultipleAlign); } else if(clustal->isChecked()) { clustalw::subMatrix->setCurrentNameAndNum("clustalw", 2, clustalw::DNA, clustalw::MultipleAlign); } else if(userDNA->isChecked()) { if(dnaMatrixFile != "") { clustalw::subMatrix->setCurrentNameAndNum(dnaMatrixFile, 3, clustalw::DNA, clustalw::MultipleAlign); } else // Set it to the default { clustalw::subMatrix->setCurrentNameAndNum("iub", 1, clustalw::DNA, clustalw::MultipleAlign); } } if(useNegMat->currentIndex() == 1) { clustalw::userParameters->setUseNegMatrix(true); } else { clustalw::userParameters->setUseNegMatrix(false); } // NOTE it is important to set the parameters here. 
if(clustalw::userParameters->getDNAFlag()) { clustalw::userParameters->setDNAParams(); } else { clustalw::userParameters->setProtParams(); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/AlignmentFormatOptions.h0000644000175000017500000000171311470725216017634 0ustar ddineenddineen#ifndef ALIGNMENTFORMATOPTIONS_H #define ALIGNMENTFORMATOPTIONS_H #include class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class AlignmentFormatOptions : public QDialog { Q_OBJECT public: AlignmentFormatOptions(); protected: private slots: void accept(); private: void setUpLayout(); void setUpRadioButtons(); void setUpOtherParams(); QPushButton* okButton; QGroupBox* fileGridBox; QGroupBox* otherParams; QGroupBox* verticalBox; QCheckBox* clustal; QCheckBox* gcg; QCheckBox* gde; QCheckBox* fasta; QCheckBox* nbrf; QCheckBox* phylip; QCheckBox* nexus; QComboBox* gdeOutputCase; QComboBox* clustalSequenceNumbers; QComboBox* outputOrder; QComboBox* parameterOutput; QLabel* gdeOutputCaseLabel; QLabel* clustalSequenceNumbersLabel; QLabel* outputOrderLabel; QLabel* parameterOutputLabel; }; #endif clustalx-2.1/AlignmentFormatOptions.cpp0000644000175000017500000001466511470725216020201 0ustar ddineenddineen#include #include #include #include #include #include #include #include #include #include "AlignmentFormatOptions.h" #include "clustalW/general/userparams.h" AlignmentFormatOptions::AlignmentFormatOptions() { setWindowTitle("Output Format Options"); fileGridBox = new QGroupBox("Output Files"); otherParams = new QGroupBox; QVBoxLayout* mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); setUpLayout(); mainLayout->addWidget(okButton); mainLayout->addWidget(fileGridBox); mainLayout->addWidget(otherParams); setLayout(mainLayout); } void AlignmentFormatOptions::setUpLayout() { setUpRadioButtons(); QGridLayout *grid = 
new QGridLayout; grid->addWidget(clustal, 0, 0); grid->addWidget(nbrf, 0, 1); grid->addWidget(gcg, 1, 0); grid->addWidget(phylip, 1, 1); grid->addWidget(gde, 2, 0); grid->addWidget(nexus, 2, 1); grid->addWidget(fasta, 3, 0); fileGridBox->setLayout(grid); setUpOtherParams(); QGridLayout *otherParamLayout = new QGridLayout; otherParamLayout->addWidget(gdeOutputCaseLabel, 0, 0); otherParamLayout->addWidget(gdeOutputCase, 0, 1); otherParamLayout->addWidget(clustalSequenceNumbersLabel, 1, 0); otherParamLayout->addWidget(clustalSequenceNumbers, 1, 1); otherParamLayout->addWidget(outputOrderLabel, 2, 0); otherParamLayout->addWidget(outputOrder, 2, 1); otherParamLayout->addWidget(parameterOutputLabel, 3, 0); otherParamLayout->addWidget(parameterOutput, 3, 1); otherParams->setLayout(otherParamLayout); } void AlignmentFormatOptions::setUpRadioButtons() { clustal = new QCheckBox("CLUSTAL format"); gcg = new QCheckBox("GCG/MSF format"); gde = new QCheckBox("GDE format"); fasta = new QCheckBox("FASTA format"); nbrf = new QCheckBox("NBRF/PIR format"); phylip = new QCheckBox("PHYLIP format"); nexus = new QCheckBox("NEXUS format"); if(clustalw::userParameters->getOutputClustal()) { clustal->setChecked(true); } if(clustalw::userParameters->getOutputGCG()) { gcg->setChecked(true); } if(clustalw::userParameters->getOutputGde()) { gde->setChecked(true); } if(clustalw::userParameters->getOutputFasta()) { fasta->setChecked(true); } if(clustalw::userParameters->getOutputNbrf()) { nbrf->setChecked(true); } if(clustalw::userParameters->getOutputPhylip()) { phylip->setChecked(true); } if(clustalw::userParameters->getOutputNexus()) { nexus->setChecked(true); } } void AlignmentFormatOptions::setUpOtherParams() { gdeOutputCase = new QComboBox(); gdeOutputCase->addItem("Lower"); gdeOutputCase->addItem("Upper"); if(clustalw::userParameters->getLowercase()) { gdeOutputCase->setCurrentIndex(0); } else { gdeOutputCase->setCurrentIndex(1); } gdeOutputCaseLabel = new QLabel(tr("GDE output case :")); 
clustalSequenceNumbers = new QComboBox(); clustalSequenceNumbers->addItem("Off"); clustalSequenceNumbers->addItem("On"); if(clustalw::userParameters->getClSeqNumbers()) { clustalSequenceNumbers->setCurrentIndex(1); } else { clustalSequenceNumbers->setCurrentIndex(0); } clustalSequenceNumbersLabel = new QLabel(tr("CLUSTALW sequence numbers :")); outputOrder = new QComboBox(); outputOrder->addItem("Aligned"); outputOrder->addItem("Input"); if(clustalw::userParameters->getOutputOrder() == clustalw::ALIGNED) { outputOrder->setCurrentIndex(0); } else { outputOrder->setCurrentIndex(1); } outputOrderLabel = new QLabel(tr("Output order :")); parameterOutput = new QComboBox(); parameterOutput->addItem("Off"); parameterOutput->addItem("On"); if(clustalw::userParameters->getSaveParameters()) { parameterOutput->setCurrentIndex(1); } else { parameterOutput->setCurrentIndex(0); } parameterOutputLabel = new QLabel(tr("Parameters output :")); } void AlignmentFormatOptions::accept() { // Output file options if(clustal->isChecked()) { clustalw::userParameters->setOutputClustal(true); } else { clustalw::userParameters->setOutputClustal(false); } if(gcg->isChecked()) { clustalw::userParameters->setOutputGCG(true); } else { clustalw::userParameters->setOutputGCG(false); } if(gde->isChecked()) { clustalw::userParameters->setOutputGde(true); } else { clustalw::userParameters->setOutputGde(false); } if(fasta->isChecked()) { clustalw::userParameters->setOutputFasta(true); } else { clustalw::userParameters->setOutputFasta(false); } if(nbrf->isChecked()) { clustalw::userParameters->setOutputNbrf(true); } else { clustalw::userParameters->setOutputNbrf(false); } if(phylip->isChecked()) { clustalw::userParameters->setOutputPhylip(true); } else { clustalw::userParameters->setOutputPhylip(false); } if(nexus->isChecked()) { clustalw::userParameters->setOutputNexus(true); } else { clustalw::userParameters->setOutputNexus(false); } // GDE output file case if(gdeOutputCase->currentIndex() == 0) { 
clustalw::userParameters->setLowercase(true); } else { clustalw::userParameters->setLowercase(false); } if(clustalSequenceNumbers->currentIndex() == 1) { clustalw::userParameters->setClSeqNumbers(true); } else { clustalw::userParameters->setClSeqNumbers(false); } if(outputOrder->currentIndex() == 0) { clustalw::userParameters->setOutputOrder(clustalw::ALIGNED); } else { clustalw::userParameters->setOutputOrder(clustalw::INPUT); } if(parameterOutput->currentIndex() == 1) { clustalw::userParameters->setSaveParameters(true); } else { clustalw::userParameters->setSaveParameters(false); } setResult(QDialog::Accepted); hide(); } clustalx-2.1/AlignOutputFileNames.h0000644000175000017500000000535211470725216017233 0ustar ddineenddineen/** * Changes: * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. */ #ifndef ALIGNOUTPUTFILENAMES_H #define ALIGNOUTPUTFILENAMES_H #include #include #include "ClustalQtParams.h" class QPushButton; class QGroupBox; class QComboBox; class QLabel; class QCheckBox; class QVBoxLayout; class QGridLayout; class QLineEdit; class AlignOutputFileNames : public QDialog { Q_OBJECT public: AlignOutputFileNames(QString inputFileName, FileNameDialogType _type, AlignType _alignType, QString profileName = ""); clustalw::AlignmentFileNames getNames(); bool getRealignEndGapPen(){return realignEndGapPen;} protected: private slots: void accept(); void cancel(); void browseGuideTreeSeq(); void browseGuideTreeProfile(); void browseInputTreeSeq(); void browseInputTreeProfile(); void browseClustal(); void browseNBRF(); void browseGCG(); void browseNexus(); void browseFasta(); void browsePhylip(); void browseGDE(); private: void browse(QLineEdit* lineEditName); void browseInputTree(QLineEdit* lineEditName); void setUpLayout(); void setUpTopBox(); void setUpOptional(); void setAllPtrsToNull(); QString getPathFromFileName(QString fileName); QVBoxLayout* mainLayout; QGridLayout* topBoxLayout; QGridLayout* fileLayout; QGridLayout* otherParamLayout; 
QPushButton* okButton; QPushButton* alignOutCancelButton; QPushButton* guideTreeSeqBrowseButton; QPushButton* guideTreeProfile2BrowseButton; QPushButton* clustalBrowseButton; QPushButton* nbrfBrowseButton; QPushButton* gcgBrowseButton; QPushButton* phylipBrowseButton; QPushButton* gdeBrowseButton; QPushButton* nexusBrowseButton; QPushButton* fastaBrowseButton; QGroupBox* fileBox; QGroupBox* topBox; QComboBox* realignSegEndGapPenalties; QLabel* guideTreeNameLabelSeq; QLabel* outputFilesLabel; QLabel* clustalNameLabel; QLabel* nbrfNameLabel; QLabel* gcgNameLabel; QLabel* phylipNameLabel; QLabel* gdeNameLabel; QLabel* nexusNameLabel; QLabel* fastaNameLabel; QLabel* realignSegEndGapPenaltiesLabel; QLineEdit* guideTreeNameSeq; QLineEdit* guideTreeNameProfile2; QLineEdit* clustalName; QLineEdit* nbrfName; QLineEdit* gcgName; QLineEdit* phylipName; QLineEdit* gdeName; QLineEdit* nexusName; QLineEdit* fastaName; FileNameDialogType type; AlignType alignType; QString title; QString filePath; QString profile2Path; auto_ptr outFileNames; static const int MinLineEditWidth = 250; bool realignEndGapPen; }; #endif clustalx-2.1/AlignOutputFileNames.cpp0000644000175000017500000004173311470725216017571 0ustar ddineenddineen/** * Changes: * * Mark: 18-01-2007. Changed from using QFileDialog to FileDialog. * * 30-03-07,Nigel Brown(EMBL): renamed variables 'AlignProf1ToProf2' to * 'AlignProf2ToProf1' and 'AlignProf1ToProf2WithTree' to * 'AlignProf2ToProf1WithTree', for consistency with clustalx and user * interface menu naming. * * 12-4-07, Mark Larkin, Removed destructor. No need to delete QObjects. * * 18-06-07,Nigel Brown(EMBL): Made the 'ok' button in each dialogue be the * default instead of a Browse button. 
*/ #include #include #include #include #include #include #include #include #include #include #include "AlignOutputFileNames.h" #include "clustalW/general/userparams.h" #include "clustalW/general/utils.h" #include "ClustalQtParams.h" #include "FileDialog.h" AlignOutputFileNames::AlignOutputFileNames(QString inputFileName, FileNameDialogType _type, AlignType _alignType, QString profileName) : type(_type), alignType(_alignType) { setAllPtrsToNull(); if(type == CompleteAlign) { title = "Complete Alignment"; } else if(type == AlignFromTree) { title = "Alignment From Guide Tree"; } else if(type == RealignSelectedSeqs) { title = "Realign Sequences"; } else if(type == RealignSeqRange) { title = "Realign Residue Range"; } else if(type == DoGuideTreeOnly) { title = "Create Tree"; } else if(type == AlignProf2ToProf1) { title = "Profile To Profile Alignment"; } else if(type == AlignSeqsToProf1) { title = "Sequences To Profile Alignment"; } else if(type == AlignProf2ToProf1WithTree) { title = "Profile Alignment From Tree"; } else if(type == AlignSeqsToProf1WithTree) { title = "Sequence To Profile Alignment From Tree"; } else { title = ""; } setWindowTitle(title); filePath = getPathFromFileName(inputFileName); if(profileName != "") { profile2Path = getPathFromFileName(profileName); } else { profile2Path = ""; } mainLayout = new QVBoxLayout; okButton = new QPushButton(tr("OK")); okButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(okButton, SIGNAL(clicked()), this, SLOT(accept())); alignOutCancelButton = new QPushButton(tr("Cancel")); alignOutCancelButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); connect(alignOutCancelButton, SIGNAL(clicked()), this, SLOT(cancel())); fileBox = new QGroupBox(); setUpLayout(); mainLayout->addWidget(topBox); mainLayout->addWidget(fileBox); setLayout(mainLayout); } QString AlignOutputFileNames::getPathFromFileName(QString fileName) { QString filePath; if(fileName.size() > 0) { std::string path = ""; 
clustalw::utilityObject->getPath(fileName.toStdString(), &path); if(path.size() > 0) { filePath = QString(path.c_str()); } else { filePath = QDir::currentPath() + QString(QDir::separator()) + "temp"; } } return filePath; } void AlignOutputFileNames::setUpOptional() { int row = 0; fileLayout = new QGridLayout; if(type != DoGuideTreeOnly) { outputFilesLabel = new QLabel(tr("Output Alignment Files")); fileLayout->addWidget(outputFilesLabel, row, 0); row++; if(clustalw::userParameters->getOutputClustal()) { clustalNameLabel = new QLabel(tr("Clustal: ")); clustalName = new QLineEdit(); clustalName->setText(filePath + "aln"); clustalName->setMinimumWidth(MinLineEditWidth); clustalBrowseButton = new QPushButton("Browse"); clustalBrowseButton->setAutoDefault(false); //nige connect(clustalBrowseButton, SIGNAL(clicked()), this, SLOT(browseClustal())); fileLayout->addWidget(clustalNameLabel, row, 0); fileLayout->addWidget(clustalName, row, 1); fileLayout->addWidget(clustalBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputNbrf()) { nbrfNameLabel = new QLabel(tr("NBRF/PIR: ")); nbrfName = new QLineEdit(); nbrfName->setText(filePath + "pir"); nbrfName->setMinimumWidth(MinLineEditWidth); nbrfBrowseButton = new QPushButton("Browse"); nbrfBrowseButton->setAutoDefault(false); //nige connect(nbrfBrowseButton, SIGNAL(clicked()), this, SLOT(browseNBRF())); fileLayout->addWidget(nbrfNameLabel, row, 0); fileLayout->addWidget(nbrfName, row, 1); fileLayout->addWidget(nbrfBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputGCG()) { gcgNameLabel = new QLabel(tr("GCG/MSF: ")); gcgName = new QLineEdit(); gcgName->setText(filePath + "msf"); gcgName->setMinimumWidth(MinLineEditWidth); gcgBrowseButton = new QPushButton("Browse"); gcgBrowseButton->setAutoDefault(false); //nige connect(gcgBrowseButton, SIGNAL(clicked()), this, SLOT(browseGCG())); fileLayout->addWidget(gcgNameLabel, row, 0); fileLayout->addWidget(gcgName, row, 1); 
fileLayout->addWidget(gcgBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputPhylip()) { phylipNameLabel = new QLabel(tr("Phylip: ")); phylipName = new QLineEdit(); phylipName->setText(filePath + "phy"); phylipName->setMinimumWidth(MinLineEditWidth); phylipBrowseButton = new QPushButton("Browse"); phylipBrowseButton->setAutoDefault(false); //nige connect(phylipBrowseButton, SIGNAL(clicked()), this, SLOT(browsePhylip())); fileLayout->addWidget(phylipNameLabel, row, 0); fileLayout->addWidget(phylipName, row, 1); fileLayout->addWidget(phylipBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputGde()) { gdeNameLabel = new QLabel(tr("GDE: ")); gdeName = new QLineEdit(); gdeName->setText(filePath + "gde"); gdeName->setMinimumWidth(MinLineEditWidth); gdeBrowseButton = new QPushButton("Browse"); gdeBrowseButton->setAutoDefault(false); //nige connect(gdeBrowseButton, SIGNAL(clicked()), this, SLOT(browseGDE())); fileLayout->addWidget(gdeNameLabel, row, 0); fileLayout->addWidget(gdeName, row, 1); fileLayout->addWidget(gdeBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputNexus()) { nexusNameLabel = new QLabel(tr("Nexus: ")); nexusName = new QLineEdit(); nexusName->setText(filePath + "nxs"); nexusName->setMinimumWidth(MinLineEditWidth); nexusBrowseButton = new QPushButton("Browse"); nexusBrowseButton->setAutoDefault(false); //nige connect(nexusBrowseButton, SIGNAL(clicked()), this, SLOT(browseNexus())); fileLayout->addWidget(nexusNameLabel, row, 0); fileLayout->addWidget(nexusName, row, 1); fileLayout->addWidget(nexusBrowseButton, row, 2); row++; } if(clustalw::userParameters->getOutputFasta() && type != RealignSeqRange) { fastaNameLabel= new QLabel(tr("Fasta: ")); fastaName = new QLineEdit(); fastaName->setText(filePath + "fasta"); fastaName->setMinimumWidth(MinLineEditWidth); fastaBrowseButton = new QPushButton("Browse"); fastaBrowseButton->setAutoDefault(false); //nige connect(fastaBrowseButton, SIGNAL(clicked()), this, 
SLOT(browseFasta())); fileLayout->addWidget(fastaNameLabel, row, 0); fileLayout->addWidget(fastaName, row, 1); fileLayout->addWidget(fastaBrowseButton, row, 2); row++; } if(type == RealignSeqRange) { realignSegEndGapPenaltiesLabel = new QLabel(tr("Realign Segment End Gap Penalties")); realignSegEndGapPenalties = new QComboBox; realignSegEndGapPenalties->addItem("ON"); realignSegEndGapPenalties->addItem("OFF"); fileLayout->addWidget(realignSegEndGapPenaltiesLabel, row, 0, 1, 2); fileLayout->addWidget(realignSegEndGapPenalties, row, 2, 1, 1); row++; } } okButton->setAutoDefault(true); //nige fileLayout->addWidget(okButton, row, 0); fileLayout->addWidget(alignOutCancelButton, row, 1); fileBox->setLayout(fileLayout); } void AlignOutputFileNames::setUpLayout() { setUpTopBox(); setUpOptional(); } void AlignOutputFileNames::setUpTopBox() { topBox = new QGroupBox(); topBoxLayout = new QGridLayout(); guideTreeSeqBrowseButton = new QPushButton("Browse"); guideTreeSeqBrowseButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); guideTreeSeqBrowseButton->setAutoDefault(false); //nige if(type == AlignFromTree || type == AlignProf2ToProf1WithTree || type == AlignSeqsToProf1WithTree) { guideTreeNameLabelSeq = new QLabel(tr("Input Guide Tree:")); connect(guideTreeSeqBrowseButton, SIGNAL(clicked()), this, SLOT(browseInputTreeSeq())); } else { guideTreeNameLabelSeq = new QLabel(tr("Output Guide Tree:")); connect(guideTreeSeqBrowseButton, SIGNAL(clicked()), this, SLOT(browseGuideTreeSeq())); } topBoxLayout->addWidget(guideTreeNameLabelSeq, 0, 0); guideTreeNameSeq = new QLineEdit(); guideTreeNameSeq->setMinimumWidth(MinLineEditWidth); guideTreeNameSeq->setText(filePath + "dnd"); topBoxLayout->addWidget(guideTreeNameSeq, 1, 0); topBoxLayout->addWidget(guideTreeSeqBrowseButton, 1, 1); if(type == AlignProf2ToProf1 || type == AlignProf2ToProf1WithTree) { guideTreeNameProfile2 = new QLineEdit(); guideTreeNameProfile2->setMinimumWidth(MinLineEditWidth); 
guideTreeNameProfile2->setText(profile2Path + "dnd"); topBoxLayout->addWidget(guideTreeNameProfile2, 2, 0); guideTreeProfile2BrowseButton = new QPushButton("Browse"); guideTreeProfile2BrowseButton->setSizePolicy(QSizePolicy::Maximum, QSizePolicy::Maximum); guideTreeProfile2BrowseButton->setAutoDefault(false); //nige if(type == AlignProf2ToProf1) { connect(guideTreeProfile2BrowseButton, SIGNAL(clicked()), this, SLOT(browseGuideTreeProfile())); } else { connect(guideTreeProfile2BrowseButton, SIGNAL(clicked()), this, SLOT(browseInputTreeProfile())); } topBoxLayout->addWidget(guideTreeProfile2BrowseButton, 2, 1); } topBox->setLayout(topBoxLayout); } void AlignOutputFileNames::browseInputTreeSeq() { browseInputTree(guideTreeNameSeq); } void AlignOutputFileNames::browseInputTreeProfile() { browseInputTree(guideTreeNameProfile2); } void AlignOutputFileNames::browseInputTree(QLineEdit* lineEditName) { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getOpenFileName(this, tr("Input Guide Tree")); if(myFile != "") { lineEditName->setText(myFile); } } void AlignOutputFileNames::browseGuideTreeSeq() { browse(guideTreeNameSeq); } void AlignOutputFileNames::browseGuideTreeProfile() { browse(guideTreeNameProfile2); } void AlignOutputFileNames::browseClustal() { browse(clustalName); } void AlignOutputFileNames::browseNBRF() { browse(nbrfName); } void AlignOutputFileNames::browseGCG() { browse(gcgName); } void AlignOutputFileNames::browseNexus() { browse(nexusName); } void AlignOutputFileNames::browseFasta() { browse(fastaName); } void AlignOutputFileNames::browsePhylip() { browse(phylipName); } void AlignOutputFileNames::browseGDE() { browse(gdeName); } void AlignOutputFileNames::accept() { outFileNames.reset(new clustalw::AlignmentFileNames); if(guideTreeNameSeq != 0) { if(guideTreeNameSeq->text() == "") { outFileNames->treeFile = filePath.toStdString() + "dnd"; } else { QString file = guideTreeNameSeq->text(); 
outFileNames->treeFile = file.toStdString(); } } if(guideTreeNameProfile2 != 0) { if(guideTreeNameProfile2->text() == "") { outFileNames->profile2TreeFile = profile2Path.toStdString() + "dnd"; } else { QString file = guideTreeNameProfile2->text(); outFileNames->profile2TreeFile = file.toStdString(); } } if(clustalName != 0) { if(clustalName->text() == "") { outFileNames->clustalFile = filePath.toStdString() + "aln"; } else { QString file = clustalName->text(); outFileNames->clustalFile = file.toStdString(); } } if(nbrfName != 0) { if(nbrfName->text() == "") { outFileNames->nrbfFile = filePath.toStdString() + "pir"; } else { QString file = nbrfName->text(); outFileNames->nrbfFile = file.toStdString(); } } if(gcgName != 0) { if(gcgName->text() == "") { outFileNames->gcgFile = filePath.toStdString() + "gcg"; } else { QString file = gcgName->text(); outFileNames->gcgFile = file.toStdString(); } } if(phylipName != 0) { if(phylipName->text() == "") { outFileNames->phylipFile = filePath.toStdString() + "phy"; } else { QString file = phylipName->text(); outFileNames->phylipFile = file.toStdString(); } } if(gdeName != 0) { if(gdeName->text() == "") { outFileNames->gdeFile = filePath.toStdString() + "gde"; } else { QString file = gdeName->text(); outFileNames->gdeFile = file.toStdString(); } } if(nexusName != 0) { if(nexusName->text() == "") { outFileNames->nexusFile = filePath.toStdString() + "nxs"; } else { QString file = nexusName->text(); outFileNames->nexusFile = file.toStdString(); } } if(fastaName != 0) { if(fastaName->text() == "") { outFileNames->fastaFile = filePath.toStdString() + "fasta"; } else { QString file = fastaName->text(); outFileNames->fastaFile = file.toStdString(); } } if(realignSegEndGapPenalties != 0) { if(realignSegEndGapPenalties->currentIndex() == 0) { realignEndGapPen = true; } else { realignEndGapPen = false; } } setResult(QDialog::Accepted); hide(); } void AlignOutputFileNames::cancel() { setResult(QDialog::Rejected); hide(); } void 
AlignOutputFileNames::browse(QLineEdit* lineEditName) { QString myFile; // Mark Jan 18th 2007: changes to remember working Dir FileDialog fd; myFile = fd.getSaveFileName(this, tr("Save as")); if(myFile != "") { lineEditName->setText(myFile); } } clustalw::AlignmentFileNames AlignOutputFileNames::getNames() { return *(outFileNames.get()); } void AlignOutputFileNames::setAllPtrsToNull() { realignSegEndGapPenalties = 0; realignSegEndGapPenaltiesLabel = 0; fastaBrowseButton = 0; fastaName = 0; fastaNameLabel = 0; nexusBrowseButton = 0; nexusName = 0; nexusNameLabel = 0; gdeBrowseButton = 0; gdeName = 0; gdeNameLabel = 0; phylipBrowseButton = 0; phylipName = 0; phylipNameLabel = 0; gcgBrowseButton = 0; gcgName = 0; gcgNameLabel = 0; nbrfBrowseButton = 0; nbrfName = 0; nbrfNameLabel = 0; clustalBrowseButton = 0; clustalName = 0; clustalNameLabel = 0; outputFilesLabel = 0; fileLayout = 0; guideTreeSeqBrowseButton = 0; guideTreeNameSeq = 0; guideTreeProfile2BrowseButton = 0; guideTreeNameProfile2 = 0; guideTreeNameLabelSeq = 0; topBoxLayout = 0; topBox = 0; fileBox = 0; alignOutCancelButton = 0; okButton = 0; mainLayout = 0; } clustalx-2.1/0000755000175000017500000000000011470725217013017 5ustar ddineenddineenclustalx-2.1/COPYING.LESSER0000644000175000017500000001674311457601126015056 0ustar ddineenddineen GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. 
"The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. 
Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. clustalx-2.1/COPYING0000644000175000017500000010451311457601157014057 0ustar ddineenddineen GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>.