cif-tools-1.0.1b/0000775000175000017500000000000014200427744013420 5ustar maartenmaartencif-tools-1.0.1b/.gitignore0000664000175000017500000000003514200427744015406 0ustar maartenmaarten.vscode/ .gdb_history build/ cif-tools-1.0.1b/CMakeLists.txt0000664000175000017500000001177714200427744016175 0ustar maartenmaarten# SPDX-License-Identifier: BSD-2-Clause # Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.15)

# set the project name
project(cif-tools VERSION 1.0.1 LANGUAGES CXX)

# Project-local modules (FindFilesystem, VersionString, ...) live in ./cmake
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

include(GNUInstallDirs)
include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(Dart)
include(GenerateExportHeader)

# Build as ISO C++17 without compiler extensions. The variable that
# initialises the CXX_EXTENSIONS target property is CMAKE_CXX_EXTENSIONS;
# a plain variable named CXX_EXTENSIONS is ignored by CMake.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# FindFilesystem is a find module, so it is run through find_package() only
# (not include()); that way REQUIRED is honoured. On success it defines the
# imported target std::filesystem, which is linked to every program below.
find_package(Filesystem REQUIRED)

# Warning level. add_compile_options() is used instead of appending to
# CMAKE_CXX_FLAGS so that flags supplied by the user stay untouched.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    add_compile_options(-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers)
elseif(MSVC)
    add_compile_options(/W4)
endif()

# When a CCP4 environment is active, look there first for modules and packages
if(NOT "$ENV{CCP4}" STREQUAL "")
    set(CCP4 "$ENV{CCP4}")
    list(PREPEND CMAKE_MODULE_PATH "${CCP4}/Lib")
    list(APPEND CMAKE_PREFIX_PATH "${CCP4}")

    # NOTE(review): this replaces CMAKE_PREFIX_PATH (dropping any other entry)
    # whenever the install prefix was left at its default. It looks like
    # CMAKE_INSTALL_PREFIX may have been intended - confirm before changing.
    if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
        set(CMAKE_PREFIX_PATH "${CCP4}")
    endif()
endif()

if(MSVC)
    # make msvc standards compliant...
    add_compile_options(/permissive-)

    # get_WIN32_WINNT(<out-var>)
    #
    # Stores the _WIN32_WINNT value for the target Windows version in
    # <out-var>, computed as (major << 8) | minor from CMAKE_SYSTEM_VERSION
    # (6.1 -> 0x601, 10.0.x -> 0xa00). <out-var> is left untouched when the
    # version is unknown.
    function(get_WIN32_WINNT version)
        if(WIN32 AND CMAKE_SYSTEM_VERSION MATCHES "^([0-9]+)\\.([0-9]+)")
            math(EXPR winnt "(${CMAKE_MATCH_1} << 8) | ${CMAKE_MATCH_2}" OUTPUT_FORMAT HEXADECIMAL)
            set(${version} "${winnt}" PARENT_SCOPE)
        endif()
    endfunction()

    get_WIN32_WINNT(ver)
    if(NOT "${ver}" STREQUAL "")
        add_compile_definitions(_WIN32_WINNT=${ver})
    endif()

    # On Windows, do not install in the system location
    if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND NOT BUILD_FOR_CCP4)
        message(STATUS "The library and auxiliary files will be installed in $ENV{LOCALAPPDATA}/${PROJECT_NAME}")
        set(CMAKE_INSTALL_PREFIX "$ENV{LOCALAPPDATA}/${PROJECT_NAME}" CACHE PATH "..." FORCE)
    endif()

    # Find out the processor type for the target. The value is quoted so an
    # empty CMAKE_SYSTEM_PROCESSOR reaches the FATAL_ERROR below instead of
    # producing an if() syntax error. 32-bit Windows reports "x86".
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
        set(COFF_TYPE "x64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86")
        set(COFF_TYPE "x86")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ARM64")
        set(COFF_TYPE "arm64")
    else()
        message(FATAL_ERROR "Unsupported or unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
    endif()

    set(COFF_SPEC "--coff=${COFF_TYPE}")
endif()

if(UNIX AND NOT APPLE)
    # On Linux, install in the $HOME/.local folder by default
    if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
        message(WARNING "The library and auxiliary files will be installed in $ENV{HOME}/.local")
        set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "..." FORCE)
    endif()
endif()

# Create a revision file, containing the current git version info
include(VersionString)
write_version_header()

# Optionally use mrc to create resources
find_package(Mrc)

if(MRC_FOUND)
    option(USE_RSRC "Use mrc to create resources" ON)
else()
    message(WARNING "Not using resources since mrc was not found")
endif()

if(USE_RSRC)
    message(STATUS "Using resources compiled with ${MRC}")
    add_compile_definitions(USE_RSRC)
endif()

set(Boost_DETAILED_FAILURE_MSG ON)
find_package(Boost 1.70.0 REQUIRED COMPONENTS program_options)

find_package(cifpp 3.0.1 CONFIG REQUIRED HINTS "$ENV{LOCALAPPDATA}/cifpp")

# Destination for the executables. BIN_INSTALL_DIR is still honoured when it
# is supplied (e.g. -DBIN_INSTALL_DIR=...); otherwise fall back to the
# GNUInstallDirs default so the install rule never gets an empty destination.
if(NOT DEFINED BIN_INSTALL_DIR)
    set(BIN_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}")
endif()

# One executable per tool; each is built from src/<tool>.cpp plus the shared
# entry point src/pr-main.cpp and installs the man page doc/<tool>.1.
set(programs
    pdb2cif
    cif2pdb
    cif-diff
    cif-drop
    cif-grep
    cif-merge
    cif-validate
    mmCQL)

foreach(PROGRAM IN LISTS programs)
    add_executable(${PROGRAM}
        "${PROJECT_SOURCE_DIR}/src/${PROGRAM}.cpp"
        "${PROJECT_SOURCE_DIR}/src/pr-main.cpp"
        ${RESOURCE_FILE})

    if(USE_RSRC)
        mrc_target_resources(${PROGRAM} "${CIFPP_SHARE_DIR}/mmcif_pdbx_v50.dic")
    endif()

    # cifpp's include directories come from linking the cifpp::cifpp target,
    # so the target name is not listed as a directory here.
    # CMAKE_BINARY_DIR is where write_version_header() puts revision.hpp.
    target_include_directories(${PROGRAM} PRIVATE
        "${PROJECT_SOURCE_DIR}/include"
        "${CMAKE_BINARY_DIR}")

    target_link_libraries(${PROGRAM} PRIVATE
        cifpp::cifpp
        Boost::program_options
        std::filesystem)

    install(TARGETS ${PROGRAM}
        RUNTIME DESTINATION "${BIN_INSTALL_DIR}")

    install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/doc/${PROGRAM}.1"
        DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
endforeach()
cif-tools-1.0.1b/LICENSE0000664000175000017500000000247214200427744014432 0ustar maartenmaartenSPDX-License-Identifier: BSD-2-Clause Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.cif-tools-1.0.1b/README.md0000664000175000017500000000074014200427744014700 0ustar maartenmaartencif-tools ========= The cif-tools suite of programs are tools you can use to examine and manipulate mmCIF and PDB files. Requirements ------------ The tools are based on [libcif++](https://github.com/PDB-REDO/libcifpp) and the code is written in modern C++ so you need a compiler capable of handling C++17 code. Building -------- Make sure you install libcif++ first before building. 
After that, building should be as easy as typing: ``` ./configure make make install ``` cif-tools-1.0.1b/changelog0000664000175000017500000000025614200427744015275 0ustar maartenmaartenVersion 1.0.1a - Remove GNU autoconf files. Version 1.0.1 - Update version string output - Move to cmake instead of GNU autoconf and friends version 1.0.1 - Initial releasecif-tools-1.0.1b/cmake/0000775000175000017500000000000014200427744014500 5ustar maartenmaartencif-tools-1.0.1b/cmake/FindFilesystem.cmake0000664000175000017500000000432714200427744020435 0ustar maartenmaarten# Simplistic reimplementation of https://github.com/vector-of-bool/CMakeCM/blob/master/modules/FindFilesystem.cmake
#
# Checks whether a small program using std::filesystem can be built, first
# without extra libraries, then with -lstdc++fs, then with -lc++fs.
#
# Result:
#   Filesystem_FOUND   cache BOOL, set from the outcome of those checks
#   std::filesystem    INTERFACE IMPORTED target (only created when found);
#                      requires cxx_std_17 and carries the extra library,
#                      if one turned out to be needed

# Nothing to do when an earlier run already created the target
if(TARGET std::filesystem)
    return()
endif()

cmake_minimum_required(VERSION 3.10)

include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceCompiles)

cmake_push_check_state()

set(CMAKE_CXX_STANDARD 17)

check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)

# Test program. <cstdlib> provides EXIT_SUCCESS and <filesystem> provides
# std::filesystem::current_path; without both headers the checks below can
# never succeed.
set(code [[
    #include <cstdlib>
    #include <filesystem>

    int main() {
        auto cwd = std::filesystem::current_path();
        return EXIT_SUCCESS;
    }
]])

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 8.4.0)
    # For these GCC versions skip the no-library check and go straight to the
    # library probes below.
    # >> https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
    set(CXX_FILESYSTEM_NO_LINK_NEEDED 0)
else()
    # Check a simple filesystem program without any linker flags
    check_cxx_source_compiles("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED)
endif()

if(CXX_FILESYSTEM_NO_LINK_NEEDED)
    set(_found 1)
else()
    set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES})

    # Add the libstdc++ flag
    set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs)
    check_cxx_source_compiles("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED)
    set(_found ${CXX_FILESYSTEM_STDCPPFS_NEEDED})

    if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED)
        # Try the libc++ flag
        set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs)
        check_cxx_source_compiles("${code}" CXX_FILESYSTEM_CPPFS_NEEDED)
        set(_found ${CXX_FILESYSTEM_CPPFS_NEEDED})
    endif()
endif()

if(_found)
    add_library(std::filesystem INTERFACE IMPORTED)
    set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17)

    if(CXX_FILESYSTEM_NO_LINK_NEEDED)
        # Nothing to add...
    elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
        set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
    elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
        set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
    endif()
endif()

cmake_pop_check_state()

set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE)

if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND)
    message(FATAL_ERROR "Cannot run simple program using std::filesystem")
endif()
cif-tools-1.0.1b/cmake/GetGitRevisionDescription.cmake0000664000175000017500000002245714200427744022622 0ustar maartenmaarten# - Returns a version string from Git # # These functions force a re-configure on each git commit so that you can # trust the values of the variables in your build system. # # get_git_head_revision( [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR]) # # Returns the refspec and sha hash of the current head revision # # git_describe( [ ...]) # # Returns the results of git describe on the source tree, and adjusting # the output so that it tests false if an error occurs. # # git_describe_working_tree( [ ...]) # # Returns the results of git describe on the working tree (--dirty option), # and adjusting the output so that it tests false if an error occurs. # # git_get_exact_tag( [ ...]) # # Returns the results of git describe --exact-match on the source tree, # and adjusting the output so that it tests false if there was no exact # matching tag. # # git_local_changes() # # Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes. # Uses the return code of "git diff-index --quiet HEAD --". # Does not regard untracked files.
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2020 Ryan Pavlik
# http://academic.cleardefinition.com
#
# Copyright 2009-2013, Iowa State University.
# Copyright 2013-2020, Ryan Pavlik
# Copyright 2013-2020, Contributors
# SPDX-License-Identifier: BSL-1.0
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

# Include guard: the body of this module is evaluated only once
if(__get_git_revision_description)
    return()
endif()
set(__get_git_revision_description YES)

# We must run the following at "include" time, not at function call time,
# to find the path to this module rather than the path to a calling list file
get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)

# Function _git_find_closest_git_dir finds the next closest .git directory
# that is part of any directory in the path defined by _start_dir.
# The result is returned in the parent scope variable whose name is passed
# as variable _git_dir_var. If no .git directory can be found, the
# function returns an empty string via _git_dir_var.
#
# Example: Given a path C:/bla/foo/bar and assuming C:/bla/.git exists and
# neither foo nor bar contain a file/directory .git. This will return
# C:/bla/.git
#
function(_git_find_closest_git_dir _start_dir _git_dir_var)
    set(cur_dir "${_start_dir}")
    set(git_dir "${_start_dir}/.git")
    while(NOT EXISTS "${git_dir}")
        # .git dir not found, search parent directories
        set(git_previous_parent "${cur_dir}")
        get_filename_component(cur_dir "${cur_dir}" DIRECTORY)
        if(cur_dir STREQUAL git_previous_parent)
            # We have reached the root directory, we are not in git
            set(${_git_dir_var} "" PARENT_SCOPE)
            return()
        endif()
        set(git_dir "${cur_dir}/.git")
    endwhile()
    set(${_git_dir_var} "${git_dir}" PARENT_SCOPE)
endfunction()

# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
#
# Stores the HEAD ref and hash in the caller's <refspecvar> and <hashvar>.
# Both are set to "GITDIR-NOTFOUND" when no .git entry is found, or when the
# one found lies above CMAKE_SOURCE_DIR and the optional keyword was not
# given. The function returns without setting either variable when the HEAD
# file it resolved does not exist.
#
# The submodule/worktree branch runs "${GIT_EXECUTABLE}"; this function does
# not call find_package(Git) itself.
function(get_git_head_revision _refspecvar _hashvar)
    _git_find_closest_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)

    if("${ARGN}" STREQUAL "ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR")
        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE)
    else()
        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE)
    endif()
    if(NOT "${GIT_DIR}" STREQUAL "")
        file(RELATIVE_PATH _relative_to_source_dir "${CMAKE_SOURCE_DIR}"
            "${GIT_DIR}")
        if("${_relative_to_source_dir}" MATCHES "[.][.]" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR)
            # We've gone above the CMake root dir.
            set(GIT_DIR "")
        endif()
    endif()
    if("${GIT_DIR}" STREQUAL "")
        set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
        set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
        return()
    endif()

    # Check if the current source dir is a git submodule or a worktree.
    # In both cases .git is a file instead of a directory.
    #
    if(NOT IS_DIRECTORY ${GIT_DIR})
        # The following git command will return a non empty string that
        # points to the super project working tree if the current
        # source dir is inside a git submodule.
        # Otherwise the command will return an empty string.
        #
        execute_process(
            COMMAND "${GIT_EXECUTABLE}" rev-parse --show-superproject-working-tree
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            OUTPUT_VARIABLE out
            ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
        if(NOT "${out}" STREQUAL "")
            # If out is not empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule
            file(READ ${GIT_DIR} submodule)
            string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE ${submodule})
            string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE)
            get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH)
            get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} ABSOLUTE)
            set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
        else()
            # GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
            file(READ ${GIT_DIR} worktree_ref)
            # The .git file contains a path to the worktree information directory
            # inside the parent git repo of the worktree.
            #
            string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir ${worktree_ref})
            string(STRIP ${git_worktree_dir} git_worktree_dir)
            _git_find_closest_git_dir("${git_worktree_dir}" GIT_DIR)
            set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
        endif()
    else()
        set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
    endif()
    # Scratch directory in the build tree for copies of the git metadata
    set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
    if(NOT EXISTS "${GIT_DATA}")
        file(MAKE_DIRECTORY "${GIT_DATA}")
    endif()

    if(NOT EXISTS "${HEAD_SOURCE_FILE}")
        return()
    endif()
    set(HEAD_FILE "${GIT_DATA}/HEAD")
    configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)

    # Instantiate the helper script from the template next to this module and
    # run it; HEAD_REF and HEAD_HASH used below are set by that script.
    configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
        "${GIT_DATA}/grabRef.cmake" @ONLY)
    include("${GIT_DATA}/grabRef.cmake")

    set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE)
    set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE)
endfunction()

# git_describe(<var> [<extra arguments for git describe> ...])
#
# Runs "git describe --tags --always <head hash> <extra arguments>" in
# CMAKE_CURRENT_SOURCE_DIR and stores the output in the caller's <var>.
# <var> is set to "GIT-NOTFOUND" when git is not available, to
# "HEAD-HASH-NOTFOUND" when no HEAD hash could be determined, and gets
# "-<exit code>-NOTFOUND" appended when the command fails.
function(git_describe _var)
    if(NOT GIT_FOUND)
        find_package(Git QUIET)
    endif()
    get_git_head_revision(refspec hash)
    if(NOT GIT_FOUND)
        set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
        return()
    endif()
    if(NOT hash)
        set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
        return()
    endif()

    # TODO sanitize
    #if((${ARGN}" MATCHES "&&") OR
    #    (ARGN MATCHES "||") OR
    #    (ARGN MATCHES "\\;"))
    #    message("Please report the following error to the project!")
    #    message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}")
    #endif()

    #message(STATUS "Arguments to execute_process: ${ARGN}")

    execute_process(
        COMMAND "${GIT_EXECUTABLE}" describe --tags --always ${hash} ${ARGN}
        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
        RESULT_VARIABLE res
        OUTPUT_VARIABLE out
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT res EQUAL 0)
        set(out "${out}-${res}-NOTFOUND")
    endif()

    set(${_var} "${out}" PARENT_SCOPE)
endfunction()

# git_describe_working_tree(<var> [<extra arguments for git describe> ...])
#
# Runs "git describe --dirty <extra arguments>" in CMAKE_CURRENT_SOURCE_DIR
# and stores the output in the caller's <var>. <var> is set to
# "GIT-NOTFOUND" when git is not available and gets "-<exit code>-NOTFOUND"
# appended when the command fails.
function(git_describe_working_tree _var)
    if(NOT GIT_FOUND)
        find_package(Git QUIET)
    endif()
    if(NOT GIT_FOUND)
        set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
        return()
    endif()

    execute_process(
        COMMAND "${GIT_EXECUTABLE}" describe --dirty ${ARGN}
        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
        RESULT_VARIABLE res
        OUTPUT_VARIABLE out
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT res EQUAL 0)
        set(out "${out}-${res}-NOTFOUND")
    endif()

    set(${_var} "${out}" PARENT_SCOPE)
endfunction()

# git_get_exact_tag(<var> [<extra arguments for git describe> ...])
#
# Same as git_describe() with --exact-match placed before the extra arguments.
function(git_get_exact_tag _var)
    git_describe(out --exact-match ${ARGN})
    set(${_var} "${out}" PARENT_SCOPE)
endfunction()

# git_local_changes(<var>)
#
# Sets the caller's <var> to "CLEAN" when "git diff-index --quiet HEAD --"
# exits with 0 and to "DIRTY" otherwise; "GIT-NOTFOUND" or
# "HEAD-HASH-NOTFOUND" are stored when git or the HEAD hash is unavailable.
function(git_local_changes _var)
    if(NOT GIT_FOUND)
        find_package(Git QUIET)
    endif()
    get_git_head_revision(refspec hash)
    if(NOT GIT_FOUND)
        set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
        return()
    endif()
    if(NOT hash)
        set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
        return()
    endif()

    execute_process(
        COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD --
        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
        RESULT_VARIABLE res
        OUTPUT_VARIABLE out
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(res EQUAL 0)
        set(${_var} "CLEAN" PARENT_SCOPE)
    else()
        set(${_var} "DIRTY" PARENT_SCOPE)
    endif()
endfunction()
cif-tools-1.0.1b/cmake/GetGitRevisionDescription.cmake.in0000664000175000017500000000251214200427744023215 0ustar maartenmaarten#
# Internal file for GetGitRevisionDescription.cmake
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2010 Ryan Pavlik
# http://academic.cleardefinition.com
# Iowa State University HCI Graduate Program/VRAC
#
# Copyright 2009-2012, Iowa State University
# Copyright 2011-2015, Contributors
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
# SPDX-License-Identifier: BSL-1.0

set(HEAD_HASH)

file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)

string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
if(HEAD_CONTENTS MATCHES "ref")
    # named branch
    string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
    if(EXISTS "@GIT_DIR@/${HEAD_REF}")
        configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
    else()
        configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY)
        file(READ "@GIT_DATA@/packed-refs" PACKED_REFS)
        if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}")
            set(HEAD_HASH "${CMAKE_MATCH_1}")
        endif()
    endif()
else()
    # detached HEAD
    configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
endif()

if(NOT HEAD_HASH)
    file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
    string(STRIP "${HEAD_HASH}" HEAD_HASH)
endif()
cif-tools-1.0.1b/cmake/VersionString.cmake0000664000175000017500000000603414200427744020321 0ustar maartenmaarten# SPDX-License-Identifier: BSD-2-Clause
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.15)

# write_version_header([<prefix>])
#
# Create a revision file, containing the current git version info, if any.
#
# Writes ${CMAKE_BINARY_DIR}/revision.hpp, which defines the constants
# k<prefix>ProjectName, k<prefix>VersionNumber, k<prefix>VersionGitTag,
# k<prefix>BuildInfo and k<prefix>BuildDate plus the inline function
# write_version_string(std::ostream&, bool). ${CMAKE_BINARY_DIR} is added to
# the include directories of the calling directory.
#
# Example:
#   include(VersionString)
#   write_version_header()
function(write_version_header)
    include(GetGitRevisionDescription)

    # git_describe_working_tree() already passes --dirty to git describe, so
    # only the tag filter is supplied here.
    git_describe_working_tree(BUILD_VERSION_STRING --match=build)

    if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
        set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
        if(CMAKE_MATCH_3)
            # A trailing '*' marks a build from a modified working tree
            set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
        else()
            set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
        endif()
    elseif(NOT BUILD_VERSION_STRING)
        # The helper reports failure with a value that is empty or ends in
        # -NOTFOUND, both of which test false. The previous test,
        # if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND)), compared two constants
        # and was always true, so this fallback was never reached.
        set(BUILD_VERSION_STRING "no git info available")
    endif()

    # include_directories() has no PRIVATE keyword; the extra word used to be
    # added as a directory called "PRIVATE".
    include_directories("${CMAKE_BINARY_DIR}")

    string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)

    # Optional first argument: prefix inserted into the generated identifiers
    if(ARGC GREATER 0)
        set(VAR_PREFIX "${ARGV0}")
    endif()

    file(WRITE "${CMAKE_BINARY_DIR}/revision.hpp.in" [[// Generated revision file

#pragma once

#include <ostream>

const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";

inline void write_version_string(std::ostream &os, bool verbose)
{
	os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
	if (verbose)
	{
		os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
		if (k@VAR_PREFIX@VersionGitTag[0] != 0)
			os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
	}
}
]])
    configure_file("${CMAKE_BINARY_DIR}/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
endfunction()
cif-tools-1.0.1b/doc/0000775000175000017500000000000014200427744014165 5ustar maartenmaartencif-tools-1.0.1b/doc/cif-diff.10000664000175000017500000000273314200427744015723 0ustar maartenmaarten.TH cif-diff 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif\-diff \- A tool to compare two mmCIF files .SH SYNOPSIS cif\-diff [OPTION] file1 file2 .SH DESCRIPTION The mmCIF format does not specify the order in which categories or fields should be recorded. Using this tool, you can reorder a second file in the same way as the first before comparing the content making it easier to spot differences. .SH OPTIONS .TP \fB--category\fR=arg Limit the comparison to this category, default is to show differences in all categories. This option can be specified multiple times. .TP \fB--max-diff-count\fR=nr Display at most \fBnr\fR items per category. Default is 5 items, if you specify zero the number of items will unlimited. .sp This option has no effect when the \fB--text\fR option is specified. .TP \fB--text\fR Default is to display the differences in some kind of summary. But if you prefer to use a more graphical diff: this option will use vimdiff to display the differences. .TP \fB--icase\fR Pass the \fIignore case\fR option to vimdiff. .TP \fB--iwhite\fR Pass the \fIignore white\fR option to vimdiff. .TP \fB--verbose\fR,\fB-v\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L.
Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBmmCQL\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/cif-drop.10000664000175000017500000000162614200427744015757 0ustar maartenmaarten.TH cif-drop 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif\-drop \- A tool to drop columns from mmCIF files .SH SYNOPSIS cif\-drop [OPTION] file1 [file2..] .SH DESCRIPTION This tool makes it easy to drop a single column from a category in mmCIF files. .SH OPTIONS .TP \fB--output\fR=, \fB-o\fR Write output to \fI\fR, default is \fIstdout\fR. .TP \fB--column\fR=, \fB-c\fR Column to drop, should be of the form \fB'_category.item'\fR with the leading underscore. Can be specified multiple times. .TP \fB--verbose\fR,\fB-v\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L. Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBmmCQL\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/cif-grep.10000664000175000017500000000322414200427744015744 0ustar maartenmaarten.TH cif-grep 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif\-grep \- A tool like grep to print fields in mmCIF files that match patterns .SH SYNOPSIS cif\-grep [OPTION] pattern file1 [file2..] .SH DESCRIPTION This tool tries to work in a similar way as \fIgrep\fR by searching mmCIF files comparing the content of fields to patterns. .SH OPTIONS Multiple files can be specified to search. If a directory name is specified, all files in that directory will be searched. Use the \fI--recursive\fR flag to do a recursive search. .TP \fB--item\fR=, \fB-i\fR Limit the search to only the item specified in \fI\fR. Default is to search all items.
Item should be of the form \fB'_category.item'\fR with the leading underscore. .TP \fB--quiet\fR, \fB-q\fR Print only the file names that match the pattern. .TP \fB--count\fR, \fB-c\fR Only show the number of hits. .TP \fB--invert-match\fR, \fB-v\fR Only select the fields that do \fBnot\fR match the pattern. .TP \fB--line-number\fR, \fB-n\fR Print the line numbers. .TP \fB--no-filename\fR, \fB-h\fR Do not print the filename. .TP \fB--with-filename\fR, \fB-H\fR Do print the filename. .TP \fB--files-with-matches\fR, \fB-l\fR Print only the names of the files containing matches. .TP \fB--recursive\fR, \fB-r\fR Search recursively. .TP \fB--verbose\fR,\fB-V\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L. Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBmmCQL\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/cif-merge.10000664000175000017500000000272514200427744016113 0ustar maartenmaarten.TH cif-merge 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif\-merge \- Merge the content of a modified PDB file into a mmCIF file .SH SYNOPSIS cif\-merge [OPTION] inputFile donorFile .SH DESCRIPTION Unfortunately, a lot of tools in the crystallographic area are still not capable of working with mmCIF files in a proper way, forcing you to feed them PDB files. Or, when they do support mmCIF, they write out mutilated files without lots of the original information contained in the input file. .sp To remedy this, \fIcif\-merge\fR can be used to create a new mmCIF file containing information from the processed PDB (or mmCIF) file and fill in the blanks using a donor mmCIF file. .SH OPTIONS .TP \fB--input\fR=, \fB-i\fR The file that was processed by an old tool, missing the additional data. .TP \fB--donor\fR= The donor file, containing the extra information. 
.TP \fB--output\fR=, \fB-o\fR Write output to the specified \fI\fR, default is to write to \fIstdout\fR. .TP \fB--dict\fR= Dictionary file containing restraints for residues in the processed file. .TP \fB--verbose\fR,\fB-v\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L. Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBmmCQL\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/cif-validate.10000664000175000017500000000241014200427744016574 0ustar maartenmaarten.TH cif-validate 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif\-validate \- Validate the content of a mmCIF file .SH SYNOPSIS cif\-validate [OPTION] file .SH DESCRIPTION The mmCIF format is not really new, but the implementation of this format has been taken up slowly. Unfortunately, the ins and outs of the format are still poorly understood and many files written contain errors. .sp \fIcif\-validate\fR can be used to help debug the creation of mmCIF files. By default it uses the dictionary provided by \fIlibcifpp\fR (which may be updated automatically, depending on your installation). But you can also specify your own dictionary file using the \fB--dict\fR option. .SH OPTIONS .TP \fB--dict\fR= The mmCIF dictionary file to use. The default is \fImmcif_pdbx_v50\fR. .TP \fB--validate-links\fR Validate all links. Will output information about missing parent category items. .TP \fB--verbose\fR,\fB-v\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L. Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif2pdb\fR, \fBmmCQL\fR, \fBpdb2cif\fR.
cif-tools-1.0.1b/doc/cif2pdb.10000664000175000017500000000237214200427744015564 0ustar maartenmaarten.TH cif2pdb 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME cif2pdb \- Convert the contents of a mmCIF file into PDB format .SH SYNOPSIS cif2pdb [OPTION] input [output] .SH DESCRIPTION This tool attempts to write out the contents of a mmCIF file in PDB format. .sp Note that this will likely drop lots of information that cannot be represented in PDB format and might even fail completely due to too many atoms or other overruns. .sp Like most of the other \fIcif-tools\fR this tool can read compressed mmCIF files directly. .SH OPTIONS Input and output files do not need the option flag. If no output file is given, the result is printed to \fIstdout\fR. .TP \fB--dict\fR= The mmCIF dictionary file to use. The default is \fImmcif_pdbx_v50\fR. .TP \fB--no-validate\fR Omit the validation of the input mmCIF file. This will force output even in case the input file contains errors. .TP \fB--verbose\fR,\fB-V\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L. Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBmmCQL\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/mmCQL.10000664000175000017500000000312314200427744015217 0ustar maartenmaarten.TH mmCQL 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME mmCQL \- SQL-like tool to manipulate mmCIF files .SH SYNOPSIS mmCQL [OPTION] input [output] .SH DESCRIPTION The mmCIF format is a structured format and with a proper dictionary it even looks a lot like a database with relations between categories that act as tables. .sp In pipelines processing mmCIF files it is often required to simply change the content of a single field, or add some data, delete it, etc. For this the mmCQL tool offers a \fBSQL\fR like language.
.sp This tool is not a complete and finished piece of software, it perhaps lacks a lot of functionality. However, it is already very useful to simply lookup some value or change items based on some criteria. .SH OPTIONS .TP \fB--force\fR Write output file even if the name of the output file is the same as the input file, which will of course overwrite the input file. .TP \fB--script\fR=, \fB-f\fR Read commands from the given script file. .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBpdb2cif\fR. cif-tools-1.0.1b/doc/pdb2cif.10000664000175000017500000000262214200427744015562 0ustar maartenmaarten.TH pdb2cif 1 "2020-11-23" "version 1.0.1" "User Commands" .if n .ad l .nh .SH NAME pdb2cif \- Convert the content of a PDB file into mmCIF format .SH SYNOPSIS pdb2cif [OPTION] input [output] .SH DESCRIPTION This tool attempts to write out the contents of a PDB file in mmCIF format. .sp The conversion of a PDB file into mmCIF format is not trivial. The numbering schemes differ and in some cases it is even necessary to do a sequence alignment to recover correct sequence numbers. .sp When \fBCCP4\fR is installed and sourced correctly, this tool will use the information in the \fBCCP4\fR monomers library to guide the construction of a correct mmCIF file. In the absence of \fBCCP4\fR a best effort is done. .SH OPTIONS Input and output files do not need the option flag. If no output file is given, the result is printed to \fIstdout\fR. Both input and output files can be compressed. .TP \fB--dict\fR= Specify a dictionary file containing restraints for residues specific to this file. .TP \fB--validate\fR Validate the file before writing the output. .TP \fB--verbose\fR,\fB-V\fR Be more verbose, useful to diagnose validation errors. .SH AUTHOR Written by Maarten L.
Hekkelman .SH "REPORTING BUGS" Report bugs at https://github.com/PDB-REDO/cif-tools/issues .SH "SEE ALSO" \fBcif-diff\fR, \fBcif-drop\fR, \fBcif-grep\fR, \fBcif-merge\fR, \fBcif-validate\fR, \fBcif2pdb\fR, \fBmmCQL\fR. cif-tools-1.0.1b/src/0000775000175000017500000000000014200427744014207 5ustar maartenmaartencif-tools-1.0.1b/src/cif-diff.cpp0000664000175000017500000003324014200427744016364 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cif-tools.hpp" #include #include #include #include #include #include #include // #include #include #include #include #include "cif++/Cif++.hpp" #include "cif++/Cif2PDB.hpp" #include "cif++/Structure.hpp" #include "cif++/CifParser.hpp" #include "cif++/CifValidator.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; namespace ba = boost::algorithm; namespace fs = std::filesystem; namespace io = boost::iostreams; namespace c = mmcif; // -------------------------------------------------------------------- class templateParser : public cif::SacParser { public: templateParser(std::istream& is) : SacParser(is) { } virtual void produceDatablock(const std::string& name) { } virtual void produceCategory(const std::string& name) { } virtual void produceRow() { } virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) { std::string tag = "_" + category + "." + item; if (find(mOrder.rbegin(), mOrder.rend(), tag) == mOrder.rend()) mOrder.push_back(tag); } std::vector mOrder; }; // -------------------------------------------------------------------- void compareCategories(cif::Category& a, cif::Category& b, size_t maxDiffCount) { using namespace std::placeholders; // set tagsA(a.fields()), tagsB(b.fields()); // // if (tagsA != tagsB) // std::cout << "Unequal number of fields" << std::endl; auto& validator = a.getValidator(); auto catValidator = validator.getValidatorForCategory(a.name()); if (catValidator == nullptr) throw std::runtime_error("missing cat validator"); typedef std::function compType; std::vector> tags; auto keys = catValidator->mKeys; std::vector keyIx; for (auto& tag: a.fields()) { auto iv = catValidator->getValidatorForItem(tag); if (iv == nullptr) throw std::runtime_error("missing item validator"); auto tv = iv->mType; if (tv == nullptr) throw std::runtime_error("missing type validator"); tags.push_back(std::make_tuple(tag, std::bind(&cif::ValidateType::compare, tv, 
std::placeholders::_1, std::placeholders::_2))); auto pred = [tag](const std::string& s) -> bool { return cif::iequals(tag, s) == 0; }; if (find_if(keys.begin(), keys.end(), pred) == keys.end()) keyIx.push_back(tags.size() - 1); } a.reorderByIndex(); b.reorderByIndex(); auto rowLess = [&](const cif::Row& a, const cif::Row& b) -> bool { int d = 0; for (auto kix: keyIx) { std::string tag; compType compare; tie(tag, compare) = tags[kix]; d = compare(a[tag].c_str(), b[tag].c_str()); if (d != 0) break; } return d < 0; }; // std::vector rowsA(a.begin(), a.end()), rowsB(b.begin(), b.end()); // sort(rowsA.begin(), rowsA.end(), rowLess); // sort(rowsB.begin(), rowsB.end(), rowLess); auto ai = a.begin(), bi = b.begin(); struct Diff { virtual ~Diff() {} std::string key(cif::Row r, std::vector& keys) { std::vector v; for (auto k: keys) v.push_back(r[k].as()); return "[" + ba::join(v, ", ") + "]"; } virtual void report(std::vector& keys) = 0; }; struct ExtraADiff : public Diff { cif::Row A; ExtraADiff(cif::Row r) : A(r) {} virtual void report(std::vector& keys) { std::cout << "Extra row in A with key " << key(A, keys) << std::endl; } }; struct ExtraBDiff : public Diff { cif::Row B; ExtraBDiff(cif::Row r) : B(r) {} virtual void report(std::vector& keys) { std::cout << "Extra row in B with key " << key(B, keys) << std::endl; } }; struct ValueDiff : public Diff { cif::Row A, B; std::vector missingA, missingB, different; ValueDiff(cif::Row a, cif::Row b, std::vector&& missingA, std::vector&& missingB, std::vector&& different) : A(a), B(b), missingA(move(missingA)), missingB(move(missingB)), different(move(different)) {} virtual void report(std::vector& keys) { std::cout << "Differences in rows with key " << key(A, keys) << std::endl; for (auto& item: different) { std::cout << " " << item << " (A): '" << A[item].as() << '\'' << std::endl << " " << item << " (B): '" << B[item].as() << '\'' << std::endl; } for (auto& item: missingA) { std::cout << " " << item << " (A): " << std::endl 
<< " " << item << " (B): '" << B[item].as() << '\'' << std::endl; } for (auto& item: missingB) { std::cout << " " << item << " (A): '" << A[item].as() << '\'' << std::endl << " " << item << " (B): " << std::endl; } } }; std::vector diffs; while ((maxDiffCount == 0 or diffs.size() < maxDiffCount) and (ai != a.end() or bi != b.end())) { if (ai == a.end()) { diffs.push_back(new ExtraBDiff{ *bi++ }); continue; } if (bi == b.end()) { diffs.push_back(new ExtraADiff{ *ai++ }); continue; } cif::Row ra = *ai, rb = *bi; if (rowLess(ra, rb)) { diffs.push_back(new ExtraADiff{ *ai++ }); continue; } if (rowLess(rb, ra)) { diffs.push_back(new ExtraBDiff{ *bi++ }); continue; } std::vector missingA, missingB, different; for (auto& tt: tags) { std::string tag; compType compare; tie(tag, compare) = tt; // make it an option to compare unapplicable to empty or something const char* ta = ra[tag].c_str(); if (strcmp(ta, ".") == 0) ta = ""; const char* tb = rb[tag].c_str(); if (strcmp(tb, ".") == 0) tb = ""; if (compare(ta, tb) != 0) { if (*ta == 0) missingA.push_back(tag); else if (*tb == 0) missingB.push_back(tag); else different.push_back(tag); } } ++ai; ++bi; if (not missingA.empty() or not missingB.empty() or not different.empty()) diffs.push_back(new ValueDiff{ ra, rb, move(missingA), move(missingB), move(different) }); } if (not diffs.empty()) { std::cout << std::string(cif::get_terminal_width(), '-') << std::endl << "Differences in values for category " << a.name() << std::endl << std::endl; for (auto diff: diffs) { diff->report(keys); delete diff; } if (diffs.size() == maxDiffCount) std::cout << "..." 
<< std::endl; std::cout << std::endl; } } void compareCifs(cif::Datablock& dbA, cif::Datablock& dbB, const cif::iset& categories, int maxDiffCount) { std::vector catA, catB; for (auto& cat: dbA) catA.push_back(cat.name()); sort(catA.begin(), catA.end()); for (auto& cat: dbB) catB.push_back(cat.name()); sort(catB.begin(), catB.end()); // loop over categories twice, to group output // First iteration is to list missing categories. std::vector missingA, missingB; auto catA_i = catA.begin(), catB_i = catB.begin(); while (catA_i != catA.end() and catB_i != catB.end()) { std::string nA = *catA_i; ba::to_lower(nA); std::string nB = *catB_i; ba::to_lower(nB); int d = nA.compare(nB); if (d > 0) { auto& cat = dbB[*catB_i++]; if (not cat.empty()) missingA.push_back(cat.name()); } else if (d < 0) { auto& cat = dbA[*catA_i++]; if (not cat.empty()) missingB.push_back(cat.name()); } else ++catA_i, ++catB_i; } while (catA_i != catA.end()) missingB.push_back(*catA_i++); while (catB_i != catB.end()) missingA.push_back(*catB_i++); if (categories.empty()) { if (not missingA.empty()) std::cout << "Categories missing in A: " << ba::join(missingA, ", ") << std::endl << std::endl; if (not missingB.empty()) std::cout << "Categories missing in B: " << ba::join(missingB, ", ") << std::endl << std::endl; } // Second loop, now compare category values catA_i = catA.begin(), catB_i = catB.begin(); while (catA_i != catA.end() and catB_i != catB.end()) { std::string nA = *catA_i; ba::to_lower(nA); std::string nB = *catB_i; ba::to_lower(nB); int d = nA.compare(nB); if (d > 0) ++catB_i; else if (d < 0) ++catA_i; else { if (categories.empty() or categories.count(nA)) compareCategories(dbA[*catA_i], dbB[*catB_i], maxDiffCount); ++catA_i; ++catB_i; } } } void compareCifsText(c::File& a, c::File& b, bool icase, bool iwhite) { // temp files for vimdiff char generated[] = "/tmp/pdb2cif-diff-B-XXXXXX.cif", original[] = "/tmp/pdb2cif-diff-A-XXXXXX.cif"; int fd[2]; if ((fd[0] = mkstemps(generated, 4)) < 0 or 
(fd[1] = mkstemps(original, 4)) < 0) { std::cerr << "Error creating temp files: " << strerror(errno) << std::endl; exit(1); } io::file_descriptor_sink gen(fd[0], io::close_handle), orig(fd[1], io::close_handle); { io::filtering_stream out; out.push(orig); a.data().write(out); } // Next the converted cif file { io::filtering_stream out; out.push(gen); std::vector order; a.data().getTagOrder(order); b.data().write(out, order); } std::vector nArgv = { "/usr/bin/vimdiff" }; if (icase) { nArgv.push_back("-c"); nArgv.push_back("set diffopt+=icase"); } if (iwhite) { nArgv.push_back("-c"); nArgv.push_back("set diffopt-=iwhite"); } nArgv.push_back(original); nArgv.push_back(generated); nArgv.push_back(nullptr); int pid = fork(); if (pid <= 0) { if (execv(nArgv[0], const_cast(nArgv.data())) < 0) std::cerr << "Failed to execute vimdiff" << std::endl; exit(1); } int status; waitpid(pid, &status, 0); if (WIFEXITED(status)) { unlink(generated); unlink(original); } } int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif-diff options file1 file2"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("category", po::value>(), "Limit comparison to this category, default is all categories. 
Can be specified multiple times") ("max-diff-count", po::value(), "Maximum number of diff items per category, enter zero (0) for unlimited, default is 5") ("text", "Text based diff (using vimdiff) based on the order of the cif version") ("icase", "Ignore case (vimdiff option)") ("iwhite", "Ignore whitespace (vimdiff option)"); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input,i", po::value>(),"Input files") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 2); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or vm.count("input") == 0 or vm["input"].as>().size() != 2) { std::cerr << visible_options << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); int maxDiffCount = 5; if (vm.count("max-diff-count")) maxDiffCount = vm["max-diff-count"].as(); cif::iset categories; if (vm.count("category")) { for (auto cs: vm["category"].as>()) { for (auto si = ba::make_split_iterator(cs, ba::token_finder(ba::is_any_of(",; "), ba::token_compress_on)); not si.eof(); ++si) { std::string cat(si->begin(), si->end()); ba::to_lower(cat); categories.insert(cat); } } } auto input = vm["input"].as>(); c::File file1{fs::path(input[0])}; c::File file2{fs::path(input[1])}; if (vm.count("text")) compareCifsText(file1, file2, vm.count("icase"), vm.count("iwhite")); else compareCifs(file1.data(), file2.data(), categories, maxDiffCount); return 0; } cif-tools-1.0.1b/src/cif-drop.cpp0000664000175000017500000001115114200427744016415 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 
NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cif-tools.hpp" #include #include #include #include #include // #include #include #include #include "cif++/Cif++.hpp" #include "cif++/Cif2PDB.hpp" #include "cif++/Structure.hpp" #include "cif++/CifParser.hpp" #include "cif++/CifValidator.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; namespace ba = boost::algorithm; namespace fs = std::filesystem; namespace io = boost::iostreams; int drop(std::istream& is, std::set& columns) { cif::File in(is); for (auto c: columns) { std::string cat, item; std::tie(cat, item) = cif::splitTagName(c); // loop over all datablocks for (auto& db: in) { auto& c = db[cat]; if (not c.empty()) c.drop(item); } } in.save(std::cout); return 0; } int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif-diff options file1 file2"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("output,o", "Write output to this file, default is to the terminal (stdout)") ("column,c", po::value>(), "Column to drop, should be of the form '_category.item' with the leading underscore. 
Can be specified multiple times."); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input,i", po::value(), "Input file") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 2); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or vm.count("input") == 0 or vm.count("column") == 0) { std::cerr << visible_options << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); std::set columns; for (auto cs: vm["column"].as>()) { for (auto si = ba::make_split_iterator(cs, ba::token_finder(ba::is_any_of(",; "), ba::token_compress_on)); not si.eof(); ++si) { std::string c(si->begin(), si->end()); ba::to_lower(c); columns.insert(c); } } if (cif::VERBOSE) { std::cerr << "Dropping columns:" << std::endl; for (auto c: columns) std::cerr << " " << c << std::endl; std::cerr << std::endl; } fs::path file = vm["input"].as(); std::ifstream is(file); if (not is.is_open()) { std::cerr << "Could not open input file" << std::endl; exit(1); } std::ofstream f; if (vm.count("output")) { f.open(vm["output"].as()); if (not f.is_open()) { std::cerr << "Could not open output file" << std::endl; exit(1); } std::cout.rdbuf(f.rdbuf()); } return drop(is, columns); } cif-tools-1.0.1b/src/cif-grep.cpp0000664000175000017500000002157214200427744016416 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cif-tools.hpp" #include #include #include #include #include #include // #include #include #include #include "cif++/Cif++.hpp" #include "cif++/Structure.hpp" #include "cif++/CifParser.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; namespace ba = boost::algorithm; namespace fs = std::filesystem; namespace io = boost::iostreams; class grepParser : public cif::SacParser { public: grepParser(const std::string& file, std::istream& is, const std::string& pattern, bool quiet, bool printLineNr, bool invertMatch) : SacParser(is), mFile(file), mRx(pattern), mQuiet(quiet), mLineNr(printLineNr), mInvertMatch(invertMatch) { } grepParser(const std::string& file, std::istream& is, const std::string& tag, const std::string& pattern, bool quiet, bool printLineNr, bool invertMatch) : grepParser(file, is, pattern, quiet, printLineNr, invertMatch) { std::tie(mCat, mItem) = cif::splitTagName(tag); } size_t getMatches() const { return mMatches; } virtual void produceDatablock(const std::string& name) { } virtual void produceCategory(const std::string& name) { } virtual void produceRow() { } virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) { if ((mCat.empty() or cif::iequals(category, mCat)) and (mItem.empty() or cif::iequals(item, mItem)) and std::regex_search(value, mRx) == not mInvertMatch) { ++mMatches; if (not mQuiet) { if (not mFile.empty()) std::cout << mFile << ':'; if (mLineNr) std::cout << mLineNr << ':'; std::cout << value << std::endl; } } } std::string mFile; std::string mCat, mItem; std::regex mRx; size_t mMatches = 0; bool mQuiet, mLineNr, mInvertMatch; }; size_t cifGrep(const std::string& pattern, const std::string& tag, const std::string& file, std::istream& is, bool quiet, bool printLineNr, bool invertMatch) { size_t result = 0; if (tag.empty()) { grepParser gp(file, is, pattern, quiet, printLineNr, invertMatch); gp.parseFile(); result = gp.getMatches(); } else { grepParser 
gp(file, is, tag, pattern, quiet, printLineNr, invertMatch); gp.parseFile(); result = gp.getMatches(); } return result; } int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif-grep [option...] pattern [file ...]"); visible_options.add_options() ("item,i", po::value(), "Item tag to scan, default is all item values") ("help", "Display help message") ("version", "Print version") ("quiet,q", "Only print files matching pattern") ("count,c", "Only show number of hits") ("invert-match,v", "Select fields NOT matching the pattern") ("line-number,n", "Print line numbers") ("no-filename,h", "Don't print the filename") ("with-filename,H", "Do print the filename") ("verbose,V", "Verbose output") ("files-with-matches,l", "Print only names of files containing matches") ("recursive,r", "Search recursively"); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("pattern", po::value(), "Pattern") ("input", po::value>(), "Input files") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("pattern", 1); p.add("input", -1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or vm.count("pattern") == 0) { std::cerr << visible_options << std::endl; exit(vm.count("help") ? 
0 : 1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); bool quiet = vm.count("quiet") > 0; bool filenamesOnly = vm.count("files-with-matches") > 0; bool countOnly = vm.count("count") > 0; bool noFileNames = filenamesOnly == false and vm.count("no-filename") > 0; bool doFileNames = vm.count("with-filename") > 0; bool lineNumbers = vm.count("line-number") > 0; bool invertMatch = vm.count("invert-match") > 0; size_t count = 0; quiet = quiet or countOnly; std::string pattern = vm["pattern"].as(); std::string tag; if (vm.count("item")) { tag = vm["item"].as(); std::string cat, item; std::tie(cat, item) = cif::splitTagName(tag); if (cat.empty()) throw std::runtime_error("Invalid category in tag: '" + cat + '\''); if (item.empty()) throw std::runtime_error("Invalid item: '" + item + '\''); if (cif::VERBOSE) std::cerr << "matching only for category: " << cat << " and item " << item << std::endl; } size_t result = false; if (vm.count("input") == 0 and not vm.count("recursive")) { result = cifGrep(pattern, tag, "stdin", std::cin, quiet or filenamesOnly, lineNumbers, invertMatch); if (doFileNames or (filenamesOnly and result != 0)) std::cout << "stdin" << std::endl; if (countOnly) std::cout << result << std::endl; } else { std::vector files; if (vm.count("input")) files = vm["input"].as>(); if (vm.count("recursive")) { if (files.empty()) files.push_back(fs::current_path()); std::vector expanded; for (auto file: files) { if (fs::is_directory(file)) { for (auto i = fs::recursive_directory_iterator(file); i != fs::recursive_directory_iterator(); ++i) { fs::path p = i->path(); if (fs::is_regular_file(p)) expanded.push_back(p.string()); } } else expanded.push_back(file); } files = expanded; } std::vector> filesWithSizes; size_t totalSize = 0; transform(files.begin(), files.end(), back_inserter(filesWithSizes), [&totalSize](const std::string& f) -> std::tuple { size_t size = fs::file_size(f); totalSize += size; return 
std::make_tuple(size, f); }); if (doFileNames) noFileNames = false; else if (files.size() <= 1) noFileNames = true; for (auto file: filesWithSizes) { fs::path f; size_t size; std::tie(size, f) = file; if (not fs::is_regular_file(f)) continue; if (cif::VERBOSE) std::cerr << f << std::endl; std::ifstream infile(f, std::ios_base::in | std::ios_base::binary); if (not infile.is_open()) throw std::runtime_error("Could not open file " + f.string()); io::filtering_stream in; if (f.extension() == ".gz") in.push(io::gzip_decompressor()); in.push(infile); try { size_t r = cifGrep(pattern, tag, noFileNames ? "" : f.filename().string(), in, quiet or filenamesOnly, lineNumbers, invertMatch); count += r; if (cif::VERBOSE or (countOnly and not noFileNames)) std::cout << f << ':' << r << std::endl; if (r > 0) result = true; } catch (const std::exception& e) { std::cerr << std::endl << "exception for " << f << std::endl << " => " << e.what() << std::endl; } } } if (noFileNames and countOnly) std::cout << count << std::endl; return result ? 0 : 1; } cif-tools-1.0.1b/src/cif-merge.cpp0000664000175000017500000002000114200427744016542 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "cif-tools.hpp" #include #include #include #include #include #include // #include #include #include #include #include #include "cif++/Cif++.hpp" #include "cif++/Cif2PDB.hpp" #include "cif++/Structure.hpp" #include "cif++/CifParser.hpp" #include "cif++/CifValidator.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; namespace ba = boost::algorithm; namespace fs = std::filesystem; namespace io = boost::iostreams; namespace c = mmcif; using cif::iequals; // -------------------------------------------------------------------- void updateEntryID(cif::File& target, const std::string& entryID) { auto& db = target.firstDatablock(); if (db.getName() != entryID) db.setName(entryID); for (auto r: db["entry"]) r["id"] = entryID; } void transplant(cif::File& target, cif::File& donor) { auto& dbt = target.firstDatablock(); auto& dbd = donor.firstDatablock(); for (auto c: { "struct", "struct_keywords", "audit_author", "citation", "citation_author", "diffrn", "diffrn_radiation", "diffrn_radiation_wavelength" }) { auto cd = dbd.get(c); if (cd == nullptr or cd->empty()) continue; auto ct = dbt.get(c); if (ct == nullptr) { dbt.emplace(c); ct = dbt.get(c); } ct->clear(); for (auto r: *cd) 
ct->emplace(r); } std::string exptlMethod; auto dExplt = dbd["exptl"].find(cif::Key("entry_id") == dbd.getName()); if (dExplt.size() != 1) throw std::runtime_error("Invalid number of exptl records in donor file, should be exactly one this version of cif-merge"); cif::tie(exptlMethod) = dExplt.front().get("method"); auto tExplt = dbt["exptl"].find(cif::Key("entry_id") == dbt.getName()); if (tExplt.empty()) { auto c = dbt.emplace("exptl"); std::get<0>(c)->emplace({ { "entry_id", dbt.getName() }, { "method", exptlMethod } }); } else tExplt.front()["method"] = exptlMethod; // create a mapping for the entity_ids in both files const std::map kSrcMap{ { "man", "entity_src_gen" }, { "nat", "entity_src_nat" }, { "syn", "pdbx_entity_src_syn" } }; std::map d2tEntityIds; auto& targetEntity = dbt["entity"]; for (auto r : targetEntity) { std::string id, type, dEntityID; cif::tie(id, type) = r.get("id", "type"); if (iequals(type, "polymer")) { auto t = dbt["entity_poly"][cif::Key("entity_id") == id]; std::string polyType, seq; cif::tie(polyType, seq) = t.get("type", "pdbx_seq_one_letter_code"); auto d = dbd["entity_poly"][cif::Key("type") == polyType and cif::Key("pdbx_seq_one_letter_code") == seq]; if (d.empty()) { if (cif::VERBOSE) std::cerr << "Cannot map entity " << id << " in target file to an entity in the donor" << std::endl; continue; } dEntityID = d["entity_id"].as(); // copy over refseq auto sr = dbd["struct_ref"][cif::Key("entity_id") == dEntityID]; if (not sr.empty()) { sr["entity_id"] = id; dbt["struct_ref"].emplace(sr); std::string refID = sr["id"].as(); for (auto r: dbd["struct_ref_seq"].find(cif::Key("ref_id") == refID)) dbt["struct_ref_seq"].emplace(r); } } else if (iequals(type, "non-polymer")) { auto t = dbt["pdbx_entity_nonpoly"][cif::Key("entity_id") == id]; std::string compID; cif::tie(compID) = t.get("comp_id"); auto d = dbd["pdbx_entity_nonpoly"][cif::Key("comp_id") == compID]; if (d.empty()) { if (cif::VERBOSE) std::cerr << "Cannot map entity " << id << " 
in target file to an entity in the donor" << std::endl; continue; } cif::tie(dEntityID) = d.get("entity_id"); } else if (iequals(type, "water")) { cif::tie(dEntityID) = dbd["entity"][cif::Key("type") == type].get("id"); } else if (cif::VERBOSE) std::cerr << "Unsupported entity type: " << type << std::endl; if (dEntityID.empty()) continue; std::string srcMethod, description, weight; cif::tie(srcMethod, description, weight) = dbd["entity"][cif::Key("id") == dEntityID].get("src_method", "pdbx_description", "formula_weight"); r["src_method"] = srcMethod; r["pdbx_description"] = description; r["formula_weight"] = weight; if (kSrcMap.count(srcMethod)) { std::string srcRec = kSrcMap.at(srcMethod); auto d = dbd[srcRec][cif::Key("entity_id") == dEntityID]; if (not d.empty()) { d["entity_id"] = id; dbt.emplace(srcRec); dbt[srcRec].emplace(d); } } } } int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif-merge [options] inputFile donorFile "); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("input,i", po::value(), "Modified PDB file") ("output,o", po::value(), "Output file, default is stdout (terminal)") ("donor", po::value(), "CIF file (or PDB ID for this file) containing the data to collect data from") ("dict", po::value(), "Dictionary file containing restraints for residues in this specific target") ; po::options_description hidden_options("hidden options"); hidden_options.add_options() ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 1); p.add("donor", 1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if 
(vm.count("help") or vm.count("input") == 0 or vm.count("donor") == 0) { std::cerr << visible_options << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); // Load dict, if any if (vm.count("dict")) c::CompoundFactory::instance().pushDictionary(vm["dict"].as()); // Read input file mmcif::File cf{vm["input"].as()}; // Read donor file mmcif::File df{vm["donor"].as()}; updateEntryID(cf.file(), df.data().getName()); transplant(cf.file(), df.file()); if (vm.count("output")) cf.save(vm["output"].as()); else cf.file().save(std::cout); return 0; } cif-tools-1.0.1b/src/cif-tools.hpp0000664000175000017500000000270214200427744016620 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include "revision.hpp" cif-tools-1.0.1b/src/cif-validate.cpp0000664000175000017500000000623514200427744017251 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "cif-tools.hpp" #include #include #include "cif++/Cif++.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif-validate [option...] file"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("dict", po::value()->default_value("mmcif_pdbx_v50"), "The mmCIF dictionary to use, can be either mmcif_ddl, mmcif_pdbx or a path to the actual dictionary file") ("validate-links", "Validate all links") ("verbose,v", "Verbose output"); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input", po::value(), "Input file") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help")) { std::cerr << visible_options << std::endl; exit(vm.count("help") ? 
0 : 1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); cif::File f; f.loadDictionary(vm["dict"].as().c_str()); if (vm.count("input") == 0) f.load(std::cin); else f.load(vm["input"].as()); int result = f.isValid() ? 0 : 1; if (vm.count("validate-links")) f.validateLinks(); return result; } cif-tools-1.0.1b/src/cif2pdb.cpp0000664000175000017500000001057714200427744016236 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cif-tools.hpp" #include #include #include #include #include // #include #include #include #include "cif++/Cif++.hpp" #include "cif++/Cif2PDB.hpp" #include "cif++/Structure.hpp" namespace po = boost::program_options; namespace fs = std::filesystem; namespace io = boost::iostreams; namespace c = mmcif; int pr_main(int argc, char* argv[]) { po::options_description visible_options("cif2pdb options input [output]"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("no-validate", "Omit validation of the mmCIF file, forcing output in case of errors") ("dict", po::value(), "The mmCIF dictionary to use, can be either mmcif_ddl, mmcif_pdbx or a path to the actual dictionary file"); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input", po::value(), "Input file") ("output,o", po::value(), "Output file, default stdout") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 1); p.add("output", 1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or vm.count("input") == 0) { std::cerr << visible_options << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); std::string input = vm["input"].as(); std::regex pdbIdRx(R"(\d\w{3})"); fs::path file = input; // #warning "compile time PDB_DIR?" 
// if (not fs::exists(file) and std::regex_match(input, pdbIdRx)) // file = fs::path(PDB_DIR) / "mmCIF" / input.substr(1, 2) / (input + ".cif.gz"); cif::File f; if (vm.count("dict")) { std::string dict = vm["dict"].as(); f.loadDictionary(dict.c_str()); } else f.loadDictionary("mmcif_pdbx_v50"); f.load(file); if (not vm.count("no-validate") and not f.isValid()) { std::cerr << "This input mmCIF file is not valid"; if (not cif::VERBOSE) std::cerr << ", use the --verbose option to see what errors were found" << std::endl; exit(1); } if (vm.count("output")) { file = vm["output"].as(); std::ofstream outfile(file.c_str(), std::ios_base::out | std::ios_base::binary); io::filtering_stream out; if (file.extension() == ".gz") out.push(io::gzip_compressor()); out.push(outfile); WritePDBFile(out, f); } else WritePDBFile(std::cout, f); return 0; } cif-tools-1.0.1b/src/mmCQL.cpp0000664000175000017500000007635114200427744015700 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "cif-tools.hpp" #include #include #include #include #include #include // #include #include "cif++/Cif++.hpp" #include "cif++/Structure.hpp" #include "cif++/CifValidator.hpp" #include "cif++/CifUtils.hpp" namespace po = boost::program_options; namespace ba = boost::algorithm; namespace fs = std::filesystem; // namespace io = boost::iostreams; namespace c = mmcif; using unicode = char32_t; namespace zeep { // inlines /// \brief Append a single unicode character to an utf-8 string inline void append(std::string& s, unicode uc) { if (uc < 0x080) s += (static_cast(uc)); else if (uc < 0x0800) { char ch[2] = { static_cast(0x0c0 | (uc >> 6)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 2); } else if (uc < 0x00010000) { char ch[3] = { static_cast(0x0e0 | (uc >> 12)), static_cast(0x080 | ((uc >> 6) & 0x3f)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 3); } else { char ch[4] = { static_cast(0x0f0 | (uc >> 18)), static_cast(0x080 | ((uc >> 12) & 0x3f)), static_cast(0x080 | ((uc >> 6) & 0x3f)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 4); } } /// \brief remove the last unicode character from an utf-8 string inline unicode pop_last_char(std::string& s) { unicode result = 0; if (not s.empty()) { std::string::iterator ch = s.end() - 1; if ((*ch & 0x0080) == 0) { result = *ch; s.erase(ch); } else { int o = 0; do { result |= (*ch & 0x03F) << o; o += 6; --ch; } while (ch != s.begin() and (*ch & 0x0C0) == 0x080); switch (o) { case 6: 
result |= (*ch & 0x01F) << 6; break; case 12: result |= (*ch & 0x00F) << 12; break; case 18: result |= (*ch & 0x007) << 18; break; } s.erase(ch, s.end()); } } return result; } // this code only works if the input is valid utf-8 /// \brief return the first unicode and the advanced pointer from a string template std::tuple get_first_char(Iter ptr) { unicode result = static_cast(*ptr); ++ptr; if (result > 0x07f) { unsigned char ch[3]; if ((result & 0x0E0) == 0x0C0) { ch[0] = static_cast(*ptr); ++ptr; result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); } else if ((result & 0x0F0) == 0x0E0) { ch[0] = static_cast(*ptr); ++ptr; ch[1] = static_cast(*ptr); ++ptr; result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); } else if ((result & 0x0F8) == 0x0F0) { ch[0] = static_cast(*ptr); ++ptr; ch[1] = static_cast(*ptr); ++ptr; ch[2] = static_cast(*ptr); ++ptr; result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); } } return std::make_tuple(result, ptr); } // -------------------------------------------------------------------- inline std::string to_hex(uint32_t i) { char s[sizeof(i) * 2 + 3]; char* p = s + sizeof(s); *--p = 0; const char kHexChars[] = "0123456789abcdef"; while (i) { *--p = kHexChars[i & 0x0F]; i >>= 4; } *--p = 'x'; *--p = '0'; return p; } } // ----------------------------------------------------------------------- namespace cql { using unicode = uint32_t; using cif::iequals; class Statement; typedef std::shared_ptr StatementPtr; // ----------------------------------------------------------------------- class Statement { public: Statement(const Statement& ) = delete; Statement& operator=(const Statement&) = delete; virtual ~Statement() {} virtual void Execute() = 0; protected: Statement() {} }; // ----------------------------------------------------------------------- class StatementList : public Statement { public: StatementList() {} void Add(StatementPtr stmt) { 
mStatements.emplace_back(stmt); } virtual void Execute() { for (auto stmt: mStatements) stmt->Execute(); } private: std::vector mStatements; }; // ----------------------------------------------------------------------- class SelectStatement : public Statement { public: SelectStatement(cif::Category& category, bool distinct, std::vector&& items, cif::Condition&& where) : mCategory(category), mDistinct(distinct), mItems(std::move(items)), mWhere(std::move(where)) {} virtual void Execute() { std::vector fields(mItems.size()); std::unordered_set seen; std::cout << ba::join(mItems, "\t") << std::endl; for (auto r: mCategory.find(std::move(mWhere))) { transform(mItems.begin(), mItems.end(), fields.begin(), [r](auto item) { cif::detail::ItemReference ref = r[item]; return ref.as(); }); std::string line = ba::join(fields, "\t"); bool seenLine = seen.count(line); if (not mDistinct or not seenLine) std::cout << line << std::endl; if (mDistinct and not seenLine) seen.insert(line); } } private: cif::Category& mCategory; bool mDistinct; std::vector mItems; cif::Condition mWhere; }; // ----------------------------------------------------------------------- class DeleteStatement : public Statement { public: DeleteStatement(cif::Category& category, cif::Condition&& where) : mCategory(category), mWhere(std::move(where)) {} virtual void Execute() { cif::RowSet remove(mCategory); mWhere.prepare(mCategory); for (auto r: mCategory) { if (mWhere(mCategory, r)) remove.insert(remove.end(), r); } for (auto r: remove) mCategory.erase(r); std::cout << "Number of removed rows " << remove.size() << std::endl; } private: cif::Category& mCategory; cif::Condition mWhere; }; // ----------------------------------------------------------------------- class UpdateStatement : public Statement { public: UpdateStatement(cif::Category& category, std::vector>&& itemValuePairs, cif::Condition&& where) : mCategory(category), mItemValuePairs(std::move(itemValuePairs)), mWhere(std::move(where)) {} virtual 
void Execute() { size_t updated = 0; mWhere.prepare(mCategory); for (auto r: mCategory) { if (mWhere(mCategory, r)) { for (auto iv: mItemValuePairs) r[iv.first] = iv.second; ++updated; } } std::cout << "Number of updated rows: " << updated << std::endl; } private: cif::Category& mCategory; std::vector> mItemValuePairs; cif::Condition mWhere; }; // ----------------------------------------------------------------------- class Parser { public: Parser(cif::Datablock& db) : mDb(db) {} StatementPtr Parse(std::streambuf* is); private: enum class Token { eoln, undef, braceopen, braceclose, dot, comma, colon, semicolon, asterisk, eq_, lt_, le_, gt_, ge_, ne_, string, integer, number, ident, select, distinct, from, update, set, where, and_, or_, not_, insert, delete_, into, values, is_, null_ }; std::string Describe(Token token) { switch (token) { case Token::eoln: return ""; case Token::undef: return ""; case Token::braceopen: return "'('"; case Token::braceclose: return "')'"; case Token::dot: return "'.'"; case Token::comma: return "','"; case Token::colon: return "':'"; case Token::semicolon: return "';'"; case Token::asterisk: return "'*'"; case Token::eq_: return "'='"; case Token::lt_: return "'<'"; case Token::le_: return "'<='"; case Token::gt_: return "'>'"; case Token::ge_: return "'>='"; case Token::ne_: return "'<>'"; case Token::string: return "string"; case Token::integer: return "integer"; case Token::number: return "number"; case Token::ident: return "identifier"; case Token::select: return "SELECT"; case Token::distinct: return "DISTINCT"; case Token::from: return "FROM"; case Token::update: return "UPDATE"; case Token::set: return "SET"; case Token::where: return "WHERE"; case Token::and_: return "AND"; case Token::or_: return "OR"; case Token::not_: return "NOT"; case Token::insert: return "INSERT"; case Token::delete_: return "DELETE"; case Token::into: return "INTO"; case Token::values: return "VALUES"; case Token::is_: return "IS"; case Token::null_: 
return "NULL"; default: assert(false); return "unknown token"; } } uint8_t GetNextByte(); unicode GetNextUnicode(); unicode GetNextChar(); Token GetNextToken(); void Retract(); void Match(Token token); // parser rules StatementPtr ParseStatement(); StatementPtr ParseSelect(); StatementPtr ParseDelete(); StatementPtr ParseUpdate(); std::vector ParseItemList(); cif::Condition ParseWhereClause(cif::Category& cat); cif::Condition ParseNotWhereClause(cif::Category& cat); cif::Datablock& mDb; std::streambuf* mIs; Token mLookahead; std::stack mBuffer; std::string mToken; double mTokenFloat; int64_t mTokenInteger; }; // ----------------------------------------------------------------------- uint8_t Parser::GetNextByte() { int result = mIs->sbumpc(); if (result == std::streambuf::traits_type::eof()) result = 0; return static_cast(result); } unicode Parser::GetNextUnicode() { unicode result = GetNextByte(); if (result & 0x080) { uint8_t ch[3]; if ((result & 0x0E0) == 0x0C0) { ch[0] = GetNextByte(); if ((ch[0] & 0x0c0) != 0x080) throw std::runtime_error("Invalid utf-8"); result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); } else if ((result & 0x0F0) == 0x0E0) { ch[0] = GetNextByte(); ch[1] = GetNextByte(); if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) throw std::runtime_error("Invalid utf-8"); result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); } else if ((result & 0x0F8) == 0x0F0) { ch[0] = GetNextByte(); ch[1] = GetNextByte(); ch[2] = GetNextByte(); if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) throw std::runtime_error("Invalid utf-8"); result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); if (result > 0x10ffff) throw std::runtime_error("invalid utf-8 character (out of range)"); } } return result; } unicode Parser::GetNextChar() { unicode result = 0; if (not mBuffer.empty()) // if buffer is not empty we already did all the validity checks { result 
= mBuffer.top(); mBuffer.pop(); } else { result = GetNextUnicode(); if (result >= 0x080) { if (result == 0x0ffff or result == 0x0fffe) throw std::runtime_error("character " + zeep::to_hex(result) + " is not allowed"); // surrogate support else if (result >= 0x0D800 and result <= 0x0DBFF) { unicode uc2 = GetNextChar(); if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) result = (result - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; else throw std::runtime_error("leading surrogate character without trailing surrogate character"); } else if (result >= 0x0DC00 and result <= 0x0DFFF) throw std::runtime_error("trailing surrogate character without a leading surrogate"); } } // append(mToken, result); // somehow, append refuses to inline, so we have to do it ourselves if (result < 0x080) mToken += (static_cast(result)); else if (result < 0x0800) { char ch[2] = { static_cast(0x0c0 | (result >> 6)), static_cast(0x080 | (result & 0x3f))}; mToken.append(ch, 2); } else if (result < 0x00010000) { char ch[3] = { static_cast(0x0e0 | (result >> 12)), static_cast(0x080 | ((result >> 6) & 0x3f)), static_cast(0x080 | (result & 0x3f))}; mToken.append(ch, 3); } else { char ch[4] = { static_cast(0x0f0 | (result >> 18)), static_cast(0x080 | ((result >> 12) & 0x3f)), static_cast(0x080 | ((result >> 6) & 0x3f)), static_cast(0x080 | (result & 0x3f))}; mToken.append(ch, 4); } return result; } void Parser::Retract() { assert(not mToken.empty()); mBuffer.push(zeep::pop_last_char(mToken)); } bool is_name_start_char(unicode uc) { return (uc >= L'A' and uc <= L'Z') or uc == L'_' or (uc >= L'a' and uc <= L'z') or (uc >= 0x0C0 and uc <= 0x0D6) or (uc >= 0x0D8 and uc <= 0x0F6) or (uc >= 0x0F8 and uc <= 0x02FF) or (uc >= 0x0370 and uc <= 0x037D) or (uc >= 0x037F and uc <= 0x01FFF) or (uc >= 0x0200C and uc <= 0x0200D) or (uc >= 0x02070 and uc <= 0x0218F) or (uc >= 0x02C00 and uc <= 0x02FEF) or (uc >= 0x03001 and uc <= 0x0D7FF) or (uc >= 0x0F900 and uc <= 0x0FDCF) or (uc >= 0x0FDF0 and uc <= 0x0FFFD) or (uc >= 
0x010000 and uc <= 0x0EFFFF); } bool is_name_char(unicode uc) { return (uc >= '0' and uc <= '9') or uc == 0x0B7 or is_name_start_char(uc) or (uc >= 0x00300 and uc <= 0x0036F) or (uc >= 0x0203F and uc <= 0x02040); } Parser::Token Parser::GetNextToken() { enum class State { Start, Negative, Zero, NegativeZero, Number, NumberFraction, NumberExpSign, NumberExpDigit1, NumberExpDigit2, Literal, String, Escape, EscapeHex1, EscapeHex2, EscapeHex3, EscapeHex4, Less, Greater } state = State::Start; Token token = Token::undef; double fraction = 1.0, exponent = 1; bool negative = false, negativeExp = false; unicode hx; mToken.clear(); while (token == Token::undef) { unicode ch = GetNextChar(); switch (state) { case State::Start: switch (ch) { case 0: token = Token::eoln; break; case '(': token = Token::braceopen; break; case ')': token = Token::braceclose; break; // case '[': // token = Token::LeftBracket; // break; // case ']': // token = Token::RightBracket; // break; case '.': token = Token::dot; break; case ',': token = Token::comma; break; case ':': token = Token::colon; break; case ';': token = Token::semicolon; break; case '*': token = Token::asterisk; break; case '=': token = Token::eq_; break; case '<': state = State::Less; break; case '>': state = State::Greater; break; case ' ': case '\n': case '\r': case '\t': mToken.clear(); break; case '\'': mToken.pop_back(); state = State::String; break; case '-': state = State::Negative; break; default: if (ch == '0') { state = State::Zero; mTokenInteger = 0; } else if (ch >= '1' and ch <= '9') { mTokenInteger = ch - '0'; state = State::Number; } else if (is_name_start_char(ch)) state = State::Literal; else throw std::runtime_error("invalid character (" + zeep::to_hex(ch) + "/'" + (isprint(ch) ? 
static_cast(ch) : '.') + "') in command"); } break; case State::Less: if (ch == '=') token = Token::le_; else if (ch == '>') token = Token::ne_; else { Retract(); token = Token::lt_; } break; case State::Greater: if (ch == '=') token = Token::ge_; else { Retract(); token = Token::gt_; } break; case State::Negative: if (ch == '0') state = State::NegativeZero; else if (ch >= '1' and ch <= '9') { state = State::Number; mTokenInteger = ch - '0'; negative = true; } else throw std::runtime_error("invalid character '-' in command"); break; case State::NegativeZero: if (ch >= '0' or ch <= '9') throw std::runtime_error("invalid number in command, should not start with zero"); token = Token::number; break; case State::Zero: if (ch >= '0' or ch <= '9') throw std::runtime_error("invalid number in command, should not start with zero"); token = Token::number; break; case State::Number: if (ch >= '0' and ch <= '9') mTokenInteger = 10 * mTokenInteger + (ch - '0'); else if (ch == '.') { mTokenFloat = mTokenInteger; fraction = 0.1; state = State::NumberFraction; } else { Retract(); token = Token::integer; } break; case State::NumberFraction: if (ch >= '0' and ch <= '9') { mTokenFloat += fraction * (ch - '0'); fraction /= 10; } else if (ch == 'e' or ch == 'E') state = State::NumberExpSign; else { Retract(); token = Token::number; } break; case State::NumberExpSign: if (ch == '+') state = State::NumberExpDigit1; else if (ch == '-') { negativeExp = true; state = State::NumberExpDigit1; } else if (ch >= '0' and ch <= '9') { exponent = (ch - '0'); state = State::NumberExpDigit2; } break; case State::NumberExpDigit1: if (ch >= '0' and ch <= '9') { exponent = (ch - '0'); state = State::NumberExpDigit2; } else throw std::runtime_error("invalid floating point format in command"); break; case State::NumberExpDigit2: if (ch >= '0' and ch <= '9') exponent = 10 * exponent + (ch - '0'); else { Retract(); mTokenFloat *= pow(10, (negativeExp ? 
-1 : 1) * exponent); if (negative) mTokenFloat = -mTokenFloat; token = Token::number; } break; case State::Literal: if (not is_name_char(ch)) { Retract(); if (iequals(mToken, "SELECT")) token = Token::select; else if (iequals(mToken, "DISTINCT")) token = Token::distinct; else if (iequals(mToken, "FROM")) token = Token::from; else if (iequals(mToken, "UPDATE")) token = Token::update; else if (iequals(mToken, "SET")) token = Token::set; else if (iequals(mToken, "WHERE")) token = Token::where; else if (iequals(mToken, "AND")) token = Token::and_; else if (iequals(mToken, "OR")) token = Token::or_; else if (iequals(mToken, "NOT")) token = Token::not_; else if (iequals(mToken, "INSERT")) token = Token::insert; else if (iequals(mToken, "DELETE")) token = Token::delete_; else if (iequals(mToken, "INTO")) token = Token::into; else if (iequals(mToken, "VALUES")) token = Token::values; else if (iequals(mToken, "IS")) token = Token::is_; else if (iequals(mToken, "NULL")) token = Token::null_; else token = Token::ident; } break; case State::String: if (ch == '\'') { token = Token::string; mToken.pop_back(); } else if (ch == 0) throw std::runtime_error("Invalid unterminated std::string in command"); else if (ch == '\\') { state = State::Escape; mToken.pop_back(); } break; case State::Escape: switch (ch) { case '\'': case '\\': case '/': break; case 'n': mToken.back() = '\n'; break; case 't': mToken.back() = '\t'; break; case 'r': mToken.back() = '\r'; break; case 'f': mToken.back() = '\f'; break; case 'b': mToken.back() = '\b'; break; case 'u': state = State::EscapeHex1; mToken.pop_back(); break; default: throw std::runtime_error("Invalid escape sequence in command (\\" + std::string{static_cast(ch)} + ')'); } if (state == State::Escape) state = State::String; break; case State::EscapeHex1: if (ch >= 0 and ch <= '9') hx = ch - '0'; else if (ch >= 'a' and ch <= 'f') hx = 10 + ch - 'a'; else if (ch >= 'A' and ch <= 'F') hx = 10 + ch - 'A'; else throw std::runtime_error("Invalid 
hex sequence in command"); mToken.pop_back(); state = State::EscapeHex2; break; case State::EscapeHex2: if (ch >= 0 and ch <= '9') hx = 16 * hx + ch - '0'; else if (ch >= 'a' and ch <= 'f') hx = 16 * hx + 10 + ch - 'a'; else if (ch >= 'A' and ch <= 'F') hx = 16 * hx + 10 + ch - 'A'; else throw std::runtime_error("Invalid hex sequence in command"); mToken.pop_back(); state = State::EscapeHex3; break; case State::EscapeHex3: if (ch >= 0 and ch <= '9') hx = 16 * hx + ch - '0'; else if (ch >= 'a' and ch <= 'f') hx = 16 * hx + 10 + ch - 'a'; else if (ch >= 'A' and ch <= 'F') hx = 16 * hx + 10 + ch - 'A'; else throw std::runtime_error("Invalid hex sequence in command"); mToken.pop_back(); state = State::EscapeHex4; break; case State::EscapeHex4: if (ch >= 0 and ch <= '9') hx = 16 * hx + ch - '0'; else if (ch >= 'a' and ch <= 'f') hx = 16 * hx + 10 + ch - 'a'; else if (ch >= 'A' and ch <= 'F') hx = 16 * hx + 10 + ch - 'A'; else throw std::runtime_error("Invalid hex sequence in command"); mToken.pop_back(); zeep::append(mToken, hx); state = State::String; break; } } return token; } void Parser::Match(Token expected) { if (mLookahead != expected) throw std::runtime_error("Syntax error in command, expected " + Describe(expected) + " but found " + Describe(mLookahead) + " (" + mToken + ")"); mLookahead = GetNextToken(); } StatementPtr Parser::Parse(std::streambuf* is) { mIs = is; mLookahead = GetNextToken(); std::shared_ptr result(new StatementList()); while (mLookahead != Token::eoln) { auto stmt = ParseStatement(); result->Add(stmt); } return result; } // ----------------------------------------------------------------------- StatementPtr Parser::ParseStatement() { StatementPtr result; switch (mLookahead) { case Token::select: Match(Token::select); result = ParseSelect(); break; case Token::delete_: Match(Token::delete_); result = ParseDelete(); break; case Token::update: Match(Token::update); result = ParseUpdate(); break; default: // force error Match(Token::select); } 
Match(Token::semicolon); return result; } // ----------------------------------------------------------------------- StatementPtr Parser::ParseSelect() { bool distinct = false; if (mLookahead == Token::distinct) { distinct = true; Match(Token::distinct); } auto items = ParseItemList(); Match(Token::from); std::string cat = mToken; Match(Token::ident); auto category = mDb.get(cat); if (category == nullptr) throw std::runtime_error("Category " + cat + " is not defined in this file"); auto cv = category->getCatValidator(); if (cv != nullptr) { std::vector nItems; for (auto item: items) { if (item == "*") transform(cv->mItemValidators.begin(), cv->mItemValidators.end(), back_inserter(nItems), [cat](auto iv) { return iv.mTag; }); else nItems.push_back(item); } swap(items, nItems); items.erase(remove_if(items.begin(), items.end(), [category](auto item) { return not category->hasColumn(item); }), items.end()); for (auto item: items) { auto iv = cv->getValidatorForItem(item); if (iv == nullptr) throw std::runtime_error("Item " + item + " is not defined in the PDBx dictionary for category " + cat); } } if (mLookahead == Token::where) { Match(Token::where); return StatementPtr{ new SelectStatement(*category, distinct, std::move(items), ParseNotWhereClause(*category)) }; } else return StatementPtr{ new SelectStatement(*category, distinct, std::move(items), cif::All()) }; } // ----------------------------------------------------------------------- StatementPtr Parser::ParseDelete() { Match(Token::from); std::string cat = mToken; Match(Token::ident); auto category = mDb.get(cat); if (category == nullptr) throw std::runtime_error("Category " + cat + " is not defined in this file"); if (mLookahead == Token::where) { Match(Token::where); return StatementPtr{ new DeleteStatement(*category, ParseNotWhereClause(*category)) }; } else return StatementPtr{ new DeleteStatement(*category, cif::All()) }; } // ----------------------------------------------------------------------- 
StatementPtr Parser::ParseUpdate() { std::string cat = mToken; Match(Token::ident); auto category = mDb.get(cat); if (category == nullptr) throw std::runtime_error("Category " + cat + " is not defined in this file"); auto cv = category->getCatValidator(); Match(Token::set); std::vector> itemValuePairs; for (;;) { std::string item = mToken; Match(Token::ident); auto iv = cv ? cv->getValidatorForItem(item) : nullptr; if (cv and iv == nullptr) throw std::runtime_error("Invalid item '" + item + "' for category '" + cat + '\''); Match(Token::eq_); std::string value = mToken; switch (mLookahead) { case Token::integer: case Token::number: case Token::string: Match(mLookahead); break; default: Match(Token::string); } if (iv) iv->operator()(value); itemValuePairs.emplace_back(item, value); if (mLookahead == Token::comma) { Match(Token::comma); continue; } break; } if (mLookahead == Token::where) { Match(Token::where); return StatementPtr{ new UpdateStatement(*category, std::move(itemValuePairs), ParseNotWhereClause(*category)) }; } else return StatementPtr{ new UpdateStatement(*category, std::move(itemValuePairs), cif::All()) }; } // ----------------------------------------------------------------------- std::vector Parser::ParseItemList() { std::vector items; for (;;) { if (mLookahead == Token::asterisk) { Match(Token::asterisk); items.push_back("*"); } else { items.push_back(mToken); Match(Token::ident); } if (mLookahead == Token::comma) { Match(Token::comma); continue; } break; } return items; } // ----------------------------------------------------------------------- cif::Condition Parser::ParseNotWhereClause(cif::Category& cat) { cif::Condition result; if (mLookahead == Token::not_) { Match(Token::not_); result = cif::Not(ParseNotWhereClause(cat)); } else if (mLookahead == Token::braceopen) { Match(Token::braceopen); result = ParseNotWhereClause(cat); Match(Token::braceclose); } else { result = ParseWhereClause(cat); for (;;) { if (mLookahead == Token::and_) { 
Match(Token::and_); result = std::move(result) and ParseNotWhereClause(cat); continue; } if (mLookahead == Token::or_) { Match(Token::or_); result = std::move(result) or ParseNotWhereClause(cat); continue; } break; } } return result; } // ----------------------------------------------------------------------- cif::Condition Parser::ParseWhereClause(cif::Category& cat) { std::string item = mToken; Match(Token::ident); auto cv = cat.getCatValidator(); if (cv != nullptr and cv->getValidatorForItem(item) == nullptr) { throw std::runtime_error("Invalid item '" + item + "' for category '" + cat.name() + "' in where clause"); } if (mLookahead == Token::is_) { Match(mLookahead); if (mLookahead == Token::not_) { Match(mLookahead); Match(Token::null_); return cif::Key(item) != cif::Empty(); } else { Match(Token::null_); return cif::Key(item) == cif::Empty(); } } else { if (mLookahead < Token::eq_ or mLookahead > Token::ne_) Match(Token::eq_); auto oper = mLookahead; Match(mLookahead); cif::Condition c; std::string value = mToken; switch (mLookahead) { case Token::integer: case Token::number: case Token::string: Match(mLookahead); break; default: Match(Token::string); } switch (oper) { case Token::eq_: return cif::Key(item) == value; case Token::lt_: return cif::Key(item) < value; case Token::le_: return cif::Key(item) <= value; case Token::gt_: return cif::Key(item) > value; case Token::ge_: return cif::Key(item) >= value; case Token::ne_: return cif::Key(item) != value; default: throw std::logic_error("should never happen"); } } } } // ----------------------------------------------------------------------- int pr_main(int argc, char* argv[]) { po::options_description visible_options("mmCQL [options] input output"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("force", "Force writing of output file, even if it is the same as the input file") ("script,f", po::value(), "Read commands from 
script"); po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input,i", po::value(), "Input file") ("output,o", po::value(), "Output file") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 1); p.add("output", 1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or not vm.count("input")) { std::cerr << visible_options << std::endl; exit(vm.count("help") != 0); } if (vm.count("output") and vm["output"].as() == vm["input"].as() and vm.count("force") == 0) { std::cerr << "Cowardly refusing to overwrite input file (specify --force to force overwriting)" << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); auto input = vm["input"].as(); c::File file{fs::path(input)}; cql::Parser parser(file.data()); if (vm.count("script")) { std::ifstream cmdFile(vm["script"].as()); if (not cmdFile.is_open()) throw std::runtime_error("Failed to open command file " + vm["script"].as()); auto stmt = parser.Parse(cmdFile.rdbuf()); if (stmt) stmt->Execute(); } else { std::string cmd; while (std::getline(std::cin, cmd)) { try { std::istringstream is(cmd); auto stmt = parser.Parse(is.rdbuf()); if (stmt) stmt->Execute(); } catch(const std::exception& e) { std::cerr << e.what() << std::endl; } } } if (vm.count("output")) file.save(vm["output"].as()); return 0; } cif-tools-1.0.1b/src/pdb2cif.cpp0000664000175000017500000001137214200427744016230 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with 
or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "cif-tools.hpp" #include #include #include #include #include #include // #include #include #include #include "cif++/Cif++.hpp" #include "cif++/PDB2Cif.hpp" #include "cif++/Structure.hpp" #include "cif++/Compound.hpp" namespace po = boost::program_options; namespace fs = std::filesystem; namespace io = boost::iostreams; namespace c = mmcif; int pr_main(int argc, char* argv[]) { std::string input; try { po::options_description visible_options("pdb2cif options input [output]"); visible_options.add_options() ("help,h", "Display help message") ("version", "Print version") ("verbose,v", "Verbose output") ("validate", "Validate output file before writing") ("dict", po::value(), "Dictionary file containing restraints for residues in this specific target") ; po::options_description hidden_options("hidden options"); hidden_options.add_options() ("input", po::value(), "Input file") ("output,o", po::value(), "Output file, default stdout") ("debug,d", po::value(), "Debug level (for even more verbose output)"); po::options_description cmdline_options; cmdline_options.add(visible_options).add(hidden_options); po::positional_options_description p; p.add("input", 1); p.add("output", 1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm); po::notify(vm); if (vm.count("version")) { write_version_string(std::cout, vm.count("verbose")); exit(0); } if (vm.count("help") or vm.count("input") == 0) { std::cerr << visible_options << std::endl; exit(1); } cif::VERBOSE = vm.count("verbose") != 0; if (vm.count("debug")) cif::VERBOSE = vm["debug"].as(); // Load dict, if any if (vm.count("dict")) c::CompoundFactory::instance().pushDictionary(vm["dict"].as()); input = vm["input"].as(); std::regex pdbIdRx(R"(\d\w{3})"); fs::path file = input; // #warning "compile time PDB_DIR?" 
// if (not fs::exists(file) and regex_match(input, pdbIdRx)) // file = fs::path(PDB_DIR) / "pdb" / input.substr(1, 2) / ("pdb" + input + ".ent.gz"); std::ifstream infile(file, std::ios_base::in | std::ios_base::binary); if (not infile.is_open()) throw std::runtime_error("Could not open file " + file.string()); io::filtering_stream in; if (file.extension() == ".gz") { in.push(io::gzip_decompressor()); file = file.stem(); } in.push(infile); cif::File f; ReadPDBFile(in, f); if (vm.count("validate") and not f.isValid()) throw std::runtime_error("The resulting mmCIF is not valid"); if (vm.count("output")) { file = vm["output"].as(); std::ofstream outfile(file, std::ios_base::out | std::ios_base::binary); io::filtering_stream out; if (file.extension() == ".gz") out.push(io::gzip_compressor()); out.push(outfile); f.save(out); } else f.save(std::cout); } catch (const std::exception& ex) { if (not input.empty()) std::cerr << "Error converting '" << input << '\'' << std::endl; throw; } return 0; } cif-tools-1.0.1b/src/pr-main.cpp0000664000175000017500000000742514200427744016266 0ustar maartenmaarten/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "cif-tools.hpp" #include #include #include #include #include #include #include #include #include "cif++/Cif++.hpp" #include "cif++/CifUtils.hpp" int pr_main(int argc, char* argv[]); // -------------------------------------------------------------------- std::ostream& operator<<(std::ostream& os, const struct timeval& t) { uint64_t s = t.tv_sec; if (s > 24 * 60 * 60) { uint32_t days = s / (24 * 60 * 60); os << days << "d "; s %= 24 * 60 * 60; } if (s > 60 * 60) { uint32_t hours = s / (60 * 60); os << hours << "h "; s %= 60 * 60; } if (s > 60) { uint32_t minutes = s / 60; os << minutes << "m "; s %= 60; } double ss = s + 1e-6 * t.tv_usec; os << std::fixed << std::setprecision(1) << ss << 's'; return os; } std::ostream& operator<<(std::ostream& os, const std::chrono::duration& t) { uint64_t s = static_cast(std::trunc(t.count())); if (s > 24 * 60 * 60) { uint32_t days = s / (24 * 60 * 60); os << days << "d "; s %= 24 * 60 * 60; } if (s > 60 * 60) { uint32_t hours = s / (60 * 60); os << hours << "h "; s %= 60 * 60; } if (s > 60) { uint32_t minutes = s / 60; os << minutes << "m "; s %= 60; } double ss = s + 1e-6 * (t.count() - s); os << std::fixed << std::setprecision(1) << ss << 's'; return os; } class RUsage { public: ~RUsage() { if (cif::VERBOSE) { struct rusage u; auto end = std::chrono::system_clock::now(); std::chrono::duration diff = end - start; if (getrusage(RUSAGE_SELF, &u) == 0) std::cerr << "CPU usage: " << u.ru_utime << " user, " 
<< u.ru_stime << " system, " << diff << " wall" << std::endl; else perror("Failed to get rusage"); } } std::chrono::time_point start = std::chrono::system_clock::now(); }; // -------------------------------------------------------------------- // recursively print exception whats: void print_what (const std::exception& e) { std::cerr << e.what() << std::endl; try { std::rethrow_if_nested(e); } catch (const std::exception& nested) { std::cerr << " >> "; print_what(nested); } } int main(int argc, char* argv[]) { int result = -1; RUsage r; try { result = pr_main(argc, argv); } catch (std::exception& ex) { print_what(ex); exit(1); } return result; } cif-tools-1.0.1b/todo.txt0000664000175000017500000000071514200427744015131 0ustar maartenmaartenTODO cif++ - validate range - validate child-parent relationships (is parent available? e.g.) - improve API PDB2CIF - implement remark 3 for all known programs - implement other remarks - implement hetero field in pdb2cif - DNA one letter code generation (X?) - hetero field in entity_poly_seq, pdbx_poly_seq_scheme - chem_comp.type? chem_comp genereren uit db? - partial charge calculation - water for peptides? cif-grep - more options for cif-grep