pax_global_header00006660000000000000000000000064124557405110014516gustar00rootroot0000000000000052 comment=a6d51b7157b5b5be6869c5c1f519b8b841bb31d7 hmat-oss-1.0.4/000077500000000000000000000000001245574051100132535ustar00rootroot00000000000000hmat-oss-1.0.4/.travis.yml000066400000000000000000000003211245574051100153600ustar00rootroot00000000000000language: cpp compiler: - gcc before_install: - sudo apt-get update -qq - sudo apt-get install -qq libblas-dev before_script: - mkdir build - cd build - cmake -DBUILD_EXAMPLES=ON .. script: make hmat-oss-1.0.4/CMake/000077500000000000000000000000001245574051100142335ustar00rootroot00000000000000hmat-oss-1.0.4/CMake/FindCBLAS.cmake000066400000000000000000000126671245574051100166760ustar00rootroot00000000000000# - Find CBLAS library # # This module finds an installed fortran library that implements the CBLAS # linear-algebra interface (see http://www.netlib.org/blas/), with CBLAS # interface. # # This module sets the following variables: # CBLAS_FOUND - set to true if a library implementing the CBLAS interface is found # CBLAS_LIBRARIES - list of libraries (using full path name) to link against to use CBLAS # CBLAS_INCLUDE_DIR - path to includes # CBLAS_INCLUDE_FILE - the file to be included to use CBLAS # SET(CBLAS_LIBRARIES) SET(CBLAS_INCLUDE_DIR) SET(CBLAS_INCLUDE_FILE) # CBLAS in Intel mkl FIND_PACKAGE(MKL) IF (MKL_FOUND AND NOT CBLAS_LIBRARIES) SET(CBLAS_LIBRARIES ${MKL_LIBRARIES}) SET(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR}) SET(CBLAS_INCLUDE_FILE "mkl_cblas.h") ENDIF (MKL_FOUND AND NOT CBLAS_LIBRARIES) # Old CBLAS search SET(_verbose TRUE) INCLUDE(CheckFunctionExists) INCLUDE(CheckIncludeFile) MACRO(CHECK_ALL_LIBRARIES LIBRARIES _prefix _name _flags _list _include _search_include) # This macro checks for the existence of the combination of fortran libraries # given by _list. If the combination is found, this macro checks (using the # Check_Fortran_Function_Exists macro) whether can link against that library # combination using the name of a routine given by _name using the linker # flags given by _flags. If the combination of libraries is found and passes # the link test, LIBRARIES is set to the list of complete library paths that # have been found. Otherwise, LIBRARIES is set to FALSE. # N.B. _prefix is the prefix applied to the names of all cached variables that # are generated internally and marked advanced by this macro. SET(__list) FOREACH(_elem ${_list}) IF(__list) SET(__list "${__list} - ${_elem}") ELSE(__list) SET(__list "${_elem}") ENDIF(__list) ENDFOREACH(_elem) IF(_verbose) MESSAGE(STATUS "Checking for [${__list}]") ENDIF(_verbose) SET(_libraries_work TRUE) SET(${LIBRARIES}) SET(_combined_name) SET(_paths) FOREACH(_library ${_list}) SET(_combined_name ${_combined_name}_${_library}) # did we find all the libraries in the _list until now? # (we stop at the first unfound one) IF(_libraries_work) IF(APPLE) FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH ) ELSE(APPLE) FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH ) ENDIF(APPLE) MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY) IF(${_prefix}_${_library}_LIBRARY) GET_FILENAME_COMPONENT(_path ${${_prefix}_${_library}_LIBRARY} PATH) LIST(APPEND _paths ${_path}/../include ${_path}/../../include) ENDIF(${_prefix}_${_library}_LIBRARY) SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) SET(_libraries_work ${${_prefix}_${_library}_LIBRARY}) ENDIF(_libraries_work) ENDFOREACH(_library ${_list}) # Test include SET(_bug_search_include ${_search_include}) #CMAKE BUG!!! SHOULD NOT BE THAT IF(_bug_search_include) FIND_PATH(${_prefix}${_combined_name}_INCLUDE ${_include} ${_paths}) MARK_AS_ADVANCED(${_prefix}${_combined_name}_INCLUDE) IF(${_prefix}${_combined_name}_INCLUDE) IF (_verbose) MESSAGE(STATUS "Includes found") ENDIF (_verbose) SET(${_prefix}_INCLUDE_DIR ${${_prefix}${_combined_name}_INCLUDE}) SET(${_prefix}_INCLUDE_FILE ${_include}) ELSE(${_prefix}${_combined_name}_INCLUDE) SET(_libraries_work FALSE) ENDIF(${_prefix}${_combined_name}_INCLUDE) ELSE(_bug_search_include) SET(${_prefix}_INCLUDE_DIR) SET(${_prefix}_INCLUDE_FILE ${_include}) ENDIF(_bug_search_include) # Test this combination of libraries. IF(_libraries_work) SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS) SET(CMAKE_REQUIRED_LIBRARIES) MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS) SET(_libraries_work ${${_prefix}${_combined_name}_WORKS}) IF(_verbose AND _libraries_work) MESSAGE(STATUS "Libraries found") ENDIF(_verbose AND _libraries_work) ENDIF(_libraries_work) # Fin IF(NOT _libraries_work) SET(${LIBRARIES} NOTFOUND) ENDIF(NOT _libraries_work) ENDMACRO(CHECK_ALL_LIBRARIES) # Generic CBLAS library IF(NOT CBLAS_LIBRARIES) CHECK_ALL_LIBRARIES( CBLAS_LIBRARIES CBLAS cblas_dgemm "" "cblas" "cblas.h" TRUE ) ENDIF() # CBLAS in ATLAS library? (http://math-atlas.sourceforge.net/) IF(NOT CBLAS_LIBRARIES) CHECK_ALL_LIBRARIES( CBLAS_LIBRARIES CBLAS cblas_dgemm "" "cblas;atlas" "cblas.h" TRUE ) ENDIF() # CBLAS in BLAS library IF(NOT CBLAS_LIBRARIES) CHECK_ALL_LIBRARIES( CBLAS_LIBRARIES CBLAS cblas_dgemm "" "blas" "cblas.h" TRUE ) ENDIF() # Apple CBLAS library? IF(NOT CBLAS_LIBRARIES) CHECK_ALL_LIBRARIES( CBLAS_LIBRARIES CBLAS cblas_dgemm "" "Accelerate" "Accelerate/Accelerate.h" FALSE ) ENDIF() IF( NOT CBLAS_LIBRARIES ) CHECK_ALL_LIBRARIES( CBLAS_LIBRARIES CBLAS cblas_dgemm "" "vecLib" "vecLib/vecLib.h" FALSE ) ENDIF() include ( FindPackageHandleStandardArgs ) find_package_handle_standard_args ( CBLAS DEFAULT_MSG CBLAS_LIBRARIES ) hmat-oss-1.0.4/CMake/FindMKL.cmake000066400000000000000000000147451245574051100164740ustar00rootroot00000000000000# HMat-OSS (HMatrix library, open source software) # # Copyright (C) 2014-2015 Airbus Group SAS # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # http://github.com/jeromerobert/hmat-oss # - Find MKL installation. # Try to find MKL. The following values are defined # MKL_FOUND - True if MKL has been found in MKL # MKL_BLAS_FOUND - True if blas has been found in MKL (should be always ok) # MKL_CBLAS_FOUND - True if cblas has been found in MKL (OK for >=10.2) # MKL_LAPACKE_FOUND - True if lapacke has been found in MKL (OK for >=10.3) # MKL_BLACS_FOUND - True if blacs has been found in libraries listed in ${MKL_LIBRARIES} # MKL_SCALAPACK_FOUND - True if scalapack has been found in libraries listed in ${MKL_LIBRARIES} # MKL_DEFINITIONS - list of compilation definition -DFOO # + many HAVE_XXX... #See http://software.intel.com/sites/products/mkl/MKL_Link_Line_Advisor.html #TODO add scalapack, sequential, ilp64, ... # Allow to skip MKL detection even if MKLROOT is set in the environment. option(MKL_DETECT "Try to detect and use MKL." ON) if(MKL_DETECT) find_path(MKL_INCLUDE_DIRS NAMES mkl.h HINTS $ENV{MKLROOT}/include ${MKLROOT}/include) option(MKL_STATIC "Link with MKL statically." OFF) if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) set(MKL_ARCH "intel64") set(MKL_IL "lp64") else( CMAKE_SIZEOF_VOID_P EQUAL 8 ) set(MKL_ARCH "ia32") set(MKL_IL "c") endif( CMAKE_SIZEOF_VOID_P EQUAL 8 ) find_library(MKL_CORE_LIBRARY NAMES mkl_core.lib mkl_core libmkl_core.so HINTS $ENV{MKLROOT}/lib ${MKLROOT}/lib PATH_SUFFIXES ${MKL_ARCH}) get_filename_component(MKL_LIBRARY_DIR ${MKL_CORE_LIBRARY} PATH) include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-mkl=parallel" MKL_PARALLEL_COMPILER_FLAG) if(WIN32) if(MKL_STATIC) set(MKL_LINKER_FLAGS "mkl_intel_${MKL_IL}.lib mkl_core.lib mkl_intel_thread.lib") set(MKL_COMPILE_FLAGS "") else() set(MKL_LINKER_FLAGS "mkl_intel_${MKL_IL}_dll.lib mkl_core_dll.lib mkl_intel_thread_dll.lib") set(MKL_COMPILE_FLAGS "/Qmkl:parallel") endif() else() if(MKL_STATIC) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(MKL_LINKER_FLAGS "-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_${MKL_IL}.a ${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group -ldl -lm") set(MKL_COMPILE_FLAGS "") else() set(MKL_LINKER_FLAGS "-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_${MKL_IL}.a ${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_intel_thread.a -Wl,--end-group") set(MKL_COMPILE_FLAGS "") endif() else() if(MKL_PARALLEL_COMPILER_FLAG) set(MKL_LINKER_FLAGS "-mkl=parallel") set(MKL_COMPILE_FLAGS "-mkl=parallel") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(MKL_LINKER_FLAGS "-L${MKL_LIBRARY_DIR} -lmkl_intel_${MKL_IL} -lmkl_core -lmkl_gnu_thread") set(MKL_COMPILE_FLAGS "") endif() endif() endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(MKL DEFAULT_MSG MKL_INCLUDE_DIRS) set(MKL_CBLAS_FOUND FALSE) set(MKL_BLAS_FOUND FALSE) set(MKL_LAPACKE_FOUND FALSE) set(MKL_BLACS_FOUND FALSE) set(MKL_SCALAPACK_FOUND FALSE) endif(MKL_DETECT) if (MKL_FOUND) include(CMakePushCheckState) cmake_push_check_state() set(CMAKE_REQUIRED_FLAGS ${MKL_COMPILE_FLAGS}) set(CMAKE_REQUIRED_INCLUDES ${MKL_INCLUDE_DIRS}) # We want MKL link flags to be added at the end of the command line # else static compilation will fail, so we use CMAKE_REQUIRED_LIBRARIES # instead of CMAKE_REQUIRED_FLAGS list(APPEND CMAKE_REQUIRED_LIBRARIES ${MKL_LINKER_FLAGS}) include(CheckIncludeFile) check_include_file("mkl_cblas.h" HAVE_MKL_CBLAS_H) check_include_file("mkl_lapacke.h" HAVE_MKL_LAPACKE_H) check_include_file("mkl.h" HAVE_MKL_H) check_include_file("mkl_service.h" HAVE_MKL_SERVICE_H) check_include_file("mkl_blas.h" HAVE_MKL_BLAS_H) check_include_file("mkl_types.h" HAVE_MKL_TYPES_H) include(CheckFunctionExists) check_function_exists("MKLGetVersion" HAVE_MKLGETVERSION) check_function_exists("dgemm" HAVE_DGEMM) check_function_exists("cblas_dgemm" HAVE_CBLAS_DGEMM) check_function_exists("LAPACKE_zgesdd" HAVE_LAPACKE_ZGESDD) check_function_exists("LAPACKE_zgeqrf" HAVE_LAPACKE_ZGEQRF) check_function_exists("zgemm3m" HAVE_ZGEMM3M) check_function_exists("mkl_set_num_threads" HAVE_MKL_SET_NUM_THREADS) check_function_exists("mkl_get_max_threads" HAVE_MKL_GET_MAX_THREADS) check_function_exists("MKL_Get_Version" HAVE_MKL_GET_VERSION) check_function_exists("mkl_simatcopy" HAVE_MKL_IMATCOPY) get_property(_LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES) if (_LANGUAGES_ MATCHES Fortran) include(CheckFortranFunctionExists) check_fortran_function_exists("psgesv" HAVE_PSGESV) check_fortran_function_exists("dgesd2d" HAVE_DGESD2D) endif () cmake_pop_check_state() if (HAVE_MKL_CBLAS_H AND HAVE_CBLAS_DGEMM) set(MKL_CBLAS_FOUND TRUE) endif() if (HAVE_DGEMM) set(MKL_BLAS_FOUND TRUE) endif() if (HAVE_MKL_LAPACKE_H AND HAVE_LAPACKE_ZGESDD) set(MKL_LAPACKE_FOUND TRUE) endif() if(HAVE_DGESD2D) set(MKL_BLACS_FOUND TRUE) endif() if (HAVE_PSGESV) set(MKL_SCALAPACK_FOUND TRUE) endif() set(MKL_DEFINITIONS) foreach(arg_ HAVE_MKL_H HAVE_MKL_CBLAS_H HAVE_MKL_LAPACKE_H HAVE_MKL_SERVICE_H HAVE_MKL_BLAS_H HAVE_MKL_TYPES_H) if(${${arg_}}) list(APPEND MKL_DEFINITIONS ${arg_}) endif() endforeach(arg_ ${ARGN}) message(STATUS "${MYPACK}_DEFINITIONS = ${${MYPACK}_DEFINITIONS}") endif (MKL_FOUND) hmat-oss-1.0.4/CMake/GitVersion.cmake000066400000000000000000000032531245574051100173310ustar00rootroot00000000000000# HMat-OSS (HMatrix library, open source software) # # Copyright (C) 2014-2015 Airbus Group SAS # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # http://github.com/jeromerobert/hmat-oss # Set label_VERSION to the git version function(git_version label default_version) option(${label}_GIT_VERSION "Get the version string from git describe" ON) if(${label}_GIT_VERSION) find_package(Git) if(GIT_FOUND) execute_process(COMMAND "${GIT_EXECUTABLE}" --git-dir ${CMAKE_CURRENT_SOURCE_DIR}/.git describe --dirty=-dirty --always --tags OUTPUT_VARIABLE _GIT_DESCRIBE ERROR_QUIET) if(_GIT_DESCRIBE) string(STRIP ${_GIT_DESCRIBE} ${label}_VERSION) set(${label}_VERSION ${${label}_VERSION} PARENT_SCOPE) endif() endif() endif() if(NOT ${label}_VERSION) set(${label}_VERSION ${default_version} PARENT_SCOPE) endif() message(STATUS "Version string is ${${label}_VERSION}") endfunction() hmat-oss-1.0.4/CMake/HMATConfig.cmake.in000066400000000000000000000037711245574051100175310ustar00rootroot00000000000000# HMat-OSS (HMatrix library, open source software) # # Copyright (C) 2014-2015 Airbus Group SAS # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # http://github.com/jeromerobert/hmat-oss # - Config file for the HMAT package # It defines the following variables # HMAT_INCLUDE_DIRS - include directories for HMAT # HMAT_LIBRARY_DIRS - library directories for HMAT (normally not used!) # HMAT_LIBRARIES - libraries to link against # HMAT_EXECUTABLE - the bar executable # HMAT_DEFINITIONS - List of compilation flags -DTOTO to export # defined since 2.8.3 if (NOT CMAKE_CURRENT_LIST_DIR) get_filename_component (CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) endif () # Tell the user project where to find our headers and libraries set(HMAT_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/@RELATIVE_HMAT_INCLUDE_DIRS@;@MPI_INCLUDE_PATH@") set(HMAT_LIBRARY_DIRS "${CMAKE_CURRENT_LIST_DIR}/@RELATIVE_HMAT_LIB_DIR@") # Allows loading HMAT settings from another project set(HMAT_CONFIG_FILE "${CMAKE_CURRENT_LIST_FILE}") # List of compilation flags -DTOTO to export set(HMAT_DEFINITIONS "@HMAT_DEFINITIONS@") # Our library dependencies (contains definitions for IMPORTED targets) include("${CMAKE_CURRENT_LIST_DIR}/HMATLibraryDepends.cmake") # These are IMPORTED targets created by HMATLibraryDepends.cmake set(HMAT_LIBRARIES "@CMAKE_PROJECT_NAME@") hmat-oss-1.0.4/CMake/HMATConfigVersion.cmake.in000066400000000000000000000023271245574051100210730ustar00rootroot00000000000000# HMat-OSS (HMatrix library, open source software) # # Copyright (C) 2014-2015 Airbus Group SAS # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # http://github.com/jeromerobert/hmat-oss set(PACKAGE_VERSION "@HMAT_VERSION@") # Check whether the requested PACKAGE_FIND_VERSION is compatible if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_EXACT TRUE) endif() endif() hmat-oss-1.0.4/CMake/config.h.in000066400000000000000000000033471245574051100162650ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "hmat/config.h" /** HMAT Configuration header file prototype for CMake */ #cmakedefine HAVE_LIBRT /* Define to 1 if you have the header file. */ #cmakedefine HAVE_STDINT_H /* Define to 1 if you have the header file. */ #cmakedefine HAVE_UNISTD_H /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. */ #cmakedefine HAVE_TIME_H /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYS_RESOURCE_H #cmakedefine HAVE_ZGEMM3M #cmakedefine HAVE_MKL_H #cmakedefine HAVE_MKL_IMATCOPY /* Define to 1 if you have the header file. */ #cmakedefine HAVE_OMP_H #cmakedefine HAVE_GOTO_GET_NUM_PROCS #cmakedefine HAVE_OPENBLAS_SET_NUM_THREADS #if defined(HAVE_GOTO_GET_NUM_PROCS) && defined(HAVE_OPENBLAS_SET_NUM_THREADS) #define OPENBLAS_DISABLE_THREADS #endif hmat-oss-1.0.4/CMake/hmat-config.h.in000066400000000000000000000016741245574051100172150ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #pragma once #ifndef __HMAT_CONFIG_H #define __HMAT_CONFIG_H #define HMAT_VERSION "@HMAT_VERSION@" #endif hmat-oss-1.0.4/CMakeLists.txt000066400000000000000000000334111245574051100160150ustar00rootroot00000000000000# HMat-OSS (HMatrix library, open source software) # # Copyright (C) 2014-2015 Airbus Group SAS # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # http://github.com/jeromerobert/hmat-oss cmake_minimum_required(VERSION 2.8) # Set CMAKE_BUILD_TYPE to Release by default. # Must be done before calling project() if(CMAKE_BUILD_TYPE MATCHES "^CMAKE_BUILD_TYPE$") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" Release Debug RelWithDebInfo MinSizeRel) endif() # Set BUILD_SHARED_LIBS to ON by default. # Must be done before calling project() if(BUILD_SHARED_LIBS MATCHES "^BUILD_SHARED_LIBS$") if(NOT WIN32) # __declspec(dllexport) are missing in hmat so it's currently not possible # to build it as a shared library on win32 set(BUILD_SHARED_LIBS "ON" CACHE BOOL "Build shared libraries." FORCE) endif() endif() project(hmat-oss C CXX) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMake) if(NOT HMAT_VERSION) include(GitVersion) git_version(HMAT 1.0.4) endif() set(HMAT_SO_VERSION 1) # Offer the user the choice of overriding the installation directories set(INSTALL_LIB_DIR lib${LIB_SUFFIX} CACHE PATH "Installation directory for libraries") set(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") set(INSTALL_INCLUDE_DIR include CACHE PATH "Installation directory for header files") set(INSTALL_DATA_DIR share/hmat CACHE PATH "Installation directory for data files") set(INSTALL_CMAKE_DIR ${INSTALL_LIB_DIR}/cmake/hmat CACHE PATH "Installation directory for cmake config files") # Make relative paths absolute (needed later on) foreach(p LIB BIN INCLUDE DATA CMAKE) set(var INSTALL_${p}_DIR) set(RELATIVE_INSTALL_${p}_DIR ${INSTALL_${p}_DIR}) if(NOT IS_ABSOLUTE "${${var}}") set(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") endif() endforeach() # CMake 2.x does not define MSVC when using Intel compiler on Windows, see # http://public.kitware.com/Bug/view.php?id=14476 if ( WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) set(WINTEL TRUE) set(CMAKE_C_FLAGS "/Qstd=c99 ${CMAKE_C_FLAGS}") endif() # ======================== # C # ======================== include(CheckIncludeFile) check_include_file("stdint.h" HAVE_STDINT_H) check_include_file("sys/types.h" HAVE_SYS_TYPES_H) check_include_file("time.h" HAVE_TIME_H) check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) check_include_file("unistd.h" HAVE_UNISTD_H) include_directories(${PROJECT_SOURCE_DIR}/include) # Enable gcc vectorization function(opt_flag flag) if(CMAKE_VERSION VERSION_GREATER 2.8.12) include(CheckCCompilerFlag) string(MAKE_C_IDENTIFIER ${flag} label) check_c_compiler_flag(${flag} ${label}) if(${label}) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${flag}" PARENT_SCOPE) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${flag}" PARENT_SCOPE) endif() endif() endfunction() opt_flag("-ffast-math") opt_flag("-funsafe-math-optimizations") # ======================== # SYSTEM & EXTERNAL LIBS # ======================== set(EXTRA_LIBS "") include(CheckLibraryExists) check_library_exists("m" sqrt "" HAVE_LIBM) if(HAVE_LIBM) set(M_LIBRARY m) endif() check_library_exists("rt" clock_gettime "" HAVE_LIBRT) if(HAVE_LIBRT) set(RT_LIBRARY rt) endif() find_package(OpenMP) if(OPENMP_FOUND) set(HAVE_OMP_H TRUE) set(CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") set(CMAKE_C_FLAGS "${OpenMP_C_FLAGS} ${CMAKE_C_FLAGS}") set(CMAKE_SHARED_LIBRARY_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}") endif() # MKL find_package(MKL) if(MKL_FOUND) include_directories(${MKL_INCLUDE_DIRS}) set(CMAKE_C_FLAGS "${MKL_COMPILER_FLAGS} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${MKL_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}") endif() option(USE_DEBIAN_OPENBLAS "On Debian, link to openblas instead of generic blas." ON) # BLAS/LAPACK if (NOT MKL_FOUND) if(USE_DEBIAN_OPENBLAS) get_filename_component(real_blas_path "/usr/lib/libblas.so" REALPATH) string(REGEX MATCH "/usr/lib/openblas-base/libblas.so" is_debian_openblas ${real_blas_path}) if(is_debian_openblas) set(BLAS_FOUND ON) set(BLAS_LIBRARIES openblas) set(CBLAS_INCLUDE_DIRS /usr/include/openblas) include_directories(${CBLAS_INCLUDE_DIRS}) set(LAPACK_LIBRARIES openblas) unset(BLAS_LINKER_FLAGS) endif() endif() if(NOT is_debian_openblas) # workaround for cmake issue #0009976 if (CMAKE_VERSION VERSION_LESS 2.8.4) enable_language(Fortran) endif () find_package(BLAS REQUIRED) find_package(LAPACK REQUIRED) endif() message(STATUS "BLAS_FOUND = ${BLAS_FOUND}") message(STATUS "BLAS_LINKER_FLAGS = ${BLAS_LINKER_FLAGS}") message(STATUS "BLAS_LIBRARIES = ${BLAS_LIBRARIES}") message(STATUS "LAPACK_LIBRARIES = ${LAPACK_LIBRARIES}") link_directories(${BLAS_LIBRARY_DIRS}) include_directories(${BLAS_INCLUDE_DIRS}) # MKL_INCLUDE_DIRS is written into HMATConfig.cmake, fake it set(MKL_INCLUDE_DIRS "${BLAS_INCLUDE_DIRS}") endif() # CBLAS include(CheckFunctionExists) if (NOT MKL_CBLAS_FOUND) if (CMAKE_VERSION VERSION_GREATER 2.8.8) include(CMakePushCheckState) cmake_push_check_state() endif () set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("openblas_set_num_threads" HAVE_OPENBLAS_SET_NUM_THREADS) check_function_exists("goto_get_num_procs" HAVE_GOTO_GET_NUM_PROCS) check_function_exists("cblas_dgemm" CHECK_FUNCTION_EXISTS_CBLAS_DGEMM) if (CMAKE_VERSION VERSION_GREATER 2.8.8) cmake_pop_check_state() endif () endif() if ((NOT MKL_CBLAS_FOUND) AND (NOT is_debian_openblas)) # Functions may already be available via MKL or BLAS, but we need cblas.h if (CHECK_FUNCTION_EXISTS_CBLAS_DGEMM) find_path(CBLAS_INCLUDE_DIRS NAMES cblas.h DOC "CBLAS include directory") if(CBLAS_INCLUDE_DIRS) set(CMAKE_REQUIRED_INCLUDES ${CBLAS_INCLUDE_DIRS}) include_directories(${CBLAS_INCLUDE_DIRS}) check_include_file("cblas.h" HAVE_CBLAS_H) if (NOT HAVE_CBLAS_H) message(FATAL_ERROR "cblas.h not found") endif() else(CBLAS_INCLUDE_DIRS) message(FATAL_ERROR "cblas.h not found") endif(CBLAS_INCLUDE_DIRS) else (CHECK_FUNCTION_EXISTS_CBLAS_DGEMM) find_package(CBLAS REQUIRED) endif() endif () # ======================== # Configuration file # ======================== configure_file("${PROJECT_SOURCE_DIR}/CMake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h") configure_file("${PROJECT_SOURCE_DIR}/CMake/hmat-config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/hmat/config.h") include_directories(${CMAKE_CURRENT_BINARY_DIR}) # ============================ # Generation des bibliotheques # ============================ # Sources file(GLOB_RECURSE HMAT_SOURCES RELATIVE ${PROJECT_SOURCE_DIR} src/*.cpp) list(REMOVE_ITEM HMAT_SOURCES src/hmat_cpp_interface.cpp) link_directories(${PROJECT_BINARY_DIR} ${BLAS_LIBRARY_DIRS}) add_definitions(-D_GNU_SOURCE) # We want to enforce assertions even in Release mode add_definitions(-UNDEBUG) if (MSVC) add_definitions(-D__func__=__FUNCTION__) add_definitions(-D_CRT_SECURE_NO_WARNINGS) endif () # lib HMAT if(HMAT_OSS_STATIC AND BUILD_SHARED_LIBS) add_library(${PROJECT_NAME} STATIC ${HMAT_SOURCES}) if(BUILD_SHARED_LIBS) set_target_properties(${PROJECT_NAME} PROPERTIES COMPILE_FLAGS -fPIC) endif() else() add_library(${PROJECT_NAME} ${HMAT_SOURCES}) install(TARGETS ${PROJECT_NAME} EXPORT HMATLibraryDepends RUNTIME DESTINATION "${RELATIVE_INSTALL_BIN_DIR}" COMPONENT Runtime LIBRARY DESTINATION "${RELATIVE_INSTALL_LIB_DIR}" COMPONENT Runtime ARCHIVE DESTINATION "${RELATIVE_INSTALL_LIB_DIR}" COMPONENT Development ) endif() set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS "${LAPACK_LINKER_FLAGS} ${BLAS_LINKER_FLAGS} ${MPI_LINK_FLAGS}") if(NOT HMAT_NO_VERSION) set_target_properties(${PROJECT_NAME} PROPERTIES VERSION ${HMAT_VERSION} SOVERSION ${HMAT_SO_VERSION}) endif() set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_NAME_DIR ${INSTALL_LIB_DIR} ) # LINK_PRIVATE and LINK_PUBLIC keywords were introduced in 2.8.7 if(BUILD_SHARED_LIBS AND CMAKE_VERSION VERSION_GREATER 2.8.6) # Programs using hmat do not need to link directly with second level dependencies so we strip them all set(_LINK_PRIVATE LINK_PRIVATE) set(_LINK_PUBLIC LINK_PUBLIC) endif() target_link_libraries(${PROJECT_NAME} ${_LINK_PRIVATE} ${CBLAS_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${MPI_C_LIBRARIES} ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES}) if(MKL_FOUND) # The start-group/end-group flags must be at the end of the # link command line, so we must use target_link_libraries not, CMAKE_SHARED_XXX_FLAGS target_link_libraries(${PROJECT_NAME} ${_LINK_PRIVATE} ${MKL_LINKER_FLAGS}) endif() if (HAVE_LIBRT) # rt is needed by my_chrono.hpp target_link_libraries(${PROJECT_NAME} ${_LINK_PUBLIC} ${RT_LIBRARY}) endif () if (HAVE_LIBM) target_link_libraries(${PROJECT_NAME} ${_LINK_PUBLIC} ${M_LIBRARY}) endif() # Examples include_directories(${PROJECT_SOURCE_DIR}/src) # Install examples with RPATH list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${INSTALL_LIB_DIR}" isSystemDir) if("${isSystemDir}" STREQUAL "-1") set(CMAKE_INSTALL_RPATH "${INSTALL_LIB_DIR}") endif("${isSystemDir}" STREQUAL "-1") option(BUILD_EXAMPLES "build examples" OFF) macro(hmat_add_example name source) if (BUILD_EXAMPLES) if (MSVC OR WINTEL) set_source_files_properties("examples/${source}" PROPERTIES COMPILE_FLAGS "-D_USE_MATH_DEFINES") endif () if (MSVC) # No complex.h on MSVC, compile with C++ set_source_files_properties("examples/${source}" PROPERTIES LANGUAGE CXX) endif () add_executable(${HMAT_PREFIX_EXAMPLE}${name} ${PROJECT_SOURCE_DIR}/examples/${source}) target_link_libraries(${HMAT_PREFIX_EXAMPLE}${name} ${PROJECT_NAME}) install(TARGETS ${HMAT_PREFIX_EXAMPLE}${name} DESTINATION "${RELATIVE_INSTALL_BIN_DIR}/examples" COMPONENT Runtime) endif () endmacro() hmat_add_example(kriging kriging.cpp) hmat_add_example(cylinder cylinder.cpp) hmat_add_example(c-cylinder c-cylinder.c) hmat_add_example(c-simple-cylinder c-simple-cylinder.c) hmat_add_example(c-simple-kriging c-simple-kriging.c) install(DIRECTORY include/hmat DESTINATION "${INSTALL_INCLUDE_DIR}" COMPONENT Development) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hmat DESTINATION "${INSTALL_INCLUDE_DIR}" COMPONENT Development) set(CXX_HMAT_HEADERS "full_matrix;hmat_cpp_interface;compression;h_matrix;" "default_engine;cluster_tree;tree;interaction;data_types") foreach(header ${CXX_HMAT_HEADERS}) set(HMAT_HEADERS src/${header}.hpp;${HMAT_HEADERS}) endforeach() install(FILES ${HMAT_HEADERS} DESTINATION "${INSTALL_INCLUDE_DIR}" COMPONENT Development) if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) # # BUILD-TREE # # Add all targets to the build-tree export set export(TARGETS ${PROJECT_NAME} FILE "${PROJECT_BINARY_DIR}/HMATLibraryDepends.cmake") # Create a HMATConfig.cmake file for use from the build tree set(HMAT_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/include" "${PROJECT_BINARY_DIR}") set(HMAT_LIB_DIR "${PROJECT_BINARY_DIR}") set(HMAT_CMAKE_DIR "${PROJECT_BINARY_DIR}") set(HMAT_DEFINITIONS ${CBLAS_DEFINITIONS};${MKL_DEFINITIONS}) configure_file(CMake/HMATConfig.cmake.in "${PROJECT_BINARY_DIR}/HMATConfig.cmake" @ONLY) configure_file(CMake/HMATConfigVersion.cmake.in "${PROJECT_BINARY_DIR}/HMATConfigVersion.cmake" @ONLY) # # INSTALL-TREE # # Install the export set for use with the install-tree install(EXPORT HMATLibraryDepends DESTINATION "${RELATIVE_INSTALL_CMAKE_DIR}" COMPONENT Development) # Create a HMATConfig.cmake file for the use from the install tree # and install it set(HMAT_CMAKE_DIR "${INSTALL_CMAKE_DIR}") file(RELATIVE_PATH rel_include_dir "${HMAT_CMAKE_DIR}" "${INSTALL_INCLUDE_DIR}") list(APPEND RELATIVE_HMAT_INCLUDE_DIRS ${rel_include_dir}) file(RELATIVE_PATH rel_lib_dir "${HMAT_CMAKE_DIR}" "${INSTALL_LIB_DIR}") list(APPEND RELATIVE_HMAT_LIB_DIR ${rel_lib_dir}) configure_file(CMake/HMATConfig.cmake.in "${PROJECT_BINARY_DIR}/InstallFiles/HMATConfig.cmake" @ONLY) configure_file(CMake/HMATConfigVersion.cmake.in "${PROJECT_BINARY_DIR}/InstallFiles/HMATConfigVersion.cmake" @ONLY) install(FILES "${PROJECT_BINARY_DIR}/InstallFiles/HMATConfig.cmake" "${PROJECT_BINARY_DIR}/InstallFiles/HMATConfigVersion.cmake" DESTINATION "${HMAT_CMAKE_DIR}" COMPONENT Development) endif() # ======================== # final LOG # ======================== if (CMAKE_VERSION VERSION_GREATER 2.8.5) include(FeatureSummary) feature_summary(WHAT ALL) endif () hmat-oss-1.0.4/LICENSE.md000066400000000000000000000353521245574051100146670ustar00rootroot00000000000000### GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. ### Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION **0.** This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. **1.** You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. **2.** You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: : **a)** You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. : **b)** You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. : **c)** If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. **3.** You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: : **a)** Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, : **b)** Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, : **c)** Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. **4.** You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. **5.** You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. **6.** Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. **7.** If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. **8.** If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. **9.** The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. **10.** If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. **NO WARRANTY** **11.** BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. **12.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. ### END OF TERMS AND CONDITIONS hmat-oss-1.0.4/README.md000066400000000000000000000002411245574051100145270ustar00rootroot00000000000000A [hierarchical matrix](http://en.wikipedia.org/wiki/Hierarchical_matrix) C/C++ library including a [LU](http://en.wikipedia.org/wiki/LU_decomposition) solver. hmat-oss-1.0.4/examples/000077500000000000000000000000001245574051100150715ustar00rootroot00000000000000hmat-oss-1.0.4/examples/c-cylinder.c000066400000000000000000000205211245574051100172660ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ // Cylinder #include #include #ifdef __cplusplus #include typedef std::complex double_complex; #define make_complex(realPart, imagPart) \ std::complex(realPart, imagPart) #else #include typedef double complex double_complex; #define make_complex(realPart, imagPart) \ realPart + imagPart * _Complex_I #endif #include "hmat/hmat.h" /** This is a simple example showing how to use the HMatrix library. In this example, we assemble and do a decomposition of a Matrix such that: \f[A_{ij} = \frac{e^{i\kappa |x_i - x_j|}}{4 \pi |x_i - x_j|}\f] with the points \f$(x_i)\f$ on a cylinder. In the real case we use 1 / r instead. */ /** Create an open cylinder point cloud. \param radius Radius of the cylinder \param step distance between two neighboring points \param n number of points \return a vector of points. */ DofCoordinate* createCylinder(double radius, double step, int n) { DofCoordinate* result = (DofCoordinate*) malloc(n * sizeof(DofCoordinate)); double length = 2 * M_PI * radius; int pointsPerCircle = length / step; double angleStep = 2 * M_PI / pointsPerCircle; int i; for (i = 0; i < n; i++) { result[i].x = radius * cos(angleStep * i); result[i].y = radius * sin(angleStep * i), result[i].z = (step * i) / pointsPerCircle; result[i].globalIndex = i; } return result; } /** Write points into file. */ void pointsToFile(DofCoordinate* points, int size, const char* filename) { int i; FILE * fp = fopen(filename, "w"); for (i = 0; i < size; i++) { fprintf(fp, "%e %e %e\n", points[i].x, points[i].y, points[i].z); } fclose(fp); } /** Define interaction between 2 degrees of freedoms (real case) */ double interaction_real(DofCoordinate* points, int i, int j) { double dx = points[i].x - points[j].x; double dy = points[i].y - points[j].y; double dz = points[i].z - points[j].z; return 1. / (sqrt(dx*dx + dy*dy + dz*dz) + 1e-10); } /** Define interaction between 2 degrees of freedoms (complex case) */ double_complex interaction_complex(DofCoordinate* points, double k, int i, int j) { double dx = points[i].x - points[j].x; double dy = points[i].y - points[j].y; double dz = points[i].z - points[j].z; double distance = sqrt(dx*dx + dy*dy + dz*dz) + 1e-10; double realPart = cos(k * distance) / (4 * M_PI * distance); double imagPart = sin(k * distance) / (4 * M_PI * distance); return make_complex(realPart, imagPart); } /** This structure contains data related to our problem. */ typedef struct { int type; int n; DofCoordinate* points; double k; } problem_data_t; /** This structure is a glue between hmat library and application. */ typedef struct { int row_start; int col_start; int* row_hmat2client; int* col_hmat2client; problem_data_t* user_context; } block_data_t; /** prepare_hmat is called by hmat library to prepare assembly of a cluster block. We allocate a block_data_t, which is then passed to the compute_hmat function. We have to store all datas needed by compute_hmat into this block_data_t structure. */ void prepare_hmat(int row_start, int row_count, int col_start, int col_count, int *row_hmat2client, int *row_client2hmat, int *col_hmat2client, int *col_client2hmat, void *user_context, void **data) { *data = calloc(1, sizeof(block_data_t)); block_data_t* bdata = (block_data_t*) *data; bdata->row_start = row_start; bdata->col_start = col_start; bdata->row_hmat2client = row_hmat2client; bdata->col_hmat2client = col_hmat2client; bdata->user_context = (problem_data_t*) user_context; } /** Compute all values of a block and store them into an array, which had already been allocated by hmat. There is no padding, all values computed in this block are contiguous, and stored in a column-major order. This block is not necessarily identical to the one previously processed by prepare_hmat, it may be a sub-block. In fact, it is either the full block, or a full column, or a full row. */ void compute_hmat(void *data, int rowBlockBegin, int rowBlockCount, int colBlockBegin, int colBlockCount, void *values) { int i, j; double *dValues = (double *) values; double_complex *zValues = (double_complex *) values; block_data_t* bdata = (block_data_t*) data; int type = bdata->user_context->type; int pos = 0; for (j = 0; j < colBlockCount; ++j) { int col = bdata->col_hmat2client[j + colBlockBegin + bdata->col_start]; switch (type) { case HMAT_SIMPLE_PRECISION: case HMAT_DOUBLE_PRECISION: for (i = 0; i < rowBlockCount; ++i, ++pos) dValues[pos] = interaction_real(bdata->user_context->points, bdata->row_hmat2client[i + rowBlockBegin + bdata->row_start], col); break; case HMAT_SIMPLE_COMPLEX: case HMAT_DOUBLE_COMPLEX: for (i = 0; i < rowBlockCount; ++i, ++pos) zValues[pos] = interaction_complex(bdata->user_context->points, bdata->user_context->k, bdata->row_hmat2client[i + rowBlockBegin + bdata->row_start], col); break; } } } /** Function to free our block_data_t structure. As we only store pointers, there is no */ void free_hmat(void *data) { free((block_data_t*) data); } int main(int argc, char **argv) { double radius, step, k; DofCoordinate* points; hmat_settings_t settings; hmat_interface_t hmat; hmat_value_t scalar_type; hmat_info_t mat_info; int n; char arithmetic; hmat_cluster_tree_t* cluster_tree; hmat_matrix_t* hmatrix; int rc; problem_data_t problem_data; if (argc != 3) { fprintf(stderr, "Usage: %s n_points (S|D|C|Z)\n", argv[0]); return 1; } n = atoi(argv[1]); arithmetic = argv[2][0]; hmat_get_parameters(&settings); switch (arithmetic) { case 'S': scalar_type = HMAT_SIMPLE_PRECISION; break; case 'D': scalar_type = HMAT_DOUBLE_PRECISION; break; case 'C': scalar_type = HMAT_SIMPLE_COMPLEX; break; case 'Z': scalar_type = HMAT_DOUBLE_COMPLEX; break; default: fprintf(stderr, "Unknown arithmetic code %c\n", arithmetic); return 1; } hmat_init_default_interface(&hmat, scalar_type); settings.compressionMethod = hmat_compress_aca_plus; settings.admissibilityFactor = 3.; hmat_set_parameters(&settings); if (0 != hmat.init()) { fprintf(stderr, "Unable to initialize HMat library\n"); return 1; } printf("Generating the point cloud...\n"); radius = 1.; step = 1.75 * M_PI * radius / sqrt((double)n); k = 2 * M_PI / (10. * step); // 10 points / lambda points = createCylinder(radius, step, n); printf("done.\n"); pointsToFile(points, n, "points.txt"); problem_data.n = n; problem_data.points = points; problem_data.k = k; problem_data.type = scalar_type; cluster_tree = create_cluster_tree(points, n); printf("ClusterTree node count = %d\n", hmat_tree_nodes_count(cluster_tree)); hmatrix = hmat.create_empty_hmatrix(cluster_tree, cluster_tree); hmat.hmat_get_info(hmatrix, &mat_info); printf("HMatrix node count = %d\n", mat_info.nr_block_clusters); rc = hmat.assemble(hmatrix, &problem_data, prepare_hmat, compute_hmat, free_hmat, 0); if (rc) { fprintf(stderr, "Error in assembly, return code is %d, exiting...\n", rc); hmat.finalize(); return rc; } rc = hmat.factor(hmatrix); if (rc) { fprintf(stderr, "Error in factor, return code is %d, exiting...\n", rc); hmat.finalize(); return rc; } hmat.finalize(); return 0; } hmat-oss-1.0.4/examples/c-simple-cylinder.c000066400000000000000000000136651245574051100205700ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ // Cylinder #include #include #ifdef __cplusplus #include typedef std::complex double_complex; #define make_complex(realPart, imagPart) \ std::complex(realPart, imagPart) #else #include typedef double complex double_complex; #define make_complex(realPart, imagPart) \ realPart + imagPart * _Complex_I #endif #include "hmat/hmat.h" /** This is a simple example showing how to use the HMatrix library. In this example, we assemble and do a decomposition of a Matrix such that: \f[A_{ij} = \frac{e^{i\kappa |x_i - x_j|}}{4 \pi |x_i - x_j|}\f] with the points \f$(x_i)\f$ on a cylinder. In the real case we use 1 / r instead. */ /** Create an open cylinder point cloud. \param radius Radius of the cylinder \param step distance between two neighboring points \param n number of points \return a vector of points. */ DofCoordinate* createCylinder(double radius, double step, int n) { DofCoordinate* result = (DofCoordinate*) malloc(n * sizeof(DofCoordinate)); double length = 2 * M_PI * radius; int pointsPerCircle = length / step; double angleStep = 2 * M_PI / pointsPerCircle; int i; for (i = 0; i < n; i++) { result[i].x = radius * cos(angleStep * i); result[i].y = radius * sin(angleStep * i), result[i].z = (step * i) / pointsPerCircle; result[i].globalIndex = i; } return result; } /** Write points into file. */ void pointsToFile(DofCoordinate* points, int size, const char* filename) { int i; FILE * fp = fopen(filename, "w"); for (i = 0; i < size; i++) { fprintf(fp, "%e %e %e\n", points[i].x, points[i].y, points[i].z); } fclose(fp); } typedef struct { int n; DofCoordinate* points; double k; } problem_data_t; /** Define interaction between 2 degrees of freedoms (real case) */ void interaction_real(void* data, int i, int j, void* result) { problem_data_t* pdata = (problem_data_t*) data; DofCoordinate* points = pdata->points; double dx = points[i].x - points[j].x; double dy = points[i].y - points[j].y; double dz = points[i].z - points[j].z; *((double*)result) = 1. / (sqrt(dx*dx + dy*dy + dz*dz) + 1e-10); } /** Define interaction between 2 degrees of freedoms (complex case) */ void interaction_complex(void* data, int i, int j, void* result) { problem_data_t* pdata = (problem_data_t*) data; DofCoordinate* points = pdata->points; double k = pdata->k; double dx = points[i].x - points[j].x; double dy = points[i].y - points[j].y; double dz = points[i].z - points[j].z; double distance = sqrt(dx*dx + dy*dy + dz*dz) + 1e-10; double realPart = cos(k * distance) / (4 * M_PI * distance); double imagPart = sin(k * distance) / (4 * M_PI * distance); *((double_complex*)result) = make_complex(realPart, imagPart); } int main(int argc, char **argv) { double radius, step, k; DofCoordinate* points; hmat_settings_t settings; int n; char arithmetic; hmat_cluster_tree_t* cluster_tree; hmat_matrix_t* hmatrix; hmat_interface_t hmat; hmat_value_t type; hmat_info_t mat_info; int rc; problem_data_t problem_data; int is_parallel_run = 0; if (argc != 3) { fprintf(stderr, "Usage: %s n_points (S|D|C|Z)\n", argv[0]); return 1; } n = atoi(argv[1]); arithmetic = argv[2][0]; switch (arithmetic) { case 'S': type = HMAT_SIMPLE_PRECISION; break; case 'D': type = HMAT_DOUBLE_PRECISION; break; case 'C': type = HMAT_SIMPLE_COMPLEX; break; case 'Z': type = HMAT_DOUBLE_COMPLEX; break; default: fprintf(stderr, "Unknown arithmetic code %c, exiting...\n", arithmetic); return 1; } hmat_get_parameters(&settings); hmat_init_default_interface(&hmat, type); settings.compressionMethod = hmat_compress_aca_plus; settings.admissibilityFactor = 3.; hmat_set_parameters(&settings); if (0 != hmat.init()) { fprintf(stderr, "Unable to initialize HMat library\n"); return 1; } printf("Generating the point cloud...\n"); radius = 1.; step = 1.75 * M_PI * radius / sqrt((double)n); k = 2 * M_PI / (10. * step); // 10 points / lambda points = createCylinder(radius, step, n); printf("done.\n"); pointsToFile(points, n, "points.txt"); problem_data.n = n; problem_data.points = points; problem_data.k = k; cluster_tree = create_cluster_tree(points, n); printf("ClusterTree node count = %d\n", hmat_tree_nodes_count(cluster_tree)); hmatrix = hmat.create_empty_hmatrix(cluster_tree, cluster_tree); hmat.hmat_get_info(hmatrix, &mat_info); printf("HMatrix node count = %d\n", mat_info.nr_block_clusters); if (type == HMAT_SIMPLE_PRECISION || type == HMAT_DOUBLE_PRECISION) rc = hmat.assemble_simple_interaction(hmatrix, &problem_data, interaction_real, 0); else rc = hmat.assemble_simple_interaction(hmatrix, &problem_data, interaction_complex, 0); if (rc) { fprintf(stderr, "Error in assembly, return code is %d, exiting...\n", rc); hmat.finalize(); return rc; } rc = hmat.factor(hmatrix); if (rc) { fprintf(stderr, "Error in factor, return code is %d, exiting...\n", rc); hmat.finalize(); return rc; } hmat.finalize(); return rc; } hmat-oss-1.0.4/examples/c-simple-kriging.c000066400000000000000000000242531245574051100204040ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include #include #ifdef __cplusplus #include typedef std::complex double_complex; #else #include typedef double complex double_complex; #endif #include "hmat/hmat.h" #ifdef _MSC_VER #include typedef SSIZE_T ssize_t; #endif #if _WIN32 // getline is not defined in mingw #include size_t getline(char **lineptr, size_t *n, FILE *stream) { char *bufptr = NULL; char *p = bufptr; size_t size; int c; if (lineptr == NULL) { return -1; } if (stream == NULL) { return -1; } if (n == NULL) { return -1; } bufptr = *lineptr; size = *n; c = fgetc(stream); if (c == EOF) { return -1; } if (bufptr == NULL) { bufptr = (char*) malloc(128); if (bufptr == NULL) { return -1; } size = 128; } p = bufptr; while(c != EOF) { if ((p - bufptr) > (size - 1)) { size = size + 128; bufptr = (char*) realloc(bufptr, size); if (bufptr == NULL) { return -1; } } *p++ = c; if (c == '\n') { break; } c = fgetc(stream); } *p++ = '\0'; *lineptr = bufptr; *n = size; return p - bufptr - 1; } #endif /** This is a simple example showing how to use the HMatrix library. c version of kriging.cpp. The kernel function is the interaction one! */ /** Write points into file. */ void pointsToFile(DofCoordinate* points, int size, const char* filename) { int i; FILE * fp = fopen(filename, "w"); for (i = 0; i < size; i++) { fprintf(fp, "%e %e %e\n", points[i].x, points[i].y, points[i].z); } fclose(fp); } /** Read points from file. */ void readPointsFromFile(const char* filename, DofCoordinate **points, int *size) { FILE * fp=NULL; int k; char *line = NULL; size_t len = 0; ssize_t read; double x = 0, y = 0, z = 0; size_t np = 1000000, rnp = 500000; fp = fopen(filename, "r"); *points = (DofCoordinate*) malloc(np * sizeof(DofCoordinate)); k = 0; while ((read = getline(&line, &len, fp)) != -1) { if(k>=np) *points = (DofCoordinate*) realloc(*points, (k+rnp) * sizeof(DofCoordinate)); sscanf(line, "%lf %lf %lf\n", &x, &y, &z); (*points)[k].globalIndex = k; (*points)[k].x = x; (*points)[k].y = y; (*points)[k].z = z; k++; } *points = (DofCoordinate*) realloc(*points, k * sizeof(DofCoordinate)); fclose(fp); free(line); *size = k; return; } double distanceTo(DofCoordinate center, DofCoordinate points){ double r = sqrt((center.x - points.x)*(center.x - points.x) + (center.y - points.y)*(center.y - points.y) + (center.z - points.z)*(center.z - points.z)); return r; } double* createRhs(DofCoordinate *points, int n, double l) { double* rhs = (double*) calloc(n, sizeof(double)); int i; DofCoordinate center; center.x = 0.; center.y = 0.; center.z = 0.; for (i = 0; i < n; i++) { center.x += points[i].x; center.y += points[i].y; center.z += points[i].z; } center.x /= n; center.y /= n; center.z /= n; for (i = 0; i < n; i++) { double r = distanceTo(center, points[i]); rhs[i] = exp(-fabs(r) / l); } return rhs; } typedef struct { int n; DofCoordinate* points; double l; } problem_data_t; double correlationLength(DofCoordinate * points, size_t n) { size_t i; DofCoordinate pMin, pMax; pMin.x = points[0].x; pMin.y = points[0].y; pMin.z = points[0].z; pMax.x = points[0].x; pMax.y = points[0].y; pMax.z = points[0].z; for (i = 0; i < n; i++) { if (points[i].x < pMin.x) pMin.x = points[i].x; if (points[i].x > pMax.x) pMax.x = points[i].x; if (points[i].y < pMin.y) pMin.y = points[i].y; if (points[i].y > pMax.y) pMax.y = points[i].y; if (points[i].z < pMin.z) pMin.z = points[i].z; if (points[i].z > pMax.z) pMax.z = points[i].z; } double l = pMax.x - pMin.x; if (pMax.y - pMin.y > l) l = pMax.y - pMin.y; if (pMax.z - pMin.z > l) l = pMax.z - pMin.z; return 0.1 * l; } /** Define interaction between 2 degrees of freedoms (real case) */ void interaction_real(void* data, int i, int j, void* result) { problem_data_t* pdata = (problem_data_t*) data; DofCoordinate* points = pdata->points; double r = distanceTo(points[i], points[j]); *((double*)result) = exp(-fabs(r) / pdata->l); } void double_precision_error(problem_data_t *problem_data, int n, double *rhs, double *rhsCopy, double *result) { int i,j; double rhsCopyNorm = 0.; double rhsNorm = 0.; double diffNorm = 0; double diff; double a; for (i = 0; i < n; i++) { rhsCopyNorm += rhsCopy[i] * rhsCopy[i]; } rhsCopyNorm = sqrt(rhsCopyNorm); fprintf(stdout, "\n||b|| = %e\n", rhsCopyNorm); for (i = 0; i < n; i++) { rhsNorm += rhs[i] * rhs[i]; } rhsNorm = sqrt(rhsNorm); fprintf(stdout, "||x|| = %e\n", rhsNorm); for (i = 0; i < n; i++) { diff = rhsCopy[i]; for (j = 0; j < n; j++) { interaction_real(problem_data, i, j, &a); diff -= a * rhs[j]; } diffNorm += diff*diff; } *result = diffNorm/rhsCopyNorm; } void simple_precision_error(problem_data_t *problem_data, int n, float *rhs, float *rhsCopy, float *result) { int i,j; float rhsCopyNorm = 0.; float rhsNorm = 0.; float diffNorm = 0; float diff; double a; for (i = 0; i < n; i++) { rhsCopyNorm += rhsCopy[i] * rhsCopy[i]; } rhsCopyNorm = sqrt(rhsCopyNorm); fprintf(stdout, "\n||b|| = %e\n", rhsCopyNorm); for (i = 0; i < n; i++) { rhsNorm += rhs[i] * rhs[i]; } rhsNorm = sqrt(rhsNorm); fprintf(stdout, "||x|| = %e\n", rhsNorm); for (i = 0; i < n; i++) { diff = rhsCopy[i]; for (j = 0; j < n; j++) { interaction_real(problem_data, i, j, &a); diff -= a * rhs[j]; } diffNorm += diff*diff; } *result = diffNorm/rhsCopyNorm; } int main(int argc, char **argv) { int i; char *pointsFilename = NULL; DofCoordinate* points; hmat_interface_t hmat; hmat_settings_t settings; hmat_value_t type; hmat_info_t mat_info; int n; char arithmetic; hmat_cluster_tree_t* cluster_tree; hmat_matrix_t * hmatrix; int kLowerSymmetric = 1; /* =0 if not Symmetric */ int rc; problem_data_t problem_data; double l; int nrhs = 1; double *drhs, *drhsCopy, derr; float *frhs, *frhsCopy, ferr; int is_parallel_run = 0; if (argc != 3) { fprintf(stderr, "Usage: %s pointsfilename (S|D)\n", argv[0]); return 1; } pointsFilename = argv[1]; arithmetic = argv[2][0]; switch (arithmetic) { case 'S': type = HMAT_SIMPLE_PRECISION; break; case 'D': type = HMAT_DOUBLE_PRECISION; break; default: fprintf(stderr, "Unknown arithmetic code %c, exiting...\n", arithmetic); return 1; } hmat_get_parameters(&settings); hmat_init_default_interface(&hmat, type); settings.compressionMethod = hmat_compress_aca_plus; /*settings->recompress = 0;*/ /*settings->admissibilityFactor = 3.;*/ hmat_set_parameters(&settings); if (0 != hmat.init()) { fprintf(stderr, "Unable to initialize HMat library\n"); return 1; } printf("Load points..."); readPointsFromFile(pointsFilename, &points, &n); printf("done\n"); printf("n = %d\n", n); l = correlationLength(points, n); printf("correlationLength = %le\n", l); problem_data.n = n; problem_data.points = points; problem_data.l = l; if(type == HMAT_SIMPLE_PRECISION){ drhs = createRhs(points, n, l); drhsCopy = createRhs(points, n, l); frhs = (float*) calloc(n, sizeof(float)); frhsCopy = (float*) calloc(n, sizeof(float)); for(i=0;i #include #include "hmat_cpp_interface.hpp" #include "default_engine.hpp" /** This is a simple example showing how to use the HMatrix library. In this example, we assemble and do a decomposition of a Matrix such that: \f[A_{ij} = \frac{e^{i\kappa |x_i - x_j|}}{4 \pi |x_i - x_j|}\f] with the points \f$(x_i)\f$ on a cylinder. In the real case we use 1 / r instead. The required steps are: - Create the point cloud in the \a createCylinder() function - Create a function returning any element (i, j) in the matrix: TestAssemblyFunction. We are using the unoptimzed and simpler SimpleAssemblyFunction as a base class here. - The rest of the code is in go(): - Initialize the library - Create a ClusterTree (createClusterTree()) - Create a HMatInterface<> instance - Assemble it hmat->assemble() - Factorisation: hmat->factorize() */ /** Create an open cylinder point cloud. \param radius Radius of the cylinder \param step distance between two neighboring points \param n number of points \return a vector of points. */ std::vector createCylinder(double radius, double step, int n) { std::vector result; double length = 2 * M_PI * radius; int pointsPerCircle = length / step; double angleStep = 2 * M_PI / pointsPerCircle; for (int i = 0; i < n; i++) { Point p(radius * cos(angleStep * i), radius * sin(angleStep * i), (step * i) / pointsPerCircle); result.push_back(p); } return result; } void pointsToFile(const std::vector& points, const char* filename) { std::ofstream f(filename); f << std::scientific; for (size_t i = 0; i < points.size(); i++) { f << points[i].x << " " << points[i].y << " " << points[i].z << std::endl; } } template class TestAssemblyFunction : public SimpleAssemblyFunction { public: /// Point coordinates std::vector points; /// Wavenumber for the complex case. double k; public: /** Constructor. \param _points Point cloud \param _k Wavenumber */ TestAssemblyFunction(std::vector& _points, double _k = 1.) : SimpleAssemblyFunction(), points(_points), k(_k) {} typename Types::dp interaction(int i, int j) const; }; template<> Types::dp TestAssemblyFunction::interaction(int i, int j) const { double distance = points[i].distanceTo(points[j]) + 1e-10; return 1. / distance; } template<> Types::dp TestAssemblyFunction::interaction(int i, int j) const { double distance = points[i].distanceTo(points[j]) + 1e-10; return 1. / distance; } template<> Types::dp TestAssemblyFunction::interaction(int i, int j) const { double distance = points[i].distanceTo(points[j]) + 1e-10; Z_t result = Constants::zero; result.real(cos(k * distance) / (4 * M_PI * distance)); result.imag(sin(k * distance) / (4 * M_PI * distance)); return result; } template<> Types::dp TestAssemblyFunction::interaction(int i, int j) const { double distance = points[i].distanceTo(points[j]) + 1e-10; Z_t result = Constants::zero; result.real(cos(k * distance) / (4 * M_PI * distance)); result.imag(sin(k * distance) / (4 * M_PI * distance)); return result; } ClusterTree* createClusterTree(const std::vector& points) { int n = (int) points.size(); DofCoordinate* dls = new DofCoordinate[n]; for (int i = 0; i < n; i++) { dls[i].x = points[i].x; dls[i].y = points[i].y; dls[i].z = points[i].z; } // We leak dls... return createClusterTree(dls, n); } template class E> struct Configuration { void configure(HMatInterface & hmat){} }; template class E> void go(std::vector& points, double k) { if (0 != HMatInterface::init()) return; { ClusterTree* ct = createClusterTree(points); std::cout << "ClusterTree node count = " << ct->nodesCount() << std::endl; TestAssemblyFunction f(points, k); HMatInterface hmat(ct, ct); std::cout << "HMatrix node count = " << hmat.nodesCount() << std::endl; Configuration().configure(hmat); hmat.assemble(f, kNotSymmetric, false); hmat.factorize(); std::pair compressionRatio = hmat.compressionRatio(); std::cout << "Compression Ratio = " << 100 * ((double) compressionRatio.first) / compressionRatio.second << "%" << std::endl; hmat.createPostcriptFile("h_matrix.ps"); } HMatInterface::finalize(); } template class E> void goA(char arithmetic, std::vector& points, double k) { switch (arithmetic) { case 'S': go(points, k); break; case 'D': go(points, k); break; case 'C': go(points, k); break; case 'Z': go(points, k); break; default: std::cerr << "Unknown arithmetic code " << arithmetic << std::endl; } } int main(int argc, char **argv) { HMatSettings& settings = HMatSettings::getInstance(); settings.maxParallelLeaves = 10000; if (argc != 3) { std::cout << "Usage: " << argv[0] << " n_points (S|D|C|Z)" << std::endl; return 0; } int n = atoi(argv[1]); char arithmetic = argv[2][0]; settings.compressionMethod = AcaPlus; settings.admissibilityFactor = 3.; settings.setParameters(); settings.printSettings(); std::cout << "Generating the point cloud..."; double radius = 1.; double step = 1.75 * M_PI * radius / sqrt((double)n); double k = 2 * M_PI / (10. * step); // 10 points / lambda std::vector points = createCylinder(radius, step, n); std::cout << "done.\n"; pointsToFile(points, "points.txt"); goA(arithmetic, points, k); return 0; } hmat-oss-1.0.4/examples/kriging.cpp000066400000000000000000000124671245574051100172410ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /** Sample application for the HMatrix library. */ #include "full_matrix.hpp" #include "interaction.hpp" #include "data_types.hpp" #include "hmat_cpp_interface.hpp" #include #include #include #include #include class KrigingAssemblyFunction : public SimpleAssemblyFunction { private: std::vector points; double l; public: /** Constructor. \param _mat The FullMatrix the values are taken from. */ KrigingAssemblyFunction(std::vector& _points, double _l) : SimpleAssemblyFunction(), points(_points), l(_l) {} D_t interaction(int i, int j) const { D_t r = points[i].distanceTo(points[j]); // Exponential return exp(-fabs(r) / l); } }; ClusterTree* createClusterTree(const std::vector& points) { int n = (int) points.size(); DofCoordinate* dls = new DofCoordinate[n]; for (int i = 0; i < n; i++) { dls[i].x = points[i].x; dls[i].y = points[i].y; dls[i].z = points[i].z; } // We leak dls... return createClusterTree(dls, n); } template HMatrix* createHMatrix(AssemblyFunction& f, std::vector& points) { ClusterTree* ct = createClusterTree(points); std::cout << "ClusterTree node count = " << ct->nodesCount() << std::endl; HMatrix* hmat = new HMatrix(ct, ct, kNotSymmetric); std::cout << "HMatrix node count = " << hmat->nodesCount() << std::endl; hmat->assembleSymmetric(f); return hmat; } void readPointsFromFile(const char* filename, std::vector& points) { std::ifstream f(filename); std::string line; while (getline(f, line)) { double x = 0, y = 0, z = 0; sscanf(line.c_str(), "%le %le %le", &x, &y, &z); points.push_back(Point(x, y, z)); } } FullMatrix* createRhs(const std::vector& points, double l) { const int n = (int) points.size(); FullMatrix* rhs = FullMatrix::Zero(n, 1); Point center(0., 0., 0.); for (int i = 0; i < n; i++) { center.x += points[i].x; center.y += points[i].y; center.z += points[i].z; } center.x /= n; center.y /= n; center.z /= n; for (int i = 0; i < n; i++) { double r = center.distanceTo(points[i]); rhs->get(i, 0) = exp(-fabs(r) / l); } return rhs; } double correlationLength(const std::vector& points) { using std::max; using std::min; Point pMin(points[0]), pMax(points[0]); const size_t n = points.size(); for (size_t i = 0; i < n; i++) { for (int coord = 0; coord < 3; coord++) { pMin.xyz[coord] = min(pMin.xyz[coord], points[i].xyz[coord]); pMax.xyz[coord] = max(pMax.xyz[coord], points[i].xyz[coord]); } } double l = .1 * max(max(pMax.x - pMin.x, pMax.y - pMin.y), pMax.z - pMin.z); return l; } template class E> int go(const char* pointsFilename) { if (0 != HMatInterface::init()) return 1; HMatSettings& settings = HMatSettings::getInstance(); settings.compressionMethod = AcaPlus; settings.setParameters(); std::cout << "Load points..."; std::vector points; readPointsFromFile(pointsFilename, points); const int n = (int) points.size(); std::cout << n << std::endl; const double l = correlationLength(points); FullMatrix* rhs = createRhs(points, l); FullMatrix rhsCopy(n, 1); rhsCopy.copyMatrixAtOffset(rhs, 0, 0); KrigingAssemblyFunction f(points, l); ClusterTree* ct = createClusterTree(points); std::cout << "ClusterTree node count = " << ct->nodesCount() << std::endl; HMatInterface hmat(ct, ct); hmat.assemble(f, kLowerSymmetric); std::pair compressionRatio = hmat.compressionRatio(); std::cout << "Compression Ratio = " << 100 * ((double) compressionRatio.first) / compressionRatio.second << "%" << std::endl; std::cout << "done.\nFactorisation..."; hmat.factorize(); std::cout << "Resolution..."; hmat.solve(*rhs); std::cout << "done." << std::endl; std::cout << "Accuracy..."; double rhsCopyNorm = rhsCopy.norm(); for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { rhsCopy.get(i, 0) -= f.interaction(i, j) * rhs->get(j, 0); } } double diffNorm = rhsCopy.norm(); std::cout << "Done" << std::endl; std::cout << "||Ax - b|| / ||b|| = " << diffNorm / rhsCopyNorm << std::endl; delete rhs; HMatInterface::finalize(); return 0; } int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "Usage: %s filename\n", argv[0]); return 1; } return go(argv[1]); } hmat-oss-1.0.4/include/000077500000000000000000000000001245574051100146765ustar00rootroot00000000000000hmat-oss-1.0.4/include/hmat/000077500000000000000000000000001245574051100156275ustar00rootroot00000000000000hmat-oss-1.0.4/include/hmat/hmat.h000066400000000000000000000316221245574051100167350ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief C interface to the HMatrix library. */ #ifndef _HMAT_H #define _HMAT_H #include #include "hmat/config.h" #ifdef __cplusplus extern "C" { #endif /*! \brief All the scalar types */ typedef enum { HMAT_SIMPLE_PRECISION=0, HMAT_DOUBLE_PRECISION=1, HMAT_SIMPLE_COMPLEX=2, HMAT_DOUBLE_COMPLEX=3, } hmat_value_t; /** Type of ClusterTree */ typedef enum { hmat_cluster_geometric, hmat_cluster_median, hmat_cluster_hybrid } hmat_cluster_t; /** Choice of the compression method */ typedef enum { hmat_compress_svd, hmat_compress_aca_full, hmat_compress_aca_partial, hmat_compress_aca_plus } hmat_compress_t; /*! \brief Coordinates of a degree of freedom \a globalIndex is the global index */ typedef struct { int globalIndex; double x; double y; double z; } DofCoordinate; /*! \brief Prepare block assembly. \param row_start starting row \param row_count number of rows \param col_start starting column \param col_count number of columns \param row_hmat2client renumbered rows -> global row indices mapping \param row_client2hmat global row indices -> renumbered rows mapping \param col_hmat2client renumbered cols -> global col indices mapping \param col_client2hmat global col indices -> renumbered cols mapping \param context user provided data \param **data opaque pointer to data related to this block. Must be freed using \a release_func(). */ typedef void (*prepare_func)(int row_start, int row_count, int col_start, int col_count, int *row_hmat2client, int *row_client2hmat, int *col_hmat2client, int *col_client2hmat, void *context, void **data); /*! \brief Compute a sub-block. \warning The computation has to be prepared with \a prepare_func() first. The supported cases are: - Compute the full block - Compute exactly one row - Compute exactly one column Regarding the indexing: For all indices in this function, the "C" conventions are followed, ie indices start at 0, the lower bound is included and the upper bound is excluded. The indexing is relative to the block, that is the row i is the i-th row within the block. \param v_data opaque pointer, as set by \a prepare_func() \param row_start starting row \param row_count number of rows \param col_start starting column \param col_count number of columns \param block pointer to the output buffer. No padding is allowed, that is the leading dimension of the buffer must be its real leading dimension (1 for a row). Column-major order is assumed. */ typedef void (*compute_func)(void* v_data, int row_start, int row_count, int col_start, int col_count, void* block); /*! \brief Release opaque pointer allocated by \a prepare_func(). \param v_data opaque pointer */ typedef void (*release_func)(void* v_data); /*! \brief Compute a single matrix term \param user_context pointer to user data, see \a assemble_hmatrix_simple_interaction function \param row row index \param col column index \param result address where result is stored; result is a pointer to a double for real matrices, and a pointer to a double complex for complex matrices. */ typedef void (*simple_interaction_compute_func)(void* user_context, int row, int col, void* result); typedef struct hmat_cluster_tree_struct hmat_cluster_tree_t; /*! \brief Create a ClusterTree from the DoFs coordinates. \param dls the DoFs coordinates \param n the number of DoFs \return an opaque pointer to a ClusterTree, or NULL in case of error. */ hmat_cluster_tree_t * create_cluster_tree(DofCoordinate* dls, int n); /*! * Return the number of nodes in a cluster tree */ int hmat_tree_nodes_count(hmat_cluster_tree_t * tree); /** Information on the HMatrix */ typedef struct { /*! Number of elements */ size_t compressed_size; /*! Number of elements if HMatrix was not compressed */ size_t uncompressed_size; /*! Number of block cluster tree nodes in the HMatrix */ int nr_block_clusters; } hmat_info_t; typedef struct hmat_matrix_struct hmat_matrix_t; typedef struct { /*! Create an empty (not assembled) HMatrix from 2 \a ClusterTree instances. The HMatrix is built on a row and a column tree, that are provided as arguments. \param stype the scalar type \param rows_tree a ClusterTree as returned by \a create_cluster_tree(). \param cols_tree a ClusterTree as returned by \a create_cluster_tree(). \return an opaque pointer to an HMatrix, or NULL in case of error. */ hmat_matrix_t* (*create_empty_hmatrix)(void* rows_tree, void* cols_tree); /*! Assemble a HMatrix. \param hmatrix The matrix to be assembled. \param user_context The user context to pass to the prepare function. \param prepare The prepare function as given in \a HMatOperations. \param compute The compute function as given in \a HMatOperations. \param free_data The free function as given in \a HMatOperations. \param lower_symmetric 1 if the matrix is lower symmetric, 0 otherwise \return 0 for success. */ int (*assemble)(hmat_matrix_t* hmatrix, void* user_context, prepare_func prepare, compute_func compute, release_func free_data, int lower_symmetric); /*! Assemble a HMatrix then factorize it. \param hmatrix The matrix to be assembled. \param user_context The user context to pass to the prepare function. \param prepare The prepare function as given in \a HMatOperations. \param compute The compute function as given in \a HMatOperations. \param free_data The free function as given in \a HMatOperations. \param lower_symmetric 1 if the matrix is lower symmetric, 0 otherwise \return 0 for success. */ int (*assemble_factor)(hmat_matrix_t* hmatrix, void* user_context, prepare_func prepare, compute_func compute, release_func free_data, int lower_symmetric); /*! Assemble a HMatrix. This is a simplified interface, a single function is provided to compute matrix terms. \param hmatrix The matrix to be assembled. \param user_context The user context to pass to the compute function. \param compute The compute function \param lower_symmetric 1 if the matrix is lower symmetric, 0 otherwise \return 0 for success. */ int (*assemble_simple_interaction)(hmat_matrix_t* hmatrix, void* user_context, simple_interaction_compute_func compute, int lower_symmetric); /*! \brief Return a copy of a HMatrix. \param from_hmat the source matrix \return a copy of the matrix, or NULL */ hmat_matrix_t* (*copy)(hmat_matrix_t* hmatrix); /*! \brief Compute the norm of a HMatrix. \param hmatrix the matrix of which to compute the norm \return the norm */ double (*norm)(hmat_matrix_t* hmatrix); /*! \brief Factor a HMatrix in place. \param hmatrix the matrix to factor \return 0 for success */ int (*factor)(hmat_matrix_t *hmatrix); /*! \brief Destroy a HMatrix. \param hmatrix the matrix to destroy \return 0 */ int (*destroy)(hmat_matrix_t* hmatrix); /*! \brief Solve A X = B, with X overwriting B. In this function, B is a H-matrix hmat \param hmatrix \param hmatrixb \return 0 for success */ int (*solve_mat)(hmat_matrix_t* hmatrix, hmat_matrix_t* hmatrixB); /*! \brief Solve A x = b, with x overwriting b. In this function, b is a multi-column vector, with nrhs RHS. \param hmatrix \param b \param nrhs \return 0 for success */ int (*solve_systems)(hmat_matrix_t* hmatrix, void* b, int nrhs); /*! \brief Transpose an HMatrix in place. \return 0 for success. */ int (*transpose)(hmat_matrix_t *hmatrix); /*! \brief A <- alpha * A \param alpha \param hmatrix */ int (*scale)(void * alpha, hmat_matrix_t *hmatrix); /*! \brief C <- alpha * A * B + beta * C \param trans_a 'N' or 'T' \param trans_b 'N' or 'T' \param alpha \param hmatrix \param hmatrix_b \param beta \param hmatrix_c \return 0 for success */ int (*gemm)(char trans_a, char trans_b, void* alpha, hmat_matrix_t* hmatrix, hmat_matrix_t* hmatrix_b, void* beta, hmat_matrix_t* hmatrix_c); /*! \brief c <- alpha * A * b + beta * c \param trans_a 'N' or 'T' \param alpha \param hmatrix \param vec_b \param beta \param vec_c \param nrhs \return 0 for success */ int (*gemv)(char trans_a, void* alpha, hmat_matrix_t* hmatrix, void* vec_b, void* beta, void* vec_c, int nrhs); /*! \brief C <- alpha * A * B + beta * C In this version, a, c: FullMatrix, b: HMatrix. \param trans_a \param trans_b \param mc number of rows of C \param nc number of columns of C \param c \param alpha \param a \param hmat_b \param beta \return 0 for success */ int (*full_gemm)(char trans_a, char trans_b, int mc, int nc, void* c, void* alpha, void* a, hmat_matrix_t* hmat_b, void* beta); /*! \brief Initialize library */ int (*init)(); /*! \brief Do the cleanup */ int (*finalize)(); /*! \brief Get current informations \param hmatrix A hmatrix \param info A structure to fill with current informations */ int (*hmat_get_info)(hmat_matrix_t *hmatrix, hmat_info_t* info); /** For internal use only */ void * internal; } hmat_interface_t; void hmat_init_default_interface(hmat_interface_t * i, hmat_value_t type); /*! \brief Set the maximum amount of memory used for some operations. This setting is not universal since the HMatrix solver uses as much memory as needed to store the matrices. It is only used for handling the vectors in gemv() and solve(). \param memory_in_bytes the amount of memory in bytes. \return 0 for success */ int hmatrix_set_max_memory(size_t memory_in_bytes); /*! Get the value set by \a hmatrix_set_max_memory(). */ size_t hmatrix_get_max_memory(); typedef struct { /*! \brief Tolerance for the assembly. */ double assemblyEpsilon; /*! \brief Tolerance for the recompression (using SVD) */ double recompressionEpsilon; int compressionMethod; /** \f$\eta\f$ in the admissiblity condition for two clusters \f$\sigma\f$ and \f$\tau\f$: \f[ \min(diam(\sigma), diam(\tau)) < \eta \cdot d(\sigma, \tau) \f] */ double admissibilityFactor; /*! \brief Type of ClusterTree */ hmat_cluster_t clustering; /*! \brief Maximum size of a leaf in a ClusterTree (and of a non-admissible block in an HMatrix) */ int maxLeafSize; /*! \brief max(|L0|) */ int maxParallelLeaves; /*! \brief Maximum size of an admissible block. Should be size_t ! */ int elementsPerBlock; /*! \brief Use an LU decomposition */ int useLu; /*! \brief Use an LDL^t decomposition if possible */ int useLdlt; /*! \brief Coarsen the matrix structure after assembly. */ int coarsening; /*! \brief Recompress the matrix after assembly. */ int recompress; /*! \brief Validate the rk-matrices after compression */ int validateCompression; /*! \brief For blocks above error threshold, re-run the compression algorithm */ int validationReRun; /*! \brief For blocks above error threshold, dump the faulty block to disk */ int validationDump; /*! \brief Error threshold for the compression validation */ double validationErrorThreshold; } hmat_settings_t; /*! \brief Get current settings \param settings A structure to fill with current settings */ void hmat_get_parameters(hmat_settings_t * settings); /*! \brief Set current settings \param structure containing parameters \return 1 on failure, 0 otherwise. */ int hmat_set_parameters(hmat_settings_t*); /*! * \brief hmat_get_version * \return The version of this library */ const char * hmat_get_version(); #ifdef __cplusplus } #endif #endif /* _HMAT_H */ hmat-oss-1.0.4/src/000077500000000000000000000000001245574051100140425ustar00rootroot00000000000000hmat-oss-1.0.4/src/blas_overloads.hpp000066400000000000000000000423521245574051100175600ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /* Declarations of BLAS and MKL functions used by hmat */ #ifndef _BLAS_OVERLOADS_HPP #define _BLAS_OVERLOADS_HPP #include "config.h" #include "data_types.hpp" #ifdef HAVE_MKL_H #include "mkl_cblas.h" #else #include "cblas.h" #endif #ifdef HAVE_MKL_IMATCOPY #include "mkl_trans.h" #endif namespace proxy_cblas { #if defined(OPENBLAS_COMPLEX_STRUCT) || defined(OPENBLAS_COMPLEX_C99) #define _C(x) (_C_T::value_type*)x #define _CC(T, x) (T*)x #else #define _C(x) x #define _CC(T, x) x #endif // Level 1 template void axpy(const int n, const T& alpha, const T* x, const int incx, T* y, const int incy); inline void axpy(const int n, const S_t& alpha, const S_t* x, const int incx, S_t* y, const int incy) { cblas_saxpy(n, alpha, x, incx, y, incy); } inline void axpy(const int n, const D_t& alpha, const D_t* x, const int incx, D_t* y, const int incy) { cblas_daxpy(n, alpha, x, incx, y, incy); } inline void axpy(const int n, const C_t& alpha, const C_t* x, const int incx, C_t* y, const int incy) { // WARNING: &alpha instead of alpha for complex values #define _C_T C_t cblas_caxpy(n, _C(&alpha), _C(x), incx, _C(y), incy); #undef _C_T } inline void axpy(const int n, const Z_t& alpha, const Z_t* x, const int incx, Z_t* y, const int incy) { #define _C_T Z_t // WARNING: &alpha instead of alpha for complex values cblas_zaxpy(n, _C(&alpha), _C(x), incx, _C(y), incy); #undef _C_T } template void copy(const int n, const T* x, const int incx, T* y, const int incy); inline void copy(const int n, const S_t* x, int incx, S_t* y, const int incy) { cblas_scopy(n, x, incx, y, incy); } inline void copy(const int n, const D_t* x, int incx, D_t* y, const int incy) { cblas_dcopy(n, x, incx, y, incy); } inline void copy(const int n, const C_t* x, int incx, C_t* y, const int incy) { #define _C_T C_t cblas_ccopy(n, _C(x), incx, _C(y), incy); #undef _C_T } inline void copy(const int n, const Z_t* x, int incx, Z_t* y, const int incy) { #define _C_T Z_t cblas_zcopy(n, _C(x), incx, _C(y), incy); #undef _C_T } template T dot(const int n, const T* x, const int incx, const T* y, const int incy); inline S_t dot(const int n, const S_t* x, const int incx, const S_t* y, const int incy) { return cblas_sdot(n, x, incx, y, incy); } inline D_t dot(const int n, const D_t* x, const int incx, const D_t* y, const int incy) { return cblas_ddot(n, x, incx, y, incy); } template T dotc(const int n, const T* x, const int incx, const T* y, const int incy); inline C_t dotc(const int n, const C_t* x, const int incx, const C_t* y, const int incy) { #ifdef HAVE_CBLAS_CDOTC return cblas_cdotc(n, x, incx, y, incy); #else C_t result = Constants::zero; #define _C_T C_t cblas_cdotc_sub(n, _C(x), incx, _C(y), incy, _CC(openblas_complex_float, &result)); #undef _C_T return result; #endif } inline Z_t dotc(const int n, const Z_t* x, const int incx, const Z_t* y, const int incy) { #ifdef HAVE_CBLAS_CDOTC return cblas_zdotc(n, x, incx, y, incy); #else Z_t result = Constants::zero; #define _C_T Z_t cblas_zdotc_sub(n, _C(x), incx, _C(y), incy, _CC(openblas_complex_double, &result)); #undef _C_T return result; #endif } template int i_amax(const int n, const T* x, const int incx); inline int i_amax(const int n, const S_t* x, const int incx) { return cblas_isamax(n, x, incx); } inline int i_amax(const int n, const D_t* x, const int incx) { return cblas_idamax(n, x, incx); } inline int i_amax(const int n, const C_t* x, const int incx) { #define _C_T C_t return cblas_icamax(n, _C(x), incx); #undef _C_T } inline int i_amax(const int n, const Z_t* x, const int incx) { #define _C_T Z_t return cblas_izamax(n, _C(x), incx); #undef _C_T } template void scal(const int n, const T& alpha, T* x, const int incx); inline void scal(const int n, const S_t& alpha, S_t* x, const int incx) { cblas_sscal(n, alpha, x, incx); } inline void scal(const int n, const D_t& alpha, D_t* x, const int incx) { cblas_dscal(n, alpha, x, incx); } inline void scal(const int n, const C_t& alpha, C_t* x, const int incx) { // WARNING: &alpha instead of alpha for complex values #define _C_T C_t cblas_cscal(n, _C(&alpha), _C(x), incx); #undef _C_T } inline void scal(const int n, const Z_t& alpha, Z_t* x, const int incx) { // WARNING: &alpha instead of alpha for complex values #define _C_T Z_t cblas_zscal(n, _C(&alpha), _C(x), incx); #undef _C_T } // Level 2 template void gemv(const char trans, const int m, const int n, const T& alpha, const T* a, const int lda, const T* x, const int incx, const T& beta, T* y, const int incy); inline void gemv(const char trans, const int m, const int n, const S_t& alpha, const S_t* a, const int lda, const S_t* x, const int incx, const S_t& beta, S_t* y, const int incy) { const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); cblas_sgemv(CblasColMajor, t, m, n, alpha, a, lda, x, incx, beta, y, incy); } inline void gemv(const char trans, const int m, const int n, const D_t& alpha, const D_t* a, const int lda, const D_t* x, const int incx, const D_t& beta, D_t* y, const int incy) { const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); cblas_dgemv(CblasColMajor, t, m, n, alpha, a, lda, x, incx, beta, y, incy); } inline void gemv(const char trans, const int m, const int n, const C_t& alpha, const C_t* a, const int lda, const C_t* x, const int incx, const C_t& beta, C_t* y, const int incy) { const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); // WARNING: &alpha/&beta instead of alpha/beta for complex values #define _C_T C_t cblas_cgemv(CblasColMajor, t, m, n, _C(&alpha), _C(a), lda, _C(x), incx, _C(&beta), _C(y), incy); #undef _C_T } inline void gemv(const char trans, const int m, const int n, const Z_t& alpha, const Z_t* a, const int lda, const Z_t* x, const int incx, const Z_t& beta, Z_t* y, const int incy) { const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); // WARNING: &alpha/&beta instead of alpha/beta for complex values #define _C_T Z_t cblas_zgemv(CblasColMajor, t, m, n, _C(&alpha), _C(a), lda, _C(x), incx, _C(&beta), _C(y), incy); #undef _C_T } // Level 3 template void gemm(const char transA, const char transB, const int m, const int n, const int k, const T& alpha, const T* a, const int lda, const T* b, const int ldb, const T& beta, T* c, const int ldc); inline void gemm(const char transA, const char transB, const int m, const int n, const int k, const S_t& alpha, const S_t* a, const int lda, const S_t* b, const int ldb, const S_t& beta, S_t* c, const int ldc) { const CBLAS_TRANSPOSE tA = (transA == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_TRANSPOSE tB = (transB == 'N' ? CblasNoTrans : CblasTrans); cblas_sgemm(CblasColMajor, tA, tB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } inline void gemm(const char transA, const char transB, const int m, const int n, const int k, const D_t& alpha, const D_t* a, const int lda, const D_t* b, const int ldb, const D_t& beta, D_t* c, const int ldc) { const CBLAS_TRANSPOSE tA = (transA == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_TRANSPOSE tB = (transB == 'N' ? CblasNoTrans : CblasTrans); cblas_dgemm(CblasColMajor, tA, tB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } inline void gemm(const char transA, const char transB, const int m, const int n, const int k, const C_t& alpha, const C_t* a, const int lda, const C_t* b, const int ldb, const C_t& beta, C_t* c, const int ldc) { const CBLAS_TRANSPOSE tA = (transA == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_TRANSPOSE tB = (transB == 'N' ? CblasNoTrans : CblasTrans); // WARNING: &alpha/&beta instead of alpha/beta for complex values #define _C_T C_t #ifdef HAVE_ZGEMM3M cblas_cgemm3m(CblasColMajor, tA, tB, m, n, k, _C(&alpha), _C(a), lda, _C(b), ldb, _C(&beta), _C(c), ldc); #else cblas_cgemm(CblasColMajor, tA, tB, m, n, k, _C(&alpha), _C(a), lda, _C(b), ldb, _C(&beta), _C(c), ldc); #endif #undef _C_T } inline void gemm(const char transA, const char transB, const int m, const int n, int k, const Z_t& alpha, const Z_t* a, const int lda, const Z_t* b, const int ldb, const Z_t& beta, Z_t* c, const int ldc) { const CBLAS_TRANSPOSE tA = (transA == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_TRANSPOSE tB = (transB == 'N' ? CblasNoTrans : CblasTrans); // WARNING: &alpha/&beta instead of alpha/beta for complex values #define _C_T Z_t #ifdef HAVE_ZGEMM3M cblas_zgemm3m(CblasColMajor, tA, tB, m, n, k, _C(&alpha), _C(a), lda, _C(b), ldb, _C(&beta), _C(c), ldc); #else cblas_zgemm(CblasColMajor, tA, tB, m, n, k, _C(&alpha), _C(a), lda, _C(b), ldb, _C(&beta), _C(c), ldc); #endif #undef _C_T } template void trmm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const T& alpha, const T* a, const int lda, T* b, const int ldb); inline void trmm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const S_t& alpha, const S_t* a, const int lda, S_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); cblas_strmm(CblasColMajor, s, u, t, d, m, n, alpha, a, lda, b, ldb); } inline void trmm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const D_t& alpha, const D_t* a, const int lda, D_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); cblas_dtrmm(CblasColMajor, s, u, t, d, m, n, alpha, a, lda, b, ldb); } inline void trmm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const C_t& alpha, const C_t* a, const int lda, C_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); // WARNING: &alpha instead of alpha for complex values #define _C_T C_t cblas_ctrmm(CblasColMajor, s, u, t, d, m, n, _C(&alpha), _C(a), lda, _C(b), ldb); #undef _C_T } inline void trmm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const Z_t& alpha, const Z_t* a, const int lda, Z_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); // WARNING: &alpha instead of alpha for complex values #define _C_T Z_t cblas_ztrmm(CblasColMajor, s, u, t, d, m, n, _C(&alpha), _C(a), lda, _C(b), ldb); #undef _C_T } template void trsm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const T& alpha, const T* a, const int lda, T* b, const int ldb); inline void trsm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const S_t& alpha, const S_t* a, const int lda, S_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); cblas_strsm(CblasColMajor, s, u, t, d, m, n, alpha, a, lda, b, ldb); } inline void trsm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const D_t& alpha, const D_t* a, const int lda, D_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); cblas_dtrsm(CblasColMajor, s, u, t, d, m, n, alpha, a, lda, b, ldb); } inline void trsm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const C_t& alpha, const C_t* a, const int lda, C_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); // WARNING: &alpha instead of alpha for complex values #define _C_T C_t cblas_ctrsm(CblasColMajor, s, u, t, d, m, n, _C(&alpha), _C(a), lda, _C(b), ldb); #undef _C_T } inline void trsm(const char side, const char uplo, const char trans, const char diag, const int m, const int n, const Z_t& alpha, const Z_t* a, const int lda, Z_t* b, const int ldb) { const CBLAS_SIDE s = (side == 'L' ? CblasLeft : CblasRight); const CBLAS_UPLO u = (uplo == 'U' ? CblasUpper : CblasLower); const CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); const CBLAS_DIAG d = (diag == 'N' ? CblasNonUnit : CblasUnit ); // WARNING: &alpha instead of alpha for complex values #define _C_T Z_t cblas_ztrsm(CblasColMajor, s, u, t, d, m, n, _C(&alpha), _C(a), lda, _C(b), ldb); #undef _C_T } } // End namespace proxy_cblas // Some BLAS routines only exist for real or complex matrices; for instance, // when computing norms, one want to call dot on real vectors and dotc on // complex vectors. namespace proxy_cblas_convenience { template T dot_c(const int n, const T* x, const int incx, const T* y, const int incy); inline S_t dot_c(const int n, const S_t* x, const int incx, const S_t* y, const int incy) { return proxy_cblas::dot(n, x, incx, y, incy); } inline D_t dot_c(const int n, const D_t* x, const int incx, const D_t* y, const int incy) { return proxy_cblas::dot(n, x, incx, y, incy); } inline C_t dot_c(const int n, const C_t* x, const int incx, const C_t* y, const int incy) { return proxy_cblas::dotc(n, x, incx, y, incy); } inline Z_t dot_c(const int n, const Z_t* x, const int incx, const Z_t* y, const int incy) { return proxy_cblas::dotc(n, x, incx, y, incy); } } // end namespace proxy_cblas_convenience #ifdef HAVE_MKL_IMATCOPY namespace proxy_mkl { template void imatcopy(const size_t rows, const size_t cols, T* m); inline void imatcopy(const size_t rows, const size_t cols, S_t* m) { mkl_simatcopy('C', 'T', rows, cols, Constants::pone, m, rows, cols); } inline void imatcopy(const size_t rows, const size_t cols, D_t* m) { mkl_dimatcopy('C', 'T', rows, cols, Constants::pone, m, rows, cols); } inline void imatcopy(const size_t rows, const size_t cols, C_t* m) { const MKL_Complex8 pone = {1., 0.}; mkl_cimatcopy('C', 'T', rows, cols, pone, (MKL_Complex8*) m, rows, cols); } inline void imatcopy(const size_t rows, const size_t cols, Z_t* m) { const MKL_Complex16 pone = {1., 0.}; mkl_zimatcopy('C', 'T', rows, cols, pone, (MKL_Complex16*) m, rows, cols); } template void omatcopy(size_t rows, size_t cols, const T* m, T* copy); inline void omatcopy(size_t rows, size_t cols, const S_t* m, S_t* copy) { mkl_somatcopy('C', 'T', rows, cols, Constants::pone, m, rows, copy, cols); } inline void omatcopy(size_t rows, size_t cols, const D_t* m, D_t* copy) { mkl_domatcopy('C', 'T', rows, cols, Constants::pone, m, rows, copy, cols); } inline void omatcopy(size_t rows, size_t cols, const C_t* m, C_t* copy) { const MKL_Complex8 pone = {1., 0.}; mkl_comatcopy('C', 'T', rows, cols, pone, (const MKL_Complex8*) m, rows, (MKL_Complex8*) copy, cols); } inline void omatcopy(size_t rows, size_t cols, const Z_t* m, Z_t* copy) { const MKL_Complex16 pone = {1., 0.}; mkl_zomatcopy('C', 'T', rows, cols, pone, (const MKL_Complex16*) m, rows, (MKL_Complex16*) copy, cols); } } // End namespace proxy_mkl #endif #endif // _BLAS_OVERLOADS_HPP hmat-oss-1.0.4/src/c_default_interface.cpp000066400000000000000000000136541245574051100205250ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "hmat/hmat.h" #include "hmat_cpp_interface.hpp" #include "default_engine.hpp" #include "c_wrapping.hpp" #include "common/my_assert.h" hmat_cluster_tree_t * create_cluster_tree(DofCoordinate* dls, int n) { return (hmat_cluster_tree_t*) createClusterTree(dls, n); } int hmat_tree_nodes_count(hmat_cluster_tree_t * tree) { return ((ClusterTree*)tree)->nodesCount(); } void hmat_init_default_interface(hmat_interface_t * i, hmat_value_t type) { switch (type) { case HMAT_SIMPLE_PRECISION: createCInterface(i); break; case HMAT_DOUBLE_PRECISION: createCInterface(i); break; case HMAT_SIMPLE_COMPLEX: createCInterface(i); break; case HMAT_DOUBLE_COMPLEX: createCInterface(i); break; default: strongAssert(false); } } void hmat_get_parameters(hmat_settings_t* settings) { HMatSettings& settingsCxx = HMatSettings::getInstance(); settings->assemblyEpsilon = settingsCxx.assemblyEpsilon; settings->recompressionEpsilon = settingsCxx.recompressionEpsilon; switch (settingsCxx.compressionMethod) { case Svd: settings->compressionMethod = hmat_compress_svd; break; case AcaFull: settings->compressionMethod = hmat_compress_aca_full; break; case AcaPartial: settings->compressionMethod = hmat_compress_aca_partial; break; case AcaPlus: settings->compressionMethod = hmat_compress_aca_plus; break; default: std::cerr << "Internal error: invalid value for compression method: \"" << settingsCxx.compressionMethod << "\"." << std::endl; std::cerr << "Internal error: using SVD" << std::endl; settings->compressionMethod = hmat_compress_svd; break; } settings->admissibilityFactor = settingsCxx.admissibilityFactor; switch (settingsCxx.clustering) { case kGeometric: settings->clustering = hmat_cluster_geometric; break; case kMedian: settings->clustering = hmat_cluster_median; break; case kHybrid: settings->clustering = hmat_cluster_hybrid; break; default: std::cerr << "Internal error: invalid value for clustering method: \"" << settingsCxx.clustering << "\"." << std::endl; std::cerr << "Internal error: using median" << std::endl; settings->clustering = hmat_cluster_median; break; } settings->maxLeafSize = settingsCxx.maxLeafSize; settings->maxParallelLeaves = settingsCxx.maxParallelLeaves; settings->elementsPerBlock = settingsCxx.elementsPerBlock; settings->useLu = settingsCxx.useLu; settings->useLdlt = settingsCxx.useLdlt; settings->coarsening = settingsCxx.coarsening; settings->recompress = settingsCxx.recompress; settings->validateCompression = settingsCxx.validateCompression; settings->validationErrorThreshold = settingsCxx.validationErrorThreshold; settings->validationReRun = settingsCxx.validationReRun; settings->validationDump = settingsCxx.validationDump; } int hmat_set_parameters(hmat_settings_t* settings) { strongAssert(settings != NULL); int rc = 0; HMatSettings& settingsCxx = HMatSettings::getInstance(); settingsCxx.assemblyEpsilon = settings->assemblyEpsilon; settingsCxx.recompressionEpsilon = settings->recompressionEpsilon; switch (settings->compressionMethod) { case hmat_compress_svd: settingsCxx.compressionMethod = Svd; break; case hmat_compress_aca_full: settingsCxx.compressionMethod = AcaFull; break; case hmat_compress_aca_partial: settingsCxx.compressionMethod = AcaPartial; break; case hmat_compress_aca_plus: settingsCxx.compressionMethod = AcaPlus; break; default: std::cerr << "Invalid value for compression method: \"" << settings->compressionMethod << "\"." << std::endl; rc = 1; break; } settingsCxx.admissibilityFactor = settings->admissibilityFactor; switch (settings->clustering) { case hmat_cluster_geometric: settingsCxx.clustering = kGeometric; break; case hmat_cluster_median: settingsCxx.clustering = kMedian; break; case hmat_cluster_hybrid: settingsCxx.clustering = kHybrid; break; default: std::cerr << "Invalid value for clustering method: \"" << settings->clustering << "\"." << std::endl; rc = 1; break; } settingsCxx.maxLeafSize = settings->maxLeafSize; settingsCxx.maxParallelLeaves = settings->maxParallelLeaves; settingsCxx.elementsPerBlock = settings->elementsPerBlock; settingsCxx.useLu = settings->useLu; settingsCxx.useLdlt = settings->useLdlt; settingsCxx.coarsening = settings->coarsening; settingsCxx.recompress = settings->recompress; settingsCxx.validateCompression = settings->validateCompression; settingsCxx.validationErrorThreshold = settings->validationErrorThreshold; settingsCxx.validationReRun = settings->validationReRun; settingsCxx.validationDump = settings->validationDump; settingsCxx.setParameters(); settingsCxx.printSettings(); return rc; } const char * hmat_get_version() { return HMAT_VERSION; } hmat-oss-1.0.4/src/c_wrapping.hpp000066400000000000000000000217151245574051100167120ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _C_WRAPPING_HPP #define _C_WRAPPING_HPP #include "common/context.hpp" #include "full_matrix.hpp" template class E> static hmat_matrix_t * create_empty_hmatrix( void* rows_tree, void* cols_tree) { return (hmat_matrix_t*) new HMatInterface( static_cast(rows_tree), static_cast(cols_tree)); } template class SimpleCAssemblyFunction : public SimpleAssemblyFunction { private: simple_interaction_compute_func functor; void* functor_extra_args; /** Constructor. \param _mat The FullMatrix the values are taken from. */ public: SimpleCAssemblyFunction(void* user_context, simple_interaction_compute_func &f) : SimpleAssemblyFunction(), functor(f), functor_extra_args(user_context) {} typename Types::dp interaction(int i, int j) const { typename Types::dp result; (*functor)(functor_extra_args, i, j, &result); return result; } }; template class E> static int assemble(hmat_matrix_t * holder, void* user_context, prepare_func prepare, compute_func compute, release_func free_data, int lower_symmetric) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*) holder; BlockAssemblyFunction f(&hmat->rows->data, &hmat->cols->data, user_context, prepare, compute, free_data); hmat->assemble(f, lower_symmetric ? kLowerSymmetric : kNotSymmetric, true); std::pair p = hmat->compressionRatio(); std::cout << "Compression ratio = " << (100. * p.first) / p.second << "%" << std::endl; return 0; } template class E> static int assemble_factor(hmat_matrix_t * holder, void* user_context, prepare_func prepare, compute_func compute, release_func free_data, int lower_symmetric) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*) holder; BlockAssemblyFunction f(&hmat->rows->data, &hmat->cols->data, user_context, prepare, compute, free_data); hmat->assemble(f, lower_symmetric ? kLowerSymmetric : kNotSymmetric, false); hmat->factorize(); std::pair p = hmat->compressionRatio(); std::cout << "Compression ratio = " << (100. * p.first) / p.second << "%" << std::endl; return 0; } template class E> static int assemble_simple_interaction(hmat_matrix_t * holder, void* user_context, simple_interaction_compute_func compute, int lower_symmetric) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*) holder; SimpleCAssemblyFunction f(user_context, compute); hmat->assemble(f, lower_symmetric ? kLowerSymmetric : kNotSymmetric); std::pair p = hmat->compressionRatio(); std::cout << "Compression ratio = " << (100. * p.first) / p.second << "%" << std::endl; return 0; } template class E> static hmat_matrix_t* copy(hmat_matrix_t* holder) { DECLARE_CONTEXT; return (hmat_matrix_t*) ((HMatInterface*) holder)->copy(); } template class E> static int destroy(hmat_matrix_t* holder) { delete (HMatInterface*)(holder); return 0; } template class E> static int factor(hmat_matrix_t* holder) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*) holder; hmat->factorize(); std::pair p = hmat->compressionRatio(); std::cout << "Compression ratio = " << (100. * p.first) / p.second << "%" << std::endl; return 0; } template class E> static int finalize() { HMatInterface::finalize(); return 0; } template class E> static int full_gemm(char transA, char transB, int mc, int nc, void* c, void* alpha, void* a, hmat_matrix_t * holder, void* beta) { DECLARE_CONTEXT; const HMatInterface* b = (HMatInterface*)holder; FullMatrix matC((T*)c, mc, nc); FullMatrix* matA = NULL; if (transA == 'N') { matA = new FullMatrix((T*)a, mc, transB == 'N' ? b->rows->data.n : b->cols->data.n); } else { matA = new FullMatrix((T*)a, transB == 'N' ? b->rows->data.n : b->cols->data.n, mc); } HMatInterface::gemm(matC, transA, transB, *((T*)alpha), *matA, *b, *((T*)beta)); delete matA; return 0; } template class E> static int gemm(char trans_a, char trans_b, void *alpha, hmat_matrix_t * holder, hmat_matrix_t * holder_b, void *beta, hmat_matrix_t * holder_c) { DECLARE_CONTEXT; HMatInterface* hmat_a = (HMatInterface*)holder; HMatInterface* hmat_b = (HMatInterface*)holder_b; HMatInterface* hmat_c = (HMatInterface*)holder_c; hmat_c->gemm(trans_a, trans_b, *((T*)alpha), hmat_a, hmat_b, *((T*)beta)); return 0; } template class E> static int gemv(char trans_a, void* alpha, hmat_matrix_t * holder, void* vec_b, void* beta, void* vec_c, int nrhs) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*)holder; const ClusterData* bData = (trans_a == 'N' ? &hmat->cols->data : &hmat->rows->data); const ClusterData* cData = (trans_a == 'N' ? &hmat->rows->data : &hmat->cols->data); FullMatrix mb((T*) vec_b, bData->n, nrhs); FullMatrix mc((T*) vec_c, cData->n, nrhs); hmat->gemv(trans_a, *((T*)alpha), mb, *((T*)beta), mc); return 0; } template class E> static int init() { return HMatInterface::init(); } template class E> static double norm(hmat_matrix_t* holder) { DECLARE_CONTEXT; return ((HMatInterface*)holder)->norm(); } template class E> static int scale(void *alpha, hmat_matrix_t* holder) { DECLARE_CONTEXT; ((HMatInterface*)holder)->scale(*((T*)alpha)); return 0; } template class E> static int solve_mat(hmat_matrix_t* hmat, hmat_matrix_t* hmatB) { ((HMatInterface*)hmat)->solve(*(HMatInterface*)hmatB); return 0; } template class E> static int solve_systems(hmat_matrix_t* holder, void* b, int nrhs) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*)holder; FullMatrix mb((T*) b, hmat->cols->data.n, nrhs); hmat->solve(mb); return 0; } template class E> static int transpose(hmat_matrix_t* hmat) { DECLARE_CONTEXT; ((HMatInterface*)hmat)->transpose(); return 0; } template class E> static int hmat_get_info(hmat_matrix_t* holder, hmat_info_t* info) { DECLARE_CONTEXT; HMatInterface* hmat = (HMatInterface*) holder; std::pair p = hmat->compressionRatio(); info->compressed_size = p.first; info->uncompressed_size = p.second; info->nr_block_clusters = hmat->nodesCount(); return 0; } template class E> static void createCInterface(hmat_interface_t * i) { i->assemble = assemble; i->assemble_factor = assemble_factor; i->assemble_simple_interaction = assemble_simple_interaction; i->copy = copy; i->create_empty_hmatrix = create_empty_hmatrix; i->destroy = destroy; i->factor = factor; i->finalize = finalize; i->full_gemm = full_gemm; i->gemm = gemm; i->gemv = gemv; i->init = init; i->norm = norm; i->scale = scale; i->solve_mat = solve_mat; i->solve_systems = solve_systems; i->transpose = transpose; i->internal = NULL; i->hmat_get_info = hmat_get_info; } #endif // _C_WRAPPING_HPP hmat-oss-1.0.4/src/cluster_tree.cpp000066400000000000000000000235441245574051100172560ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "config.h" /*! \file \ingroup HMatrix \brief Spatial cluster tree implementation. */ #include "tree.hpp" #include "cluster_tree.hpp" #include "common/my_assert.h" #include "common/context.hpp" #include #include using namespace std; size_t maxElementsPerBlock = 10000000; bool ClusterData::operator==(const ClusterData& o) const { // Attention ! On ne fait pas de verification sur les indices, pour // pouvoir parler d'egalite entre des noeuds ayant ete generes // differemment (differentes matrices). return (offset == o.offset) && (n == o.n); } bool ClusterData::isSubset(const ClusterData& o) const { return (offset >= o.offset) && (offset + n <= o.offset + o.n); } bool ClusterData::isSuperSet(const ClusterData& o) const { return o.isSubset(*this); } bool ClusterData::intersects(const ClusterData& o) const { size_t start = max(offset, o.offset); size_t end = min(offset + n, o.offset + o.n); return (end > start); } const ClusterData* ClusterData::intersection(const ClusterData& o) const { if (!intersects(o)) { return NULL; } size_t start = max(offset, o.offset); size_t end = min(offset + n, o.offset + o.n); size_t interN = end - start; ClusterData* result = new ClusterData(*this); result->offset = start; result->n = interN; return result; } void ClusterData::computeBoundingBox(Point boundingBox[2]) const { int* myIndices = indices + offset; Point tmp = (*points)[myIndices[0]]; Point minPoint(tmp.x, tmp.y, tmp.z), maxPoint(tmp.x, tmp.y, tmp.z); for (int i = 0; i < n; i++) { int index = myIndices[i]; const Point& p = (*points)[index]; for (int dim = 0; dim < 3; dim++) { minPoint.xyz[dim] = min(minPoint.xyz[dim], p.xyz[dim]); maxPoint.xyz[dim] = max(maxPoint.xyz[dim], p.xyz[dim]); } } boundingBox[0] = minPoint; boundingBox[1] = maxPoint; } // ClusterTree double ClusterTree::eta = 2.; ClusterTree::ClusterTree(Point _boundingBox[2], const ClusterData& _data, int _threshold = 100) : Tree<2>(NULL), data(_data), threshold(_threshold) { DECLARE_CONTEXT; boundingBox[0] = _boundingBox[0]; boundingBox[1] = _boundingBox[1]; memset(&childrenBoundingBoxes, 0, sizeof(Point) * 2 * 2); myAssert(data.n > 0); } /* Implemente la condition d'admissibilite des bounding box. */ bool ClusterTree::isAdmissibleWith(const ClusterTree* other, double eta) const { size_t elements = ((size_t) data.n) * other->data.n; if (elements > maxElementsPerBlock) { return false; } return min(diameter(), other->diameter()) <= ClusterTree::eta * distanceTo(other); } double ClusterTree::getEta(const ClusterTree* other) const { return min(diameter(), other->diameter()) / distanceTo(other); } double ClusterTree::diameter() const { return boundingBox[0].distanceTo(boundingBox[1]); } double ClusterTree::distanceTo(const ClusterTree* other) const { double result = 0.; double difference = 0.; difference = max(0., boundingBox[0].x - other->boundingBox[1].x); result += difference * difference; difference = max(0., other->boundingBox[0].x - boundingBox[1].x); result += difference * difference; difference = max(0., boundingBox[0].y - other->boundingBox[1].y); result += difference * difference; difference = max(0., other->boundingBox[0].y - boundingBox[1].y); result += difference * difference; difference = max(0., boundingBox[0].z - other->boundingBox[1].z); result += difference * difference; difference = max(0., other->boundingBox[0].z - boundingBox[1].z); result += difference * difference; return sqrt(result); } void ClusterTree::divide() { DECLARE_CONTEXT; myAssert(isLeaf()); if (data.n <= (size_t) threshold) { return; } int dim = largestDimension(); sortByDimension(dim); int separatorIndex = findSeparatorIndex(); ClusterData leftData(data.indices, data.offset, separatorIndex, data.points); ClusterData rightData(data.indices, data.offset + separatorIndex, data.n - separatorIndex, data.points); leftData.computeBoundingBox(childrenBoundingBoxes[0]); rightData.computeBoundingBox(childrenBoundingBoxes[1]); ClusterTree *leftChild = this->make(childrenBoundingBoxes[0], leftData, threshold); ClusterTree *rightChild = this->make(childrenBoundingBoxes[1], rightData, threshold); strongAssert(leftChild); strongAssert(rightChild); leftChild->depth = depth + 1; rightChild->depth = depth + 1; insertChild(0, leftChild); insertChild(1, rightChild); leftChild->divide(); rightChild->divide(); } ClusterTree* ClusterTree::copy(const ClusterTree* copyFather) const { ClusterTree* result = NULL; if (!copyFather) { // La racine doit s'occuper le tableau des points et le mapping. myAssert(data.n == data.points->size()); size_t n = data.points->size(); int* copyIndices = new int[n]; memcpy(copyIndices, data.indices, sizeof(int) * n); vector* copyPoints = new vector(n); *copyPoints = *data.points; ClusterData rootData(copyIndices, 0, n, copyPoints); result = this->make((Point*) boundingBox, rootData, threshold); copyFather = result; } else { ClusterData copyData(copyFather->data.indices, data.offset, data.n, copyFather->data.points); result = this->make((Point*) boundingBox, copyData, threshold); } if (!isLeaf()) { result->insertChild(0, ((ClusterTree*) getChild(0))->copy(copyFather)); result->insertChild(1, ((ClusterTree*) getChild(1))->copy(copyFather)); } return result; } // GeometricBisectionClusterTree int GeometricBisectionClusterTree::findSeparatorIndex() const { int dim = largestDimension(); double middle = .5 * (boundingBox[0].xyz[dim] + boundingBox[1].xyz[dim]); int middleIndex = 0; int* myIndices = data.indices + data.offset; while ((*data.points)[myIndices[middleIndex]].xyz[dim] < middle) { middleIndex++; } return middleIndex; } ClusterTree* GeometricBisectionClusterTree::make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const { return new GeometricBisectionClusterTree(_boundingBox, _data, _threshold); } // MedianBisectionClusterTree int MedianBisectionClusterTree::findSeparatorIndex() const { return data.n / 2; } ClusterTree* MedianBisectionClusterTree::make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const { return new MedianBisectionClusterTree(_boundingBox, _data, _threshold); } int ClusterTree::largestDimension() const { int maxDim = -1; double maxSize = -1; for (int i = 0; i < 3; i++) { double size = (boundingBox[1].xyz[i] - boundingBox[0].xyz[i]); if (size > maxSize) { maxSize = size; maxDim = i; } } return maxDim; } void ClusterTree::sortByDimension(int dim) { int* myIndices = data.indices + data.offset; switch (dim) { case 0: sort(myIndices, myIndices + data.n, IndicesComparator<0>(data)); break; case 1: sort(myIndices, myIndices + data.n, IndicesComparator<1>(data)); break; case 2: sort(myIndices, myIndices + data.n, IndicesComparator<2>(data)); break; default: strongAssert(false); } } static double volume(const Point boundingBox[2]) { double result = 1.; for (int dim = 0; dim < 3; dim++) { result *= (boundingBox[1].xyz[dim] - boundingBox[0].xyz[dim]); } return result; } const double thresholdRatio = .8; int HybridBisectionClusterTree::findSeparatorIndex() const { // Change de version de decoupage en fonction de la taille realtive des // boites. On essaie de prendre le decoupage selon la mediane. Si celui-ci // donne une reduction trop faible de la taille des boites englobantes, alors // on passe sur le critere geometrique. int index = data.n / 2; Point leftBoundingBox[2], rightBoundingBox[2]; { ClusterData leftData(data.indices, data.offset, index, data.points); ClusterData rightData(data.indices, data.offset + index, data.n - index, data.points); leftData.computeBoundingBox(leftBoundingBox); rightData.computeBoundingBox(rightBoundingBox); } double currentVolume = volume(boundingBox); double leftVolume = volume(leftBoundingBox); double rightVolume = volume(rightBoundingBox); double maxRatio = max(rightVolume / currentVolume, leftVolume / currentVolume); if (maxRatio > thresholdRatio) { int dim = largestDimension(); double middle = .5 * (boundingBox[0].xyz[dim] + boundingBox[1].xyz[dim]); int middleIndex = 0; int* myIndices = data.indices + data.offset; while ((*data.points)[myIndices[middleIndex]].xyz[dim] < middle) { middleIndex++; } return middleIndex; } else { return index; } } ClusterTree* HybridBisectionClusterTree::make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const { return new HybridBisectionClusterTree(_boundingBox, _data, _threshold); } hmat-oss-1.0.4/src/cluster_tree.hpp000066400000000000000000000233571245574051100172650ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Spatial cluster tree for the Dofs. */ #ifndef _CLUSTER_TREE_HPP #define _CLUSTER_TREE_HPP #include "tree.hpp" #include #include extern size_t maxElementsPerBlock; /*! \brief Class representing a 3D point. */ class Point { public: union { struct { double x, y, z; /// Coordinates }; double xyz[3]; /// Idem }; Point(double _x = 0., double _y = 0., double _z = 0.) : x(_x), y(_y), z(_z) {} /*! \brief Return d(this, other) */ inline double distanceTo(const Point &other) const { double result = 0.; double difference = 0.; difference = x - other.x; result += difference * difference; difference = y - other.y; result += difference * difference; difference = z - other.z; result += difference * difference; return sqrt(result); } }; /*! \brief Data held by a node of a \a ClusterTree. */ class ClusterData { public: /// Global indices array int *indices; /// offset of the start of this node's data in indices size_t offset; /// length of this node's data in indices size_t n; const std::vector* points; public: /*! \brief Compare two indices set for equality. */ bool operator==(const ClusterData& o) const; /*! \brief Return true if this is a subset of o. \param o ClusterData \return true if this is a subset of o */ bool isSubset(const ClusterData& o) const; /*! \brief Return true if this is a strict subset of o. */ bool isStrictSubset(const ClusterData& o) const { return isSubset(o) && (!(*this == o)); }; /*! \brief Return o.isSubset(*this) */ bool isSuperSet(const ClusterData& o) const; /*! \brief Return o.isStrictSubset(*this) */ bool isStrictSuperSet(const ClusterData& o) const { return isSuperSet(o) && (!(*this == o)); }; /** Return true if two index sets intersect. */ bool intersects(const ClusterData& o) const; /** Return the intersection of two index sets, NULL if the intersection is empty. */ const ClusterData* intersection(const ClusterData& o) const; /** Compute the bounding box of a ClusterData set. */ void computeBoundingBox(Point boundingBox[2]) const; ClusterData(int* _indices, size_t _offset, size_t _n, const std::vector* _points) : indices(_indices), offset(_offset), n(_n), points(_points) {} }; /*! \brief Comparateur pour deux indices selon une de leur coordonnee. On utilise les templates pour eviter un trop grand cout a l'execution ou le copier-coller, avec pour le parametre du template : - 0 pour une comparaison selon x - 1 pour une comparaison selon y - 2 pour une comparaison selon z */ template class IndicesComparator { private: ClusterData& data; public: IndicesComparator(ClusterData& _data) : data(_data) {} bool operator() (int i, int j) { return (*data.points)[i].xyz[N] < (*data.points)[j].xyz[N]; } }; /*! \brief Abstract Base Class for the Cluster trees. This class defines the index set division of a problem. Several divisions being possible, this is an abstract class. It however only allows bisection of the index set. \warning During the tree creation, it is mandatory to call ClusterTree::divide() on the newly created tree, to really create the tree structure. */ class ClusterTree : public Tree<2> { public: /*! min and max points */ Point boundingBox[2]; /*! Admissibility criteria for the tree nodes. */ static double eta; /*! Data */ ClusterData data; protected: /*! Bounding boxes of the children */ Point childrenBoundingBoxes[2][2]; /*! Max number of Dofs in a leaf */ int threshold; public: /*! \brief Create a leaf. \param _boundingBox bounding box of this leaf \param clusterData data held by this leaf \param _threshold max number of indices in a leaf. Used for the recursive division. */ ClusterTree(Point _boundingBox[2], const ClusterData& _data, int _threshold); /*! \brief Returns true if 2 nodes are admissible together. This is used for the tree construction on which we develop the HMatrix. Two leaves are admissible if they satisfy the criterion allowing the compression of the resulting matrix block. In the default implementation in the base class, the criterion kept is: min (diameter (), other-> diameter ()) <= eta * distanceTo (other); There is also a criterion of size on the resulting block, which should not exceed 10000000 elements (160MB for double complex precision). TODO: making the criterion adjustable. \param other the other node of the couple. \param eta a parameter used in the evaluation of the admissibility. \return true if 2 nodes are admissible. */ bool isAdmissibleWith(const ClusterTree* other, double eta = 10.) const; /*! \brief Returns the admissibility parameter eta corresponding to two clusters. As described in \a ClusterTree::isAdmissibleWith documentation, this criteria is defined by: eta = min(diameter(), other->diameter()) / distanceTo(other); The lower it is, the farther away are the two boxes. It is thus linked with the compression ratio one can expect from a block, the lower eta is, the more the block can be compressed. It can be used as a parameter in a crude a-priori work estimation method. */ double getEta(const ClusterTree* other) const; /*! \brief Divide the tree to create its final structure. \warning It is mandatory to call this function before using a newly created tree. */ void divide(); /*! \brief Return a copy to this. */ ClusterTree* copy(const ClusterTree* copyFather=NULL) const; protected: /*! Diameter of the node */ double diameter() const; /*! Distance to an other node */ double distanceTo(const ClusterTree* other) const; /*! Returns true if a point is inside a given children bounding box. \param p The points \param index the index of the children bounding box to consider. */ inline bool isPointInBox(const Point p, int boxIndex) const { Point box[2] = {childrenBoundingBoxes[boxIndex][0], childrenBoundingBoxes[boxIndex][1]}; return ((p.x >= box[0].x) && (p.x <= box[1].x) && (p.y >= box[0].y) && (p.y <= box[1].y) && (p.z >= box[0].z) && (p.z <= box[1].z)); } /** Return the largest dimension of the node bounding box. */ int largestDimension() const; /** Sort the ClusterData indices of this node according to a given dimension. \parak dim The dimension. */ void sortByDimension(int dim); /** Find the index of the separation between the two sons. */ virtual int findSeparatorIndex() const = 0; /*! \brief Make a new ClusterTree*. This function has the same arguments as \a ClusterTree::ClusterTree(). It is used to make an instance of the right derived type. */ virtual ClusterTree* make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const = 0; }; /*! \brief Creating tree by geometric binary division according to the largest dimension. For this tree, if a leaf has too many DOFs, the division is done by dividing the bounding box in the middle of its biggest dimension. The boxes of children are resized to the real dimension of the DOFs. This method ensures that both 2 children won't be empty, but not the equal repartition of DOFs on both sides of the boundary. */ class GeometricBisectionClusterTree : public ClusterTree { public: GeometricBisectionClusterTree(Point _boundingBox[2], const ClusterData& _data, int _threshold) : ClusterTree(_boundingBox, _data, _threshold) {} protected: int findSeparatorIndex() const; ClusterTree* make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const; }; /*! \brief Creating tree by median division. For the division of a leaf, we chose the biggest dimension of bounding box and divide it according to this axis. The DOFs are sorted by the increasing order on this direction, and are divided in the median of this axis. This method ensures that the children will have equal number of DOFs, but desn't respect their size criterion. The bounding boxes of the children are resized to the necessary size. */ class MedianBisectionClusterTree : public ClusterTree { public: MedianBisectionClusterTree(Point _boundingBox[2], const ClusterData& _data, int _threshold) : ClusterTree(_boundingBox, _data, _threshold) {} protected: int findSeparatorIndex() const; ClusterTree* make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const; }; class HybridBisectionClusterTree : public ClusterTree { public: HybridBisectionClusterTree(Point _boundingBox[2], const ClusterData& _data, int _threshold) : ClusterTree(_boundingBox, _data, _threshold) {} protected: int findSeparatorIndex() const; ClusterTree* make(Point _boundingBox[2], const ClusterData& _data, int _threshold) const; }; #endif hmat-oss-1.0.4/src/common/000077500000000000000000000000001245574051100153325ustar00rootroot00000000000000hmat-oss-1.0.4/src/common/chrono.h000066400000000000000000000045011245574051100167730ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _CHRONO_H #define _CHRONO_H #include "config.h" #include /* We use the realtime extension of libc */ #ifdef HAVE_TIME_H #include #endif #ifdef HAVE_SYS_TIME_H #include #endif #ifdef HAVE_SYS_RESOURCE_H #include #endif #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif #if defined(_WIN32) && \ !(defined (__MINGW64__) || defined(__MINGW32__)) struct my_timespec { int64_t tv_sec; int64_t tv_nsec; }; typedef struct my_timespec Time; #else typedef struct timespec Time; #endif // Windows inline static Time now() { Time result; #ifdef _WIN32 LARGE_INTEGER frequency; LARGE_INTEGER value; double dTime; QueryPerformanceFrequency(&frequency); QueryPerformanceCounter(&value); dTime = ((double) value.QuadPart) / ((double) frequency.QuadPart); result.tv_sec = (int64_t) dTime; result.tv_nsec = (int64_t) (1.e9 * (dTime - result.tv_sec)); if (result.tv_nsec >= 1000000000) { result.tv_sec += 1; result.tv_nsec = 0; } #else #ifdef HAVE_LIBRT clock_gettime(CLOCK_MONOTONIC, &result); #else struct rusage temp; getrusage(RUSAGE_SELF, &temp); result.tv_sec = (time_t) (temp.ru_utime.tv_sec); /* seconds */ result.tv_nsec = (long) (1000 * temp.ru_utime.tv_usec); /* uSecs */ #endif #endif /* _WIN32 */ return result; } inline static int64_t time_diff_in_nanos(Time tick, Time tock) { return (tock.tv_sec - tick.tv_sec) * 1000000000 + (tock.tv_nsec - tick.tv_nsec); } #endif hmat-oss-1.0.4/src/common/context.cpp000066400000000000000000000133331245574051100175250ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "hmat/config.h" #include "context.hpp" #include "my_assert.h" #include #include #include #include #include #include namespace trace { int (*nodeIndexFunction)() = NULL; /** \brief Set the function used to get the root index. \return a nuber between 0 (not in a parallel region) and the number of workers (included). */ static int currentNodeIndex() { return (nodeIndexFunction ? nodeIndexFunction() : -1) + 1; } void setNodeIndexFunction(int (*nodeIndexFunc)()) { nodeIndexFunction = nodeIndexFunc; } bool Node::enabled = true; UM_NS::unordered_map Node::currentNodes[MAX_ROOTS]; void* Node::enclosingContext[MAX_ROOTS] = {}; Node::Node(const char* _name, Node* _parent) : name(_name), data(), parent(_parent), children() {} Node::~Node() { for (std::vector::iterator it = children.begin(); it != children.end(); ++it) { delete *it; } } void Node::enterContext(const char* name) { Node* current = currentNode(); myAssert(current); Node* child = current->findChild(name); int index = currentNodeIndex(); void* enclosing = enclosingContext[index]; if (!child) { child = new Node(name, current); current->children.push_back(child); } myAssert(child); currentNodes[index][enclosing] = child; current = child; current->data.lastEnterTime = now(); current->data.n += 1; } void Node::leaveContext() { int index = currentNodeIndex(); void* enclosing = enclosingContext[index]; Node* current = currentNodes[index][enclosing]; myAssert(current); current->data.totalTime += time_diff_in_nanos(current->data.lastEnterTime, now()); if (!(current->parent)) { std::cout << "Warning! Closing root node." << std::endl; } else { currentNodes[index][enclosing] = current->parent; } } void* Node::currentReference() { return (void*) Node::currentNode(); } void Node::setEnclosingContext(void* enclosing) { int index = currentNodeIndex(); enclosingContext[index] = enclosing; } void Node::incrementFlops(int64_t flops) { currentNode()->data.totalFlops += flops; } void Node::startComm() { currentNode()->data.lastCommInitiationTime = now(); } void Node::endComm() { Node* current = currentNode(); current->data.totalCommTime += time_diff_in_nanos(current->data.lastEnterTime, now()); } Node* Node::findChild(const char* name) const { for (std::vector::const_iterator it = children.begin(); it != children.end(); ++it) { // On cherche la correspondance avec le pointeur. Puisqu'on demande que // tous les noms soient des pointeurs qui existent tout le long de // l'execution, on peut forcer l'unicite. if ((*it)->name == name) { return *it; } } return NULL; } void Node::dump(std::ofstream& f) const { f << "{" << "\"name\": \"" << name << "\", " << "\"id\": \"" << this << "\", " << "\"n\": " << data.n << ", " << "\"totalTime\": " << data.totalTime / 1e9 << ", " << "\"totalFlops\": " << data.totalFlops << ", " << "\"totalBytesSent\": " << data.totalBytesSent << ", " << "\"totalBytesReceived\": " << data.totalBytesReceived << ", " << "\"totalBytesReceived\": " << data.totalBytesReceived << ", " << "\"totalCommTime\": " << data.totalCommTime / 1e9 << "," << std::endl; f << "\"children\": ["; std::string delimiter(""); for (std::vector::const_iterator it = children.begin(); it != children.end(); ++it) { f << delimiter; (*it)->dump(f); delimiter = ", "; } f << "]}"; } void Node::jsonDump(const char* filename) { std::ofstream f(filename); f << "["; std::string delimiter(""); for (int i = 0; i < MAX_ROOTS; i++) { if (!currentNodes[i].empty()) { UM_NS::unordered_map::iterator p = currentNodes[i].begin(); for(; p != currentNodes[i].end(); ++p) { Node* root = p->second; f << delimiter << std::endl; root->dump(f); delimiter = ", "; } } } f << std::endl << "]" << std::endl; } /** Find the current node, allocating one if necessary. */ Node* Node::currentNode() { int id = currentNodeIndex(); void* enclosing = enclosingContext[id]; UM_NS::unordered_map::iterator it = currentNodes[id].find(enclosing); Node* current; if (it == currentNodes[id].end()) { char *name = const_cast("root"); if (id != 0) { name = strdup("Worker #XXX - 0xXXXXXXXXXXXXXXXX"); // Worker ID - enclosing strongAssert(name); sprintf(name, "Worker #%03d - %p", id, enclosing); } current = new Node(name, NULL); currentNodes[id][enclosing] = current; } else { current = it->second; } return current; } } hmat-oss-1.0.4/src/common/context.hpp000066400000000000000000000120351245574051100175300ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Context manager used for the tracing functionnality. */ #ifndef _CONTEXT_HPP #define _CONTEXT_HPP #include "hmat/config.h" #include "common/chrono.h" #include #include #if (__cplusplus > 199711L) || defined(HAVE_CPP11) || defined(_MSC_VER) #include #define UM_NS std #else #include #define UM_NS std::tr1 #endif namespace trace { class NodeData { public: unsigned int n; int64_t totalTime; // ns int64_t totalFlops; int totalBytesSent; int totalBytesReceived; int64_t totalCommTime; Time lastEnterTime; Time lastCommInitiationTime; }; #ifndef MAX_ROOTS #define MAX_ROOTS 128 #endif /** Set the function tasked with returning the current root index. The function set this way is used to determine to which tree the current tracing context belongs to. By default, a single context is assumed, and this behavior can be restored by setting the fuction to NULL. \param nodeIndexFunc a function returning a worker index, which is: - 0 in non-parallel parts of the code - between 1 and n_workers (included) in parallel regions */ void setNodeIndexFunction(int (*nodeIndexFunc)()); class Node { public: /// True if the tracing is enabled. True by default. static bool enabled; private: /// Unique name for the context. const char* name; /// Tracing data associated with this node. NodeData data; /// Parent node. NULL for a root. Node* parent; /// Ordered list of children nodes. std::vector children; /// List of trace trees. static UM_NS::unordered_map currentNodes[MAX_ROOTS]; // TODO: padding to avoid false sharing ? static void* enclosingContext[MAX_ROOTS]; // TODO: padding to avoid false sharing ? public: /** Enter a context noted by a name. */ static void enterContext(const char* name); /** Leave the current context. */ static void leaveContext(); /** Get a unique reference to the current context. */ static void* currentReference(); /** Set the current as being relative to a given enclosong one. The enclosing context is identified through the pointer returned by \a currentReference(). */ static void setEnclosingContext(void* enclosing); static void enable() {enabled = true;} static void disable() {enabled = false;} static void incrementFlops(int64_t flops); static void startComm(); static void endComm(); /** Dumps the trace trees to a JSON file. */ static void jsonDump(const char* filename); private: Node(const char* _name, Node* _parent); ~Node(); Node* findChild(const char* name) const; void dump(std::ofstream& f) const; static Node* currentNode(); }; } class DisableContextInBlock { private: bool enabled; public: DisableContextInBlock() { enabled = trace::Node::enabled; trace::Node::disable(); } ~DisableContextInBlock() { trace::Node::enabled = enabled; } }; #ifdef HAVE_CONTEXT #define DISABLE_CONTEXT_IN_BLOCK DisableContextInBlock dummyDisableContextInBlock #define tracing_set_worker_index_func(f) trace::setNodeIndexFunction(f) #define enter_context(x) trace::Node::enterContext(x) #define leave_context() trace::Node::leaveContext() #define increment_flops(x) trace::Node::incrementFlops(x) #define tracing_dump(x) trace::Node::jsonDump(x) #else #define tracing_set_worker_index_func(f) do {} while (0) #define enter_context(x) do {} while(0) #define leave_context() do {} while(0) #define increment_flops(x) do { (void)(x); } while(0) #define tracing_dump(x) do {} while(0) #define DISABLE_CONTEXT_IN_BLOCK do {} while (0) #endif /*! \brief Simple wrapper around enter/leave_context() to avoid having to put leave_context() before each return statement. */ class Context { public: Context(const char* name) { enter_context(name); } ~Context() { leave_context(); } }; #if defined(__GNUC__) #define DECLARE_CONTEXT Context __reserved_ctx(__PRETTY_FUNCTION__) #elif defined(_MSC_VER) #define DECLARE_CONTEXT Context __reserved_ctx(__FUNCTION__) #else #define DECLARE_CONTEXT Context __reserved_ctx(__func__) #endif #endif hmat-oss-1.0.4/src/common/data_recorder.hpp000066400000000000000000000076231245574051100206510ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _DATA_RECORDER_HPP #define _DATA_RECORDER_HPP #include #include #include #include #include #include template std::ostream& operator<<(std::ostream& os, const std::pair& p) { os << p.first << " " << p.second; return os; } /** Helper class to record arbitrary timed data to a file. Data points are provided through the \a record() method adding a timestamped record to an in-memory array. It can then be dumped to a text file, provided that the data point has a suitable operator<<() defined. The user can also record tags, which are timestamped strings, saved to a different file. The time delta are relative to the first record() call. */ template class TimedDataRecorder { typedef std::chrono::time_point TimePoint; typedef std::pair DataPoint; typedef std::pair Tag; std::deque data; std::deque tags; std::mutex mutex; public: /** Add a data point to the record. \param value data point */ void record(const T& value) { TimePoint now = std::chrono::high_resolution_clock::now(); data.push_back(std::make_pair(now, value)); } /** Synchronized version of \a TimedDataRecorder::record(). */ void recordSynchronized(const T& value) { std::lock_guard guard(mutex); record(value); } void tag(const std::string& tag) { TimePoint now = std::chrono::high_resolution_clock::now(); tags.push_back(std::make_pair(now, tag)); } /** Synchronized version of \a TimedDataRecorder::tag(). */ void tagSynchronized(const T& value) { std::lock_guard guard(mutex); tag(value); } /** Dump the timestamped records to a file. The format is: timestamp_in_nsrecord\n \param filename Name of the output file. The markers are put in filename.tags \param delimiter field delimiter, space by default */ void toFile(const char* filename, const char* delimiter=" ") { typedef std::chrono::nanoseconds nanos; if (data.size() == 0 && tags.size() == 0) return; bool hasData = data.size() > 0; bool hasTags = tags.size() > 0; auto origin = TimePoint(); if (hasData && hasTags) { origin = std::min(tags[0].first, data[0].first); } else if (hasData) { origin = data[0].first; } else if (hasTags) { origin = tags[0].first; } { std::ofstream file(filename); for (auto it : data) { size_t offsetNano = std::chrono::duration_cast((it.first - origin)).count(); file << std::scientific << offsetNano << delimiter << it.second << "\n"; } } { std::string name(filename); name += ".tags"; std::ofstream file(name.c_str()); for (auto it : tags) { size_t offsetNano = std::chrono::duration_cast((it.first - origin)).count(); file << std::scientific << offsetNano << delimiter << it.second << "\n"; } } } }; #endif hmat-oss-1.0.4/src/common/memory_instrumentation.cpp000066400000000000000000000041111245574051100226660ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Memory Allocation tracking. */ #if ((__cplusplus > 199711L) || defined(HAVE_CPP11)) && HAVE_MEM_INSTR #include "memory_instrumentation.hpp" #include "data_recorder.hpp" #include "../system_types.h" #include namespace mem_instr { static size_t get_res_mem() { size_t resident = 0; #ifdef __linux__ FILE * statm_file = fopen("/proc/self/statm", "r"); fscanf(statm_file, "%*s %lu", &resident); fclose(statm_file); #endif return resident; } struct Event { ptrdiff_t size; size_t rss; Event(ptrdiff_t size) : size(size) { rss = get_res_mem(); } }; std::ostream& operator<< (std::ostream& stream, const Event & event) { return stream << event.size << " " << event.rss; } TimedDataRecorder allocs; /// True if the memory tracking is enabled. static bool enabled = false; void addAlloc(void* ptr, ptrdiff_t size) { if (!enabled) { return; } allocs.recordSynchronized(Event(size)); } void toFile(const std::string& filename) { allocs.toFile(filename.c_str()); } void enable() { enabled = true; } void disable() { enabled = false; } } #endif // __cplusplus > 199711L hmat-oss-1.0.4/src/common/memory_instrumentation.hpp000066400000000000000000000043141245574051100227000ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Memory Allocation tracking. */ #ifndef _MEMORY_INSTRUMENTATION_H #define _MEMORY_INSTRUMENTATION_H // We use things from C++11, so test for the standard level. #if ((__cplusplus > 199711L) || defined(HAVE_CPP11)) && HAVE_MEM_INSTR #include #include /*! \brief Memory Tracking. This system is only suited for the specific purpose of the \a HMatrix code. */ namespace mem_instr { /*! \brief Add an allocation to the tracking. \param size positive or negative integer, size in bytes. */ void addAlloc(void* ptr, ptrdiff_t size); /*! \brief Dumps the data to filename. */ void toFile(const std::string& filename); void enable(); void disable(); } #define REGISTER_ALLOC(ptr, size) mem_instr::addAlloc(ptr, +(size)) #define REGISTER_FREE(ptr, size) mem_instr::addAlloc(ptr, -(size)) #define MEMORY_INSTRUMENTATION_TO_FILE(filename) mem_instr::toFile(filename) #define MEMORY_INSTRUMENTATION_ENABLE mem_instr::enable() #define MEMORY_INSTRUMENTATION_DISABLE mem_instr::disable() #else #define REGISTER_ALLOC(ptr, size) do {} while (0) #define REGISTER_FREE(ptr, size) do { (void)ptr; (void)size; } while (0) #define MEMORY_INSTRUMENTATION_TO_FILE(filename) do {} while (0) #define MEMORY_INSTRUMENTATION_ENABLE do {} while (0) #define MEMORY_INSTRUMENTATION_DISABLE do {} while (0) #endif // __cplusplus > 199711L #endif hmat-oss-1.0.4/src/common/my_assert.h000066400000000000000000000020161245574051100175100ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _MY_ASSERT_H #define _MY_ASSERT_H #include #define strongAssert(x) assert(x) #ifdef HMAT_NO_ASSERT #define myAssert(x) #else #define myAssert(x) assert(x) #endif #endif hmat-oss-1.0.4/src/compression.cpp000066400000000000000000000620731245574051100171170ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifdef __INTEL_COMPILER #include #else #include #endif #include "compression.hpp" #include #include #include #include "cluster_tree.hpp" #include "interaction.hpp" #include "rk_matrix.hpp" #include "lapack_operations.hpp" #include "lapack_overloads.hpp" #include "full_matrix.hpp" #include "common/context.hpp" #include "common/my_assert.h" #ifdef _MSC_VER // Intel compiler defines isnan in global namespace // MSVC defines _isnan # ifndef __INTEL_COMPILER # define isnan _isnan # endif #elif defined(__MINGW32__) using std::isnan; #endif using std::vector; using std::min; using std::max; /** Convenience class to lighten the getRow() / getCol() calls. */ template class ClusterAssemblyFunction { const AssemblyFunction& f; void* handle; const ClusterData* rows; const ClusterData* cols; public: ClusterAssemblyFunction(const AssemblyFunction& _f, const ClusterData* _rows, const ClusterData* _cols) : f(_f), handle(NULL), rows(_rows), cols(_cols) { f.prepareBlock(rows, cols, &handle); } ~ClusterAssemblyFunction() { f.releaseBlock(handle); } void getRow(int index, Vector::dp>& result) const { f.getRow(rows, cols, index, handle, &result); } void getCol(int index, Vector::dp>& result) const { f.getCol(rows, cols, index, handle, &result); } private: ClusterAssemblyFunction(ClusterAssemblyFunction&o) {}; // No copy }; template static double squaredNorm(const T x); template<> double squaredNorm(const C_t x) { return norm(x); } template<> double squaredNorm(const Z_t x) { return norm(x); } template double squaredNorm(const T x) { return x * x; } template static bool isZero(const Vector& v) { int index = v.absoluteMaxIndex(); return v.v[index] == Constants::zero; } /** \brief Updates a row to reflect its current value in the matrix. \param rowVec the row to update \param row the index of this row \param rows the rows that have already been computed \param cols the cols that have already been computed \params k the number of rows that have already been computed */ template static void updateRow(Vector& rowVec, int row, const vector*>& rows, const vector*>& cols, int k) { for (int l = 0; l < k; l++) { rowVec.axpy(Constants::mone * cols[l]->v[row], rows[l]); } } template static void updateCol(Vector& colVec, int col, const vector*>& cols, const vector*>& rows, int k) { updateRow(colVec, col, cols, rows, k); } template static void findMax(FullMatrix* m, int& i, int& j) { i = 0; j = 0; double maxNorm = squaredNorm(m->get(i, j)); const int cols = m->cols; const int rows = m->rows; for (int col = 0; col < cols; col++) { for (int row = 0; row < rows; row++) { double norm = squaredNorm(m->get(row, col)); if (norm > maxNorm) { i = row; j = col; maxNorm = norm; } } } } /*! \brief Find a column that is free and not null, or return an error. \param block The block assembly function \param colFree an array of the columns that are free to choose from. This array is updated by this function. \param col the column to be returned. It doesn't need to be zeroed beforehand.' \return the index of the chosen column, or -1 if no column can be found. */ template static int findCol(ClusterAssemblyFunction& block, vector& colFree, Vector::dp>& col) { int colCount = colFree.size(); bool found = false; int i; for (i = 0; i < colCount; i++) { if (colFree[i]) { col.clear(); block.getCol(i, col); colFree[i] = false; if (!isZero(col)) { found = true; break; } } } return (found ? i : -1); } template static int findMinRow(ClusterAssemblyFunction& block, vector& rowFree, const vector::dp>*>& aCols, const vector::dp>*>& bCols, const Vector::dp>& aRef, Vector::dp>& row) { int rowCount = aRef.rows; double minNorm2; int i_ref; bool found = false; while (!found) { i_ref = -1; minNorm2 = DBL_MAX; for (int i = 0; i < rowCount; i++) { if (rowFree[i]) { double norm2 = squaredNorm::dp>(aRef.v[i]); if (norm2 < minNorm2) { i_ref = i; minNorm2 = norm2; } } } if (i_ref == -1) { return i_ref; } row.clear(); block.getRow(i_ref, row); updateRow::dp>(row, i_ref, bCols, aCols, aCols.size()); found = !isZero(row); rowFree[i_ref] = false; } return i_ref; } template static int findMinCol(ClusterAssemblyFunction& block, vector& colFree, const vector::dp>*>& aCols, const vector::dp>*>& bCols, const Vector::dp>& bRef, Vector::dp>& col) { int colCount = bRef.rows; double minNorm2; int j_ref; bool found = false; while (!found) { j_ref = -1; minNorm2 = DBL_MAX; for (int j = 0; j < colCount; j++) { if (colFree[j]) { double norm2 = squaredNorm::dp>(bRef.v[j]); if (norm2 < minNorm2) { j_ref = j; minNorm2 = norm2; } } } if (j_ref == -1) { return j_ref; } col.clear(); block.getCol(j_ref, col); updateCol::dp>(col, j_ref, aCols, bCols, bCols.size()); found = !isZero(col); colFree[j_ref] = false; } return j_ref; } template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols) { DECLARE_CONTEXT; myAssert((size_t) m->rows == rows->n); myAssert((size_t) m->cols == cols->n); myAssert(m->lda >= m->rows); bool zeroMatrix = true; for (size_t col = 0; col < m->cols; col++) { Vector v(m->m + m->rows * col, m->rows); zeroMatrix = zeroMatrix && isZero(v); if (!zeroMatrix) break; } if (zeroMatrix) { return new RkMatrix(NULL, rows, NULL, cols, NoCompression); } // In the case of non-square matrix, we don't calculated singular vectors // bigger than the minimum dimension of the matrix. However this is not // necessary here, since k < min (n, p) for M matrix (nxp). int rowCount = m->rows; int colCount = m->cols; FullMatrix *u = NULL, *vt = NULL; Vector* sigma = NULL; int info = truncatedSvd(m, &u, &sigma, &vt); strongAssert(info == 0); // Control of the approximation int maxK = min(rowCount, colCount); int k = RkMatrix::approx.findK(sigma->v, maxK, RkMatrix::approx.assemblyEpsilon); for (int col = 0; col < k; col++) { for (int row = 0; row < rowCount; row++) { u->get(row, col) *= sigma->v[col]; } } FullMatrix matU(u->m, rowCount, k); FullMatrix* uTilde = matU.copy(); FullMatrix matV(vt->m, k, colCount, maxK); FullMatrix* vTilde = matV.copyAndTranspose(); delete u; delete vt; delete sigma; FullMatrix* a = uTilde; return new RkMatrix(a, rows, vTilde, cols, Svd); } template static RkMatrix::dp>* compressSvd(const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols) { DECLARE_CONTEXT; typedef typename Types::dp dp_t; FullMatrix* m = f.assemble(rows, cols); RkMatrix* result = compressMatrix(m, rows, cols); delete m; return result; } template static RkMatrix::dp>* compressAcaFull(const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols) { DECLARE_CONTEXT; typedef typename Types::dp dp_t; FullMatrix* m = f.assemble(rows, cols); double estimateSquaredNorm = 0; int maxK = min(m->rows, m->cols); if (RkMatrix::approx.k > 0) { maxK = min(maxK, RkMatrix::approx.k); } FullMatrix tmpA(m->rows, maxK); tmpA.clear(); FullMatrix tmpB(m->cols, maxK); tmpB.clear(); int nu; for (nu = 0; nu < maxK; nu++) { int i_nu, j_nu; findMax(m, i_nu, j_nu); dp_t delta = m->get(i_nu, j_nu); if (squaredNorm(delta) == 0.) { break; } // Creation of the vectors A_i_nu and B_j_nu memcpy(tmpA.m + nu * tmpA.rows, m->m + j_nu * m->rows, sizeof(dp_t) * tmpA.rows); for (int j = 0; j < m->cols; j++) { tmpB.get(j, nu) = m->get(i_nu, j) / delta; } FullMatrix a_nu(tmpA.m + nu * tmpA.rows, tmpA.rows, 1); FullMatrix b_nu(tmpB.m + nu * tmpB.rows, tmpB.rows, 1); m->gemm('N', 'T', Constants::mone, &a_nu, &b_nu, Constants::pone); // Update the estimate norm // Let S_{k-1} be the previous estimate. We have (for the Frobenius norm): // ||S_k||^2 = ||S_{k-1}||^2 + \sum_{l = 0}^{nu-1} ( + )) // + ||a_k||^2 ||b_k||^2 { Vector va_nu(tmpA.m + nu * tmpA.rows, tmpA.rows); Vector vb_nu(tmpB.m + nu * tmpB.rows, tmpB.rows); Vector a_l(tmpA.m, tmpA.rows); Vector b_l(tmpB.m, tmpB.rows); // The sum double newEstimate = 0.0; for (int l = 0; l < nu - 1; l++) { a_l.v = tmpA.m + l * tmpA.rows; b_l.v = tmpB.m + l * tmpB.rows; newEstimate += hmat::real(Vector::dot(&va_nu, &a_l) * Vector::dot(&vb_nu, &b_l)); } estimateSquaredNorm += 2.0 * newEstimate; double a_nu_norm = va_nu.norm(); double b_nu_norm = vb_nu.norm(); double ab_norm_2 = (a_nu_norm * a_nu_norm) * (b_nu_norm * b_nu_norm); estimateSquaredNorm += ab_norm_2; // Evaluate the stopping criterion // ||a_nu|| ||b_nu|| < epsilon * ||S_nu|| // <=> ||a_nu||^2 ||b_nu||^2 < epsilon^2 ||S_nu||^2 double epsilon = RkMatrix::approx.assemblyEpsilon; if (ab_norm_2 < epsilon * epsilon * estimateSquaredNorm) { break; } } } delete m; if (nu == 0) { return new RkMatrix(NULL, rows, NULL, cols, AcaFull); } FullMatrix* newA = new FullMatrix(tmpA.rows, nu); newA->clear(); memcpy(newA->m, tmpA.m, sizeof(dp_t) * tmpA.rows * nu); FullMatrix* newB = new FullMatrix(tmpB.rows, nu); newB->clear(); memcpy(newB->m, tmpB.m, sizeof(dp_t) * tmpB.rows * nu); return new RkMatrix(newA, rows, newB, cols, AcaFull); } template static RkMatrix::dp>* compressAcaPartial(const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols) { typedef typename Types::dp dp_t; double estimateSquaredNorm = 0; ClusterAssemblyFunction block(f, rows, cols); const int rowCount = rows->n; const int colCount = cols->n; int maxK = min(rowCount, colCount); // Contains false for the rows that were already used as pivot vector rowFree(rowCount, true); int rowPivotCount = 0; // idem for columns vector colFree(colCount, true); vector*> aCols; vector*> bCols; int I = 0; int J = 0; int k = 0; do { Vector* bCol = new Vector(cols->n); // Calculation of row I and its residue block.getRow(I, *bCol); updateRow(*bCol, I, bCols, aCols, k); rowFree[I] = false; rowPivotCount++; // Find max and argmax of the residue double maxNorm2 = 0.; for (int j = 0; j < colCount; j++) { double norm2 = squaredNorm(bCol->v[j]); if (colFree[j] && norm2 > maxNorm2) { maxNorm2 = norm2; J = j; } } if (bCol->v[J] == Constants::zero) { delete bCol; // We look for another row which has not already been used. I = 0; while (!rowFree[I]) { I++; } } else { // Find pivot and scale column B dp_t pivot = Constants::pone / bCol->v[J]; bCol->scale(pivot); bCols.push_back(bCol); // Compute column J and residue Vector* aCol = new Vector(rows->n); block.getCol(J, *aCol); updateCol(*aCol, J, aCols, bCols, k); colFree[J] = false; aCols.push_back(aCol); // Find max and argmax of the residue maxNorm2 = 0.; for (int i = 0; i < rowCount; i++) { double norm2 = squaredNorm(aCol->v[i]); if (rowFree[i] && norm2 > maxNorm2) { maxNorm2 = norm2; I = i; } } // Update the estimated norm // Let S_{k-1} be the previous estimate. We have (for the Frobenius norm): // ||S_k||^2 = ||S_{k-1}||^2 + \sum_{l = 0}^{nu-1} ( + )) // + ||a_k||^2 ||b_k||^2 double newEstimate = 0.0; for (int l = 0; l < k; l++) { newEstimate += hmat::real(Vector::dot(aCol, aCols[l]) * Vector::dot(bCol, bCols[l])); } estimateSquaredNorm += 2.0 * newEstimate; double aColNorm = aCol->norm(); double bColNorm = bCol->norm(); double ab_norm_2 = (aColNorm * aColNorm) * (bColNorm * bColNorm); estimateSquaredNorm += ab_norm_2; k++; // Evaluate the stopping criterion // ||a_nu|| ||b_nu|| < epsilon * ||S_nu|| // <=> ||a_nu||^2 ||b_nu||^2 < epsilon^2 ||S_nu||^2 double epsilon = RkMatrix::approx.assemblyEpsilon; if (ab_norm_2 < epsilon * epsilon * estimateSquaredNorm) { break; } } } while (rowPivotCount < maxK); FullMatrix *newA, *newB; if (k != 0) { newA = new FullMatrix(rows->n, k); for (int i = 0; i < k; i++) { memcpy(newA->m + (i * newA->rows), aCols[i]->v, sizeof(dp_t) * newA->rows); delete aCols[i]; aCols[i] = NULL; } newB = new FullMatrix(cols->n, k); for (int i = 0; i < k; i++) { memcpy(newB->m + (i * newB->rows), bCols[i]->v, sizeof(dp_t) * newB->rows); delete bCols[i]; bCols[i] = NULL; } } else { // If k == 0, block is only made of zeros. return new RkMatrix(NULL, rows, NULL, cols, AcaPartial); } return new RkMatrix(newA, rows, newB, cols, AcaPartial); } template static RkMatrix::dp>* compressAcaPlus(const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols) { typedef typename Types::dp dp_t; double estimateSquaredNorm = 0; int i_ref, j_ref; int rowCount = rows->n, colCount = cols->n; int maxK = min(rowCount, colCount); Vector bRef(colCount), aRef(rowCount); vector rowFree(rowCount, true), colFree(colCount, true); vector*> aCols, bCols; ClusterAssemblyFunction block(f, rows, cols); j_ref = findCol(block, colFree, aRef); if (j_ref == -1) { // The block is completely zero. return new RkMatrix(NULL, rows, NULL, cols, AcaPlus); } // The reference row is chosen such that it intersects the reference // column at its argmin index. i_ref = findMinRow(block, rowFree, aCols, bCols, aRef, bRef); int k = 0; do { Vector* bVec = new Vector(colCount); Vector* aVec = new Vector(rowCount); int i_star, j_star; dp_t i_star_value, j_star_value; i_star = aRef.absoluteMaxIndex(); i_star_value = aRef.v[i_star]; j_star = bRef.absoluteMaxIndex(); j_star_value = bRef.v[j_star]; if (squaredNorm(i_star_value) > squaredNorm(j_star_value)) { // i_star is fixed, we look for j_star block.getRow(i_star, *bVec); // Calculate the residue updateRow(*bVec, i_star, bCols, aCols, k); j_star = bVec->absoluteMaxIndex(); dp_t pivot = bVec->v[j_star]; strongAssert(pivot != Constants::zero); // Calculate a block.getCol(j_star, *aVec); updateCol(*aVec, j_star, aCols, bCols, k); aVec->scale(Constants::pone / pivot); } else { // j_star is fixed, we look for i_star block.getCol(j_star, *aVec); updateCol(*aVec, j_star, aCols, bCols, k); i_star = aVec->absoluteMaxIndex(); dp_t pivot = aVec->v[i_star]; strongAssert(pivot != Constants::zero); // Calculate b block.getRow(i_star, *bVec); updateRow(*bVec, i_star, bCols, aCols, k); bVec->scale(Constants::pone / pivot); } rowFree[i_star] = false; colFree[j_star] = false; aCols.push_back(aVec); bCols.push_back(bVec); // Update the estimate norm // Let S_{k-1} be the previous estimate. We have (for the Frobenius norm): // ||S_k||^2 = ||S_{k-1}||^2 + \sum_{l = 0}^{nu-1} ( + )) // + ||a_k||^2 ||b_k||^2 double newEstimate = 0.0; for (int l = 0; l < k; l++) { newEstimate += hmat::real(Vector::dot(aVec, aCols[l]) * Vector::dot(bVec, bCols[l])); } estimateSquaredNorm += 2.0 * newEstimate; double aVecNorm = aVec->norm(); double bVecNorm = bVec->norm(); double ab_norm_2 = (aVecNorm * aVecNorm) * (bVecNorm * bVecNorm); estimateSquaredNorm += ab_norm_2; k++; // Evaluate the stopping criterion // ||a_nu|| ||b_nu|| < epsilon * ||S_nu|| // <=> ||a_nu||^2 ||b_nu||^2 < epsilon^2 ||S_nu||^2 double epsilon = RkMatrix::approx.assemblyEpsilon; if (ab_norm_2 < epsilon * epsilon * estimateSquaredNorm) { break; } // Update of a_ref and b_ref aRef.axpy(Constants::mone * bCols[k - 1]->v[j_ref], aCols[k - 1]); bRef.axpy(Constants::mone * aCols[k - 1]->v[i_ref], bCols[k - 1]); bool needNewA = isZero(aRef) || (j_star == j_ref); bool needNewB = isZero(bRef) || (i_star == i_ref); // If the row or the column of reference have been already chosen as pivot, // we can not keep it and then we take one or two others. if (needNewA && needNewB) { bool found = false; while (!found) { aRef.clear(); j_ref = findCol(block, colFree, aRef); // We can not find non-zero column anymore, done! if (j_ref == -1) { break; } updateCol(aRef, j_ref, aCols, bCols, k); found = !isZero(aRef); } if (!found) { break; } bRef.clear(); i_ref = findMinRow(block, rowFree, aCols, bCols, aRef, bRef); // We can not find non-zero line anymore, done! if (i_ref == -1) { break; } } else if (needNewB) { bRef.clear(); i_ref = findMinRow(block, rowFree, aCols, bCols, aRef, bRef); // We can not find non-zero line anymore, done! if (i_ref == -1) { break; } } else if (needNewA) { aRef.clear(); j_ref = findMinCol(block, colFree, aCols, bCols, bRef, aRef); // We can not find non-zero column anymore, done! if (j_ref == -1) { break; } } } while (k < maxK); myAssert(k > 0); FullMatrix* newA = new FullMatrix(rows->n, k); for (int i = 0; i < k; i++) { memcpy(newA->m + (i * newA->rows), aCols[i]->v, sizeof(dp_t) * newA->rows); delete aCols[i]; aCols[i] = NULL; } FullMatrix* newB = new FullMatrix(cols->n, k); for (int i = 0; i < k; i++) { memcpy(newB->m + (i * newB->rows), bCols[i]->v, sizeof(dp_t) * newB->rows); delete bCols[i]; bCols[i] = NULL; } return new RkMatrix(newA, rows, newB, cols, AcaPlus); } #include /* Appele par HMatrix::assemble() */ template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols) { typedef typename Types::dp dp_t; RkMatrix* rk = NULL; // Always compress the smallest blocks using an SVD. Small blocks tend to have // a bad compression ratio anyways, and the SVD is not very costly in this // case. // TODO: allow the setting of the size limit ? if (max(rows->n, cols->n) < 100) { rk = compressSvd(f, rows, cols); } else { switch (method) { case Svd: rk = compressSvd(f, rows, cols); break; case AcaFull: rk = compressAcaFull(f, rows, cols); break; case AcaPartial: rk = compressAcaPartial(f, rows, cols); break; case AcaPlus: rk = compressAcaPlus(f, rows, cols); break; case NoCompression: // Should not happen break; } } if (HMatrix::validateCompression) { FullMatrix* full = f.assemble(rows, cols); if (rk->a) rk->a->checkNan(); if (rk->b) rk->b->checkNan(); FullMatrix* rkFull = rk->eval(); double approxNorm = rkFull->norm(); double fullNorm = full->norm(); // If I meet a NaN, I save & leave // TODO : improve this behaviour if (isnan(approxNorm)) { rkFull->toFile("Rk"); full->toFile("Full"); strongAssert(false); } rkFull->axpy(Constants::mone, full); double diffNorm = rkFull->norm(); if (diffNorm > HMatrix::validationErrorThreshold * fullNorm ) { std::cout << "["<< rows->offset<<","<offset+rows->n -1 <<"]x["<< cols->offset<<","<offset+cols->n-1 <<"]"<< std::endl << std::scientific << "|M| = " << fullNorm << std::endl << "|Rk| = " << approxNorm << std::endl << "|M - Rk| / |Rk| = " << diffNorm / fullNorm << std::endl << "Rank = " << rk->k << " / " << min(full->rows, full->cols) << std::endl << std::endl; if (HMatrix::validationReRun) { // Call compression a 2nd time, for debugging with gdb the work of the compression algorithm... RkMatrix* rk_bis = NULL; if (max(rows->n, cols->n) < 100) { rk_bis = compressSvd(f, rows, cols); } else { switch (method) { case Svd: rk_bis = compressSvd(f, rows, cols); break; case AcaFull: rk_bis = compressAcaFull(f, rows, cols); break; case AcaPartial: rk_bis = compressAcaPartial(f, rows, cols); break; case AcaPlus: rk_bis = compressAcaPlus(f, rows, cols); break; case NoCompression: // Should not happen break; } } delete rk_bis ; } if (HMatrix::validationDump) { std::string filename; std::ostringstream convert; // stream used for the conversion convert << "["<< rows->offset<<","<offset+rows->n -1 <<"]x["<< cols->offset<<","<offset+cols->n-1 <<"]" ; filename = "Rk_"; filename += convert.str(); // set 'Result' to the contents of the stream rkFull->toFile(filename.c_str()); filename = "Full_"+convert.str(); // set 'Result' to the contents of the stream full->toFile(filename.c_str()); } } delete rkFull; delete full; } return rk; } // Declaration of the used templates template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols); template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols); template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols); template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols); template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols); template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols); template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols); template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols); hmat-oss-1.0.4/src/compression.hpp000066400000000000000000000043051245574051100171160ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _COMPRESSION_HPP #define _COMPRESSION_HPP /* Implementation of the algorithms of blocks compression */ #include "data_types.hpp" /** Choice of the compression method. */ enum CompressionMethod { Svd, AcaFull, AcaPartial, AcaPlus, NoCompression }; // Forward declarations template class AssemblyFunction; template class FullMatrix; template class RkMatrix; class ClusterData; /** Compress a FullMatrix into an RkMatrix. The compression uses the reduced SVD, and the accurarcy is controlled by \a RkMatrix::approx. \param m The matrix to compress. It is modified but not detroyed by the function. \param rows The block rows \param cols The block colums \return A RkMatrix approximationg the argument \a m. */ template RkMatrix* compressMatrix(FullMatrix* m, const ClusterData* rows, const ClusterData* cols); /** Compress a block into an RkMatrix. \param method The compression method \param f The assembly functions used to compute block elements \param rows The block rows \param cols The block colums \return A RkMatrix representation of the rows x cols block. */ template RkMatrix::dp>* compress(CompressionMethod method, const AssemblyFunction& f, const ClusterData* rows, const ClusterData* cols); #endif hmat-oss-1.0.4/src/data_types.cpp000066400000000000000000000036731245574051100167140ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "data_types.hpp" template<> S_t Constants::zero = 0.; template<> D_t Constants::zero = 0.; template<> C_t Constants::zero(0., 0.); template<> Z_t Constants::zero(0., 0.); template<> S_t Constants::pone = 1.; template<> D_t Constants::pone = 1.; template<> C_t Constants::pone(1., 0.); template<> Z_t Constants::pone(1., 0.); template<> S_t Constants::mone = -1.; template<> D_t Constants::mone = -1.; template<> C_t Constants::mone(-1., 0.); template<> Z_t Constants::mone(-1., 0.); template<> const size_t Multipliers::add = 1; template<> const size_t Multipliers::mul = 1; template<> const size_t Multipliers::add = 1; template<> const size_t Multipliers::mul = 1; template<> const size_t Multipliers::add = 2; template<> const size_t Multipliers::mul = 6; template<> const size_t Multipliers::add = 2; template<> const size_t Multipliers::mul = 6; template<> int Constants::code = 0; template<> int Constants::code = 1; template<> int Constants::code = 2; template<> int Constants::code = 3; hmat-oss-1.0.4/src/data_types.hpp000066400000000000000000000045311245574051100167130ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _DATA_TYPES_HPP #define _DATA_TYPES_HPP #include // Scalar Types typedef double D_t; typedef float S_t ; typedef std::complex C_t; typedef std::complex Z_t; typedef enum { /*! \brief Simple real type (float in C, REAL in fortran) */ S_TYPE = 0, /*! \brief Double real type (double in C, REAL*8 in fortran) */ D_TYPE = 1, /*! \brief Simple complex type (doesn't exist in C, COMPLEX in fortran) */ C_TYPE = 2, /*! \brief Double complex type (doesn't exist in C, DOUBLE COMPLEX in fortran) */ Z_TYPE = 3, // /*! \brief Number of scalar types available. */ // nbScalarType = 4 } ScalarTypes; template class Constants { public: static T zero; static T pone; static T mone; static int code; }; /** Multipliers used in the operations count. The operations are more expansive on complex numbers. This class contains the ratios for the various types used here (SDCZ). */ template class Multipliers { public: static const size_t mul; static const size_t add; }; /** Trait class used to get information about the types. */ template struct Types { // Simple and double precision equivalents to this type. typedef void dp; typedef void sp; }; template<> struct Types { typedef D_t dp; typedef S_t sp; }; template<> struct Types { typedef D_t dp; typedef S_t sp; }; template<> struct Types { typedef Z_t dp; typedef C_t sp; }; template<> struct Types { typedef Z_t dp; typedef C_t sp; }; #endif hmat-oss-1.0.4/src/default_engine.cpp000066400000000000000000000145371245574051100175310ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "hmat_cpp_interface.hpp" #include "default_engine.hpp" #include "common/context.hpp" #include "common/my_assert.h" template static void setTemplatedParameters(const HMatSettings& s) { RkMatrix::approx.assemblyEpsilon = s.assemblyEpsilon; RkMatrix::approx.recompressionEpsilon = s.recompressionEpsilon; RkMatrix::approx.method = s.compressionMethod; HMatrix::validateCompression = s.validateCompression; HMatrix::validationErrorThreshold = s.validationErrorThreshold; HMatrix::validationReRun = s.validationReRun; HMatrix::validationDump = s.validationDump; HMatrix::coarsening = s.coarsening; HMatrix::recompress = s.recompress; } void HMatSettings::setParameters() const { strongAssert(assemblyEpsilon > 0.); strongAssert(recompressionEpsilon > 0.); strongAssert(admissibilityFactor >= 0.); strongAssert(validationErrorThreshold >= 0.); setTemplatedParameters(*this); setTemplatedParameters(*this); setTemplatedParameters(*this); setTemplatedParameters(*this); ClusterTree::eta = admissibilityFactor; maxElementsPerBlock = elementsPerBlock; } void HMatSettings::printSettings(std::ostream& out) const { std::ios_base::fmtflags savedIosFlags = out.flags(); out << std::scientific; out << "Assembly Epsilon = " << assemblyEpsilon << std::endl; out << "Resolution Epsilon = " << recompressionEpsilon << std::endl; out << "Admissibility factor = " << admissibilityFactor << std::endl; out << "Max Leaf Size = " << maxLeafSize << std::endl; out << "Validation Error Threshold = " << validationErrorThreshold << std::endl; switch (clustering) { case kGeometric: out << "Geometric Clustering" << std::endl; break; case kMedian: out << "Median Clustering" << std::endl; break; case kHybrid: out << "Hybrid Clustering" << std::endl; break; } switch (compressionMethod) { case Svd: out << "SVD Compression" << std::endl; break; case AcaFull: out << "ACA compression (Full Pivoting)" << std::endl; break; case AcaPartial: out << "ACA compression (Partial Pivoting)" << std::endl; break; case AcaPlus: out << "ACA+ compression" << std::endl; break; case NoCompression: // Should not happen break; } out.flags(savedIosFlags); } ClusterTree* createClusterTree(DofCoordinate* dls, int n) { DECLARE_CONTEXT; std::vector* points = new std::vector(n); for (int i = 0; i < n; i++) { (*points)[i] = Point(dls[i].x, dls[i].y, dls[i].z); } int *indices = new int[n]; for (int i = 0; i < n; i++) { indices[i] = i; } ClusterData rootData(indices, 0, n, points); Point boundingBox[2]; rootData.computeBoundingBox(boundingBox); ClusterTree* ct; const HMatSettings& settings = HMatSettings::getInstance(); switch (settings.clustering) { case kMedian: ct = new MedianBisectionClusterTree(boundingBox, rootData, settings.maxLeafSize); break; case kGeometric: ct = new GeometricBisectionClusterTree(boundingBox, rootData, settings.maxLeafSize); break; case kHybrid: ct = new HybridBisectionClusterTree(boundingBox, rootData, settings.maxLeafSize); break; default: strongAssert(false); } ct->divide(); return ct; } template void DefaultEngine::assembly(AssemblyFunction& f, SymmetryFlag sym, bool synchronize) { const HMatSettings& settings = HMatSettings::getInstance(); if (sym == kLowerSymmetric) { hmat->assembleSymmetric(f, NULL, settings.useLdlt); } else { hmat->assemble(f); } } template void DefaultEngine::factorization() { const HMatSettings& settings = HMatSettings::getInstance(); strongAssert(settings.useLdlt ^ settings.useLu); if (settings.useLdlt) { if(settings.cholesky) hmat->lltDecomposition(); else hmat->ldltDecomposition(); } else { hmat->luDecomposition(); } } template void DefaultEngine::gemv(char trans, T alpha, FullMatrix& x, T beta, FullMatrix& y) const { hmat->gemv(trans, alpha, &x, beta, &y); } template void DefaultEngine::gemm(char transA, char transB, T alpha, const DefaultEngine& a, const DefaultEngine& b, T beta) { hmat->gemm(transA, transB, alpha, a.hmat, b.hmat, beta); } template void DefaultEngine::solve(FullMatrix& b) const { const HMatSettings& settings = HMatSettings::getInstance(); strongAssert(settings.useLu ^ settings.useLdlt); if (settings.useLu) hmat->solve(&b); else if (settings.useLdlt) hmat->solveLdlt(&b); } template void DefaultEngine::solve(DefaultEngine& b) const { hmat->solve(b.hmat); } template void DefaultEngine::createPostcriptFile(const char* filename) const { hmat->createPostcriptFile(filename); } template void DefaultEngine::dumpTreeToFile(const char* filename) const { hmat->dumpTreeToFile(filename); } template double DefaultEngine::norm() const { return hmat->norm(); } #include "hmat_cpp_interface.cpp" // Explicit template instantiation template class HMatInterface; template class HMatInterface; template class HMatInterface; template class HMatInterface; hmat-oss-1.0.4/src/default_engine.hpp000066400000000000000000000035361245574051100175330ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _DEFAULT_ENGINE_HPP #define _DEFAULT_ENGINE_HPP #include "h_matrix.hpp" class NullSettings {}; template class DefaultEngine { public: typedef NullSettings Settings; Settings settings; explicit DefaultEngine(HMatrix* m = NULL): hmat(m){} void destroy(){} // this attribute could be in HMatInterface, it's here to avoid making it friend HMatrix* hmat; static int init() { return 0; } static void finalize(){} void assembly(AssemblyFunction& f, SymmetryFlag sym, bool synchronize); void factorization(); void gemv(char trans, T alpha, FullMatrix& x, T beta, FullMatrix& y) const; void gemm(char transA, char transB, T alpha, const DefaultEngine & a, const DefaultEngine& b, T beta); void solve(FullMatrix& b) const; void solve(DefaultEngine& b) const; void copy(DefaultEngine & result) const {} void transpose() {} void createPostcriptFile(const char* filename) const; void dumpTreeToFile(const char* filename) const; double norm() const; }; #endif hmat-oss-1.0.4/src/disable_threading.cpp000066400000000000000000000033761245574051100202070ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "config.h" #include "disable_threading.hpp" #ifdef HAVE_MKL_H #include #endif #ifdef HAVE_OMP_H #include #endif #ifdef OPENBLAS_DISABLE_THREADS #include extern "C" { // This function is private in openblas int goto_get_num_procs(void); } #endif DisableThreadingInBlock::DisableThreadingInBlock() { #if defined(HAVE_MKL_H) mklNumThreads = mkl_get_max_threads(); mkl_set_num_threads(1); #endif #ifdef HAVE_OMP_H ompNumThreads = omp_get_max_threads(); omp_set_num_threads(1); #endif #ifdef OPENBLAS_DISABLE_THREADS openblasNumThreads = goto_get_num_procs(); openblas_set_num_threads(1); #endif } DisableThreadingInBlock::~DisableThreadingInBlock() { #if defined(HAVE_MKL_H) mkl_set_num_threads(mklNumThreads); #endif #ifdef HAVE_OMP_H omp_set_num_threads(ompNumThreads); #endif #ifdef OPENBLAS_DISABLE_THREADS openblas_set_num_threads(openblasNumThreads); #endif } hmat-oss-1.0.4/src/disable_threading.hpp000066400000000000000000000031341245574051100202040ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! Disable MKL and OpenMP threading inside a block using RAII. The HMatrix solver doesn't use a multithreaded BLAS, nor OpenMP. It is actually important for optimal performance to *not* use any threading. This class is not meant to be used by itself, but rather using the \a DISABLE_THREADING_IN_BLOCK macro to disable threading in a block, and restore it to its original setting at the end. */ class DisableThreadingInBlock { private: int mklNumThreads; int ompNumThreads; int openblasNumThreads; public: DisableThreadingInBlock(); ~DisableThreadingInBlock(); }; /** Disable OpenMP and MKL (if available) threading in a block, and restore it at the end. */ #define DISABLE_THREADING_IN_BLOCK DisableThreadingInBlock dummyDisableThreadingInBlock hmat-oss-1.0.4/src/fromdouble.cpp000066400000000000000000000045311245574051100167070ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "fromdouble.hpp" #include "common/my_assert.h" template<> FullMatrix* fromDoubleFull(FullMatrix* f) { return f; } template<> FullMatrix* fromDoubleFull(FullMatrix* f) { return f; } template FullMatrix* fromDoubleFull(FullMatrix::dp>* f) { if (!f) { return NULL; } FullMatrix* result = new FullMatrix(f->rows, f->cols); strongAssert(result); strongAssert(f->lda == f->rows); const size_t size = ((size_t) f->rows) * f->cols; T* const r = result->m; const typename Types::dp* m = f->m; for (size_t i = 0; i < size; ++i) { r[i] = T(m[i]); } delete f; return result; } template FullMatrix* fromDoubleFull(FullMatrix::dp>* f); template FullMatrix* fromDoubleFull(FullMatrix::dp>* f); template<> RkMatrix* fromDoubleRk(RkMatrix* rk) { return rk; } template<> RkMatrix* fromDoubleRk(RkMatrix* rk) { return rk; } template RkMatrix* fromDoubleRk(RkMatrix::dp>* rk) { RkMatrix* result = new RkMatrix(fromDoubleFull(rk->a), rk->rows, fromDoubleFull(rk->b), rk->cols, rk->method); rk->a= NULL; rk->b = NULL; delete rk; return result; } template RkMatrix* fromDoubleRk(RkMatrix::dp>* rk); template RkMatrix* fromDoubleRk(RkMatrix::dp>* rk); hmat-oss-1.0.4/src/fromdouble.hpp000066400000000000000000000030231245574051100167070ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "full_matrix.hpp" #include "rk_matrix.hpp" /** Returns a potentially lower precision matrix. This functions takes a FullMatrix either in double precision, and returns a single precision matrix if required. This is used to perform some computations with higher precision and then to go back to the required precision. If the target type is double precision, this function is a no-op. Otherwise (single precision target type) it performs the conversion and destroy the original matrix. */ template FullMatrix* fromDoubleFull(FullMatrix::dp>* f); template RkMatrix* fromDoubleRk(RkMatrix::dp>* rk); hmat-oss-1.0.4/src/full_matrix.cpp000066400000000000000000000572601245574051100171060ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Dense Matrix implementation. */ #include "config.h" #ifdef __INTEL_COMPILER #include #else #include #endif #include "full_matrix.hpp" #include "data_types.hpp" #include "lapack_overloads.hpp" #include "blas_overloads.hpp" #include "lapack_exception.hpp" #include "common/memory_instrumentation.hpp" #include "system_types.h" #include "common/my_assert.h" #include "common/context.hpp" #include // memset #include // swap #include #include #include #include #ifndef _WIN32 #include // mmap #endif #include #ifdef HAVE_UNISTD_H #include #endif #include #ifdef _MSC_VER // Intel compiler defines isnan in global namespace // MSVC defines _isnan # ifndef __INTEL_COMPILER # define isnan _isnan # endif #elif defined(__MINGW32__) using std::isnan; #endif /** FullMatrix */ template FullMatrix::FullMatrix(T* _m, size_t _rows, size_t _cols, int _lda) : ownsMemory(false), m(_m), rows(_rows), cols(_cols), lda(_lda), pivots(NULL), diagonal(NULL), isTriUpper(false), isTriLower(false) { if (lda == -1) { lda = rows; } myAssert(lda >= rows); } // #define POISON_ALLOCATION #ifdef POISON_ALLOCATION /*! \brief Fill an array with NaNs. The purpose of this function is to help spotting initialized memory earlier by making sure that any code using uninitialized memory encounters NaNs. */ template void poisonArray(T* array, size_t n); template<> static void poisonArray(S_t* array, size_t n) { const float nanFloat = nanf(""); for (size_t i = 0; i < n; i++) { array[i] = nanFloat; } } template<> static void poisonArray(D_t* array, size_t n) { const double nanDouble = nan(""); for (size_t i = 0; i < n; i++) { array[i] = nanDouble; } } template<> static void poisonArray(C_t* array, size_t n) { poisonArray((S_t*) array, 2 * n); } template<> static void poisonArray(Z_t* array, size_t n) { poisonArray((D_t*) array, 2 * n); } #endif template FullMatrix::FullMatrix(size_t _rows, size_t _cols) : ownsMemory(true), rows(_rows), cols(_cols), lda(_rows), pivots(NULL), diagonal(NULL), isTriUpper(false), isTriLower(false) { size_t size = ((size_t) rows) * cols * sizeof(T); m = (T*) calloc(size, 1); REGISTER_ALLOC(m, size); strongAssert(m); #ifdef POISON_ALLOCATION // This memory is not initialized, fill it with NaNs to force a // crash when using it. poisonArray(m, ((size_t) rows) * cols); #endif } template FullMatrix* FullMatrix::Zero(size_t rows, size_t cols) { FullMatrix* result = new FullMatrix(rows, cols); #ifdef POISON_ALLOCATION // The memory was poisoned in FullMatrix::FullMatrix(), set it back to 0; result->clear(); #endif return result; } template FullMatrix::~FullMatrix() { if (ownsMemory) { size_t size = ((size_t) rows) * cols * sizeof(T); REGISTER_FREE(m, size); free(m); m = NULL; } if (pivots) { free(pivots); } if (diagonal) { delete diagonal; } } template void FullMatrix::clear() { myAssert(lda == rows); size_t size = ((size_t) rows) * cols * sizeof(T); memset(m, 0, size); } template void FullMatrix::scale(T alpha) { increment_flops(Multipliers::mul * ((size_t) rows) * cols); if (lda == rows) { if (alpha == Constants::zero) { this->clear(); } else { // TODO: be careful to overflow! int nm = rows * cols; proxy_cblas::scal(nm, alpha, m, 1); } } else { T* x = m; if (alpha == Constants::zero) { for (int col = 0; col < cols; col++) { memset(x, 0, sizeof(T) * rows); x += lda; } } else { for (int col = 0; col < cols; col++) { proxy_cblas::scal(rows, alpha, x, 1); x += lda; } } } } template void FullMatrix::transpose() { myAssert(lda == rows); myAssert(m); #ifdef HAVE_MKL_IMATCOPY proxy_mkl::imatcopy((size_t) rows, (size_t) cols, m); std::swap(rows, cols); lda = rows; #else if (rows == cols) { // "Fast" path for (int col = 0; col < cols; col++) { for (int row = 0; row < col; row++) { T tmp = get(row, col); get(row, col) = get(col, row); get(col, row) = tmp; } } } else { FullMatrix tmp(rows, cols); tmp.copyMatrixAtOffset(this, 0, 0); std::swap(rows, cols); lda = rows; for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { get(i, j) = tmp.get(j, i); } } } #endif if (isTriUpper) { isTriUpper = false; isTriLower = true; } else if (isTriLower) { isTriLower = false; isTriUpper = true; } } template FullMatrix* FullMatrix::copy() const { FullMatrix* result = new FullMatrix(rows, cols); if (lda == rows) { size_t size = ((size_t) rows) * cols * sizeof(T); memcpy(result->m, m, size); } else { for (size_t col = 0; col < cols; col++) { size_t resultOffset = result->rows * col; size_t offset = lda * col; memcpy(result->m + resultOffset, m + offset, rows * sizeof(T)); } } return result; } template FullMatrix* FullMatrix::copyAndTranspose() const { FullMatrix* result = new FullMatrix(cols, rows); result->clear(); #ifdef HAVE_MKL_IMATCOPY if (lda == rows) { proxy_mkl::omatcopy(rows, cols, m, result->m); } else { #endif const int _rows = rows, _cols = cols; for (int i = 0; i < _rows; i++) { for (int j = 0; j < _cols; j++) { result->get(j, i) = get(i, j); } } #ifdef HAVE_MKL_IMATCOPY } #endif return result; } template void FullMatrix::gemm(char transA, char transB, T alpha, const FullMatrix* a, const FullMatrix* b, T beta) { int m = (transA == 'N' ? a->rows : a->cols); int n = (transB == 'N' ? b->cols : b->rows); int k = (transA == 'N' ? a->cols : a->rows); myAssert(a->lda >= (transA == 'N' ? m : k)); myAssert(b->lda >= (transB == 'N' ? k : n)); myAssert(rows == m); myAssert(cols == n); { const size_t _m = m, _n = n, _k = k; const size_t adds = _m * _n * _k; const size_t muls = _m * _n * _k; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } proxy_cblas::gemm(transA, transB, m, n, k, alpha, a->m, a->lda, b->m, b->lda, beta, this->m, this->lda); } template void FullMatrix::multiplyWithDiagOrDiagInv(const Vector* d, bool inverse, bool left) { myAssert(left || (this->cols == d->rows)); myAssert(!left || (this->rows == d->rows)); T* diag = d->v; { const size_t _rows = rows, _cols = cols; increment_flops(Multipliers::mul * _rows * _cols); } if (left) { if (inverse) { // In this case, copying is a good idea since it avoids repeated // computations of 1 / diag[i]. diag = (T*) malloc(d->rows * sizeof(T)); strongAssert(diag); memcpy(diag, d->v, d->rows * sizeof(T)); for (int i = 0; i < d->rows; i++) { diag[i] = Constants::pone / diag[i]; } } // TODO: Test with scale to see if it is better. for (int j = 0; j < cols; j++) { for (int i = 0; i < rows; i++) { m[i + j * lda] *= diag[i]; } } if (inverse) { free(diag); } } else { for (int j = 0; j < cols; j++) { proxy_cblas::scal(rows, diag[j], m + j * lda, 1); } } } template void FullMatrix::ldltDecomposition() { int n = this->rows; diagonal = new Vector(n); strongAssert(diagonal); myAssert(this->rows == this->cols); // We expect a square matrix // Standard LDLt factorization algorithm is: // diag[j] = A(j,j) - sum_{k < j} L(j,k)^2 diag[k] // L(i,j) = (A(i,j) - sum_{k < j} (L(i,k)L(j,k)diag[k])) / diag[j] // See for instance http://en.wikipedia.org/wiki/Cholesky_decomposition // An auxiliary array is introduced in order to perform less multiplications, // see algorithm 1 in http://icl.cs.utk.edu/projectsfiles/plasma/pubs/ICL-UT-11-03.pdf T* v = new T[n]; strongAssert(v); for (int j = 0; j < n; j++) { for (int i = 0; i < j; i++) v[i] = get(j,i) * get(i,i); v[j] = get(j,j); for (int i = 0; i < j; i++) v[j] -= get(j,i) * v[i]; get(j,j) = v[j]; for (int i = 0; i < j; i++) for (int k = j+1; k < n; k++) get(k,j) -= get(k,i) * v[i]; for (int k = j+1; k < n; k++) { get(k,j) /= v[j]; } } for(int i = 0; i < n; i++) { diagonal->v[i] = get(i,i); get(i,i) = Constants::pone; for (int j = i + 1; j < n; j++) get(i,j) = Constants::zero; } isTriLower = true; myAssert(!isTriUpper); delete[] v; } template void FullMatrix::lltDecomposition() { // from http://www.netlib.org/lapack/lawnspdf/lawn41.pdf page 120 const size_t n2 = rows * rows; const size_t n3 = n2 * rows; const size_t muls = n3 / 6 + n2 / 2 + rows / 3; const size_t adds = n3 / 6 - rows / 6; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); int info = proxy_lapack::potrf('L', this->rows, this->m, this->lda); if(info != 0) // throw a pointer to be compliant with the Status class throw new hmat::LapackException("potrf", info); isTriLower = true; for (int j = 0; j < this->cols; j++) { for(int i = 0; i < j; i++) { get(i,j) = Constants::zero; } } } template void FullMatrix::luDecomposition() { int mm = rows; int n = cols; pivots = (int*) calloc(rows, sizeof(int)); strongAssert(pivots); int info; { const size_t _m = mm, _n = n; const size_t muls = _m * _n *_n / 2 - _n *_n*_n / 6 + _m * _n / 2 - _n*_n / 2 + 2 * _n / 3; const size_t adds = _m * _n *_n / 2 - _n *_n*_n / 6 + _m * _n / 2 + _n / 6; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } info = proxy_lapack::getrf(mm, n, m, lda, pivots); strongAssert(!info); } // The following code is very close to that of ZGETRS in LAPACK. // However, the resolution here is divided in the two parties. // Warning! The matrix has been obtained with ZGETRF therefore it is // permuted! We have the factorization A = P L U with P the // permutation matrix. So to solve L X = B, we must // solve LX = (P ^ -1 B), which is done by ZLASWP with // the permutation. we used it just like in ZGETRS. template void FullMatrix::solveLowerTriangular(FullMatrix* x) const { myAssert(pivots || diagonal); { const size_t _m = rows, _n = x->cols; const size_t adds = _n * _m * (_m - 1) / 2; const size_t muls = _n * _m * (_m + 1) / 2; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } if (!diagonal && pivots) proxy_lapack::laswp(x->cols, x->m, x->lda, 1, rows, pivots, 1); proxy_cblas::trsm('L', 'L', 'N', 'U', rows, x->cols, Constants::pone, m, lda, x->m, x->lda); } // The resolution of the upper triangular system does not need to // change the order of columns. // The pivots are not necessary here, but this helps to check // the matrix was factorized before. template void FullMatrix::solveUpperTriangular(FullMatrix* x, bool loweredStored) const { { const size_t _m = rows, _n = x->cols; const size_t adds = _n * _m * (_m - 1) / 2; const size_t muls = _n * _m * (_m + 1) / 2; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } proxy_cblas::trsm('R', loweredStored ? 'L' : 'U', loweredStored ? 'T' : 'N', 'N', x->rows, x->cols, Constants::pone, m, lda, x->m, x->lda); } template void FullMatrix::solveUpperTriangularLeft(FullMatrix* x, bool lowerStored) const { myAssert(pivots || diagonal); { const size_t _m = rows, _n = x->cols; const size_t adds = _n * _m * (_n - 1) / 2; const size_t muls = _n * _m * (_n + 1) / 2; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } if (pivots) { myAssert(!diagonal); proxy_cblas::trsm('L', 'U', 'N', 'N', rows, x->cols, Constants::pone, m, lda, x->m, x->lda); } else { myAssert(diagonal); myAssert(lowerStored); proxy_cblas::trsm('L', 'L', 'T', 'U', rows, x->cols, Constants::pone, m, lda, x->m, x->lda); } } template void FullMatrix::solve(FullMatrix* x) const { myAssert(pivots); int ierr = 0; { const size_t nrhs = x->cols; const size_t n = rows; const size_t adds = n * n * nrhs; const size_t muls = (n * n - n) * nrhs; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } ierr = proxy_lapack::getrs('N', rows, x->cols, m, lda, pivots, x->m, x->rows); strongAssert(!ierr); } template void FullMatrix::inverse() { // The inversion is done in two steps with dgetrf for LU decomposition and // dgetri for inversion of triangular matrices myAssert(rows == cols); int mm = rows; int n = cols; int *ipiv = new int[rows]; int info; { size_t vn = n, vm = n; // getrf size_t additions = (vm*vn*vn)/2 - (vn*vn*vn)/6 - (vm*vn)/2 + vn/6; size_t multiplications = (vm*vn*vn)/2 - (vn*vn*vn)/6 + (vm*vn)/2 - (vn*vn)/2 + 2*vn/3; increment_flops(Multipliers::add * additions + Multipliers::mul * multiplications); // getri additions = (2*vn*vn*vn)/3 - (3*vn*vn)/2 + (5*vn)/6; multiplications = (2*vn*vn*vn)/3 + (vn*vn)/2 + (5*vn)/6; increment_flops(Multipliers::add * additions + Multipliers::mul * multiplications); } info = proxy_lapack::getrf(mm, n, m, lda, ipiv); strongAssert(!info); // We call it twice: the first time to know the optimal size of // temporary arrays, and the second time for real calculation. int workSize; T workSize_req; info = proxy_lapack::getri(mm, m, lda, ipiv, &workSize_req, -1); workSize = (int) hmat::real(workSize_req) + 1; T* work = new T[workSize]; strongAssert(work); info = proxy_lapack::getri(mm, m, lda, ipiv, work, workSize); delete[] work; strongAssert(!info); delete[] ipiv; } template void FullMatrix::copyMatrixAtOffset(const FullMatrix* a, int rowOffset, int colOffset) { myAssert(rowOffset + a->rows <= rows); myAssert(colOffset + a->cols <= cols); // Use memcpy when copying the whole matrix. This avoids BLAS calls. if ((rowOffset == 0) && (colOffset == 0) && (a->rows == rows) && (a->cols == cols) && (a->lda == a->rows) && (lda == rows)) { size_t size = ((size_t) rows) * cols; memcpy(m, a->m, size * sizeof(T)); return; } for (int col = 0; col < a->cols; col++) { proxy_cblas::copy(a->rows, a->m + col * a->lda, 1, m + rowOffset + ((colOffset + col) * lda), 1); } } template void FullMatrix::copyMatrixAtOffset(const FullMatrix* a, int rowOffset, int colOffset, int rowsToCopy, int colsToCopy) { myAssert(rowOffset + rowsToCopy <= rows); myAssert(colOffset + colsToCopy <= cols); for (int col = 0; col < colsToCopy; col++) { proxy_cblas::copy(rowsToCopy, a->m + col * a->lda, 1, (m + rowOffset + ((colOffset + col) * lda)), 1); } } template void FullMatrix::axpy(T alpha, const FullMatrix* a) { myAssert(rows == a->rows); myAssert(cols == a->cols); size_t size = rows * cols; increment_flops(Multipliers::add * size + (alpha == Constants::pone ? 0 : Multipliers::mul * size)); // Fast path if ((lda == rows) && (a->lda == a->rows) && (size < 1000000000)) { proxy_cblas::axpy(size, alpha, a->m, 1, m, 1); return; } for (int col = 0; col < cols; col++) { proxy_cblas::axpy(rows, alpha, a->m + col * a->lda, 1, m + col * lda, 1); } } template double FullMatrix::norm() const { size_t size; size = rows * cols; T result = Constants::zero; // Fast path if ((size < 1000000000) && (lda == rows)) { result += proxy_cblas_convenience::dot_c(size, m, 1, m, 1); return sqrt(hmat::real(result)); } for (int col = 0; col < cols; col++) { result += proxy_cblas_convenience::dot_c(rows, m + col * lda, 1, m + col * lda, 1); } return sqrt(hmat::real(result)); } template void FullMatrix::toFile(const char *filename) const { int ierr; int fd; size_t size = rows * cols * sizeof(T) + 5 * sizeof(int); strongAssert(lda == rows); fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600); strongAssert(fd != -1); ierr = lseek(fd, size - 1, SEEK_SET); strongAssert(ierr != -1); ierr = write(fd, "", 1); strongAssert(ierr == 1); #ifndef _WIN32 void* mmapedFile = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ierr = (mmapedFile == MAP_FAILED) ? 1 : 0; strongAssert(!ierr); int *asIntArray = (int*) mmapedFile; asIntArray[0] = Constants::code; asIntArray[1] = (int) rows; asIntArray[2] = (int) cols; asIntArray[3] = sizeof(T); asIntArray[4] = 0; asIntArray += 5; T* mat = (T*) asIntArray; memcpy(mat, m, size - 5 * sizeof(int)); close(fd); munmap(mmapedFile, size); #else #ifdef __GNUC__ #warning MMAP: TO BE REMOVED OR FIXED #else #pragma message("Warning: MMAP: TO BE REMOVED OR FIXED") #endif #endif } template void checkNanReal(const FullMatrix* m) { for (int col = 0; col < m->cols; col++) { for (int row = 0; row < m->rows; row++) { strongAssert(!isnan(m->get(row, col))); } } } template void checkNanComplex(const FullMatrix* m) { for (int col = 0; col < m->cols; col++) { for (int row = 0; row < m->rows; row++) { strongAssert(!isnan(m->get(row, col).real())); strongAssert(!isnan(m->get(row, col).imag())); } } } template<> void FullMatrix::checkNan() const { checkNanReal(this); } template<> void FullMatrix::checkNan() const { checkNanReal(this); } template<> void FullMatrix::checkNan() const { checkNanComplex(this); } template<> void FullMatrix::checkNan() const { checkNanComplex(this); } // MmapedFullMatrix template MmapedFullMatrix::MmapedFullMatrix(size_t rows, size_t cols, const char* filename) : m(NULL, rows, cols), mmapedFile(NULL), fd(-1), size(0) { #ifdef _WIN32 strongAssert(false); // no mmap() on Windows #else int ierr; size = rows * cols * sizeof(T) + 5 * sizeof(int); fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600); strongAssert(fd != -1); ierr = lseek(fd, size - 1, SEEK_SET); strongAssert(ierr != -1); ierr = write(fd, "", 1); strongAssert(ierr == 1); mmapedFile = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ierr = (mmapedFile == MAP_FAILED) ? 1 : 0; strongAssert(!ierr); int *asIntArray = (int*) mmapedFile; asIntArray[0] = 0; asIntArray[1] = (int) rows; asIntArray[2] = (int) cols; asIntArray[3] = sizeof(T); asIntArray[4] = 0; asIntArray += 5; T* mat = (T*) asIntArray; m.m = mat; #endif } template MmapedFullMatrix::~MmapedFullMatrix() { #ifndef _WIN32 close(fd); munmap(mmapedFile, size); #endif } template MmapedFullMatrix* MmapedFullMatrix::fromFile(const char* filename) { MmapedFullMatrix* result = new MmapedFullMatrix(); #ifdef _WIN32 strongAssert(false); // no mmap() on Windows #else int ierr; result->fd = open(filename, O_RDONLY); strongAssert(result->fd != -1); struct stat fileStat; ierr = fstat(result->fd, &fileStat); strongAssert(!ierr); size_t fileSize = fileStat.st_size; result->mmapedFile = mmap(0, fileSize, PROT_READ, MAP_SHARED, result->fd, 0); ierr = (result->mmapedFile == MAP_FAILED) ? 1 : 0; strongAssert(!ierr); int* header = (int*) result->mmapedFile; // Check the consistency of the file strongAssert(header[0] == Constants::code); strongAssert(header[3] == sizeof(T)); strongAssert(header[1] * ((size_t) header[2]) * sizeof(T) + (5 * sizeof(int)) == fileSize); result->m.lda = result->m.rows = header[1]; result->m.cols = header[2]; result->m.m = (T*) (header + 5); #endif return result; } /** Vector */ template Vector::Vector(T* _v, size_t _rows) : ownsMemory(false), v(_v), rows(_rows) {} template Vector::Vector(size_t _rows) : ownsMemory(true), rows(_rows) { size_t size = ((size_t) rows) * sizeof(T); v = (T*) calloc(size, 1); REGISTER_ALLOC(v, size); strongAssert(v); } template Vector::~Vector() { if (ownsMemory) { size_t size = ((size_t) rows) * sizeof(T); free(v); REGISTER_FREE(v, size); } v = NULL; } template Vector* Vector::Zero(size_t rows) { Vector *result = new Vector(rows); return result; } template void Vector::gemv(char trans, T alpha, const FullMatrix* a, const Vector* x, T beta) { int matRows = a->rows; int matCols = a->cols; int lda = matRows; CBLAS_TRANSPOSE t = (trans == 'N' ? CblasNoTrans : CblasTrans); int64_t ops = (Multipliers::add + Multipliers::mul) * ((int64_t) matRows) * matCols; increment_flops(ops); if (trans == 'N') { myAssert(rows == (size_t) a->rows); myAssert(x->rows == (size_t) a->cols); } else { myAssert(rows == (size_t) a->cols); myAssert(x->rows == (size_t) a->rows); } proxy_cblas::gemv(t, matRows, matCols, alpha, a->m, lda, x->v, 1, beta, v, 1); } template void Vector::axpy(T alpha, const Vector* x) { myAssert(rows == x->rows); proxy_cblas::axpy(rows, alpha, x->v, 1, this->v, 1); } template int Vector::absoluteMaxIndex() const { return proxy_cblas::i_amax(rows, v, 1); } template T Vector::dot(const Vector* x, const Vector* y) { myAssert(x->rows == y->rows); // TODO: Beware of large vectors (>2 billion elements) ! return proxy_cblas_convenience::dot_c(x->rows, x->v, 1, y->v, 1); } template void Vector::addToMe(const Vector* x) { myAssert(rows == x->rows); axpy(Constants::pone, x); } template void Vector::subToMe(const Vector* x) { myAssert(rows == x->rows); axpy(Constants::mone, x); } template double Vector::norm() const { T result = dot(this, this); return sqrt(hmat::real(result)); } template void Vector::clear() { memset(this->v, 0, sizeof(T) * this->rows); } template void Vector::scale(T alpha) { if (alpha == Constants::zero) { memset(v, 0, sizeof(T) * ((size_t) rows)); } else { proxy_cblas::scal(rows, alpha, v, 1); } } // the classes declaration template class FullMatrix; template class FullMatrix; template class FullMatrix; template class FullMatrix; template class MmapedFullMatrix; template class MmapedFullMatrix; template class MmapedFullMatrix; template class MmapedFullMatrix; template class Vector; template class Vector; template class Vector; template class Vector; hmat-oss-1.0.4/src/full_matrix.hpp000066400000000000000000000240761245574051100171120ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Dense Matrix type used by the HMatrix library. */ #ifndef _FULL_MATRIX_HPP #define _FULL_MATRIX_HPP #include #include "data_types.hpp" #include "h_matrix.hpp" /*! \brief Templated dense Matrix type. The template parameter represents the scalar type of the matrix elements. The supported types are \a S_t, \a D_t, \a C_t and \a Z_t, as defined in @data_types.hpp. */ template class FullMatrix { private: /*! True if the matrix owns its memory, ie has to free it upon destruction */ bool ownsMemory; public: /// Fortran style pointer (columnwise) T* m; /// Number of rows int rows; /// Number of columns int cols; /*! Leading dimension, as in BLAS */ int lda; /*! Holds the pivots for the LU decomposition. */ int* pivots; /*! Diagonal in an LDL^t factored matrix */ Vector* diagonal; /*! Is this matrix upper triangular? */ bool isTriUpper; /*! Is this matrix lower triangular? */ bool isTriLower; public: /** \brief Initialize the matrix with existing data. In this case the matrix doesn't own the data (the memory is not freed at the object destruction). \param _m Pointer to the data \param _rows Number of rows \param _cols Number of cols \param lda Leading dimension, as in BLAS */ FullMatrix(T* _m, size_t _rows, size_t _cols, int _lda=-1); /* \brief Create an empty matrix, filled with 0s. In this case, the memory is freed when the object is destroyed. \param _rows Number of rows \param _cols Number of columns */ FullMatrix(size_t _rows, size_t _cols); /** \brief Create a matrix filled with 0s. In this case, the memory is freed when the object is destroyed. \param _rows Number of rows \param _cols Number of columns */ static FullMatrix* Zero(size_t rows, size_t cols); ~FullMatrix(); /** This <- 0. */ void clear(); /** \brief this *= alpha. \param alpha The scaling factor. */ void scale(T alpha); /** \brief Transpose in place. */ void transpose(); /** Return a copy of this. */ FullMatrix* copy() const; /** \brief Return a new matrix that is a transposed version of this. */ FullMatrix* copyAndTranspose() const; /** this = alpha * op(A) * op(B) + beta * this Standard "GEMM" call, as in BLAS. \param transA 'N' or 'T', as in BLAS \param transB 'N' or 'T', as in BLAS \param alpha alpha \param a the matrix A \param b the matrix B \param beta beta */ void gemm(char transA, char transB, T alpha, const FullMatrix* a, const FullMatrix* b, T beta); /*! \brief B <- B*D or B <- B*D^-1 (or with D on the left). B = this, and D a diagonal matrix. \param d D \param inverse true : B<-B*D^-1, false B<-B*D \param left true : B<-D*B, false B<-B*D */ void multiplyWithDiagOrDiagInv(const Vector* d, bool inverse, bool left = false); /*! \brief Compute a LU factorization in place. */ void luDecomposition(); /*! \brief Compute a LDLt factorization in place. */ void ldltDecomposition(); /*! \brief Compute a LLt factorization in place (aka Cholesky). */ void lltDecomposition(); /*! \brief Solve the system L X = B, with B = X on entry, and L = this. This function requires the matrix to be factored by HMatrix::luDecomposition() beforehand. \param x B on entry, the solution on exit. */ void solveLowerTriangular(FullMatrix* x) const; /*! \brief Solve the system X U = B, with B = X on entry, and U = this. This function requires the matrix to be factored by HMatrix::luDecomposition() beforehand. \param x B on entry, the solution on exit. */ void solveUpperTriangular(FullMatrix* x, bool loweredStored = false) const; /*! \brief Solve the system U X = B, with B = X on entry, and U = this. This function requires the matrix to be factored by HMatrix::luDecomposition() beforehand. \param x B on entry, the solution on exit. */ void solveUpperTriangularLeft(FullMatrix* x, bool lowerStored) const; /*! \brief Solve the system U X = B, with B = X on entry, and U = this. This function requires the matrix to be factored by HMatrix::luDecomposition() beforehand. \param x B on entry, the solution on exit. */ void solve(FullMatrix* x) const; /*! \brief Compute the inverse of this in place. */ void inverse(); /*! Copy a matrix A into 'this' at offset (rowOffset, colOffset) (indices start at 0). \param a the matrix A \param rowOffset the row offset \param colOffset the column offset */ void copyMatrixAtOffset(const FullMatrix* a, int rowOffset, int colOffset); /*! Copy a matrix A into 'this' at offset (rowOffset, colOffset) (indices start at 0). In this function, only copy a sub-matrix of size (rowsToCopy, colsToCopy). \param a the matrix A \param rowOffset the row offset \param colOffset the column offset \param rowsToCopy number of rows to copy \param colsToCopy number of columns to copy */ void copyMatrixAtOffset(const FullMatrix* a, int rowOffset, int colOffset, int rowsToCopy, int colsToCopy); /*! \brief this += alpha * A \param a the Matrix A */ void axpy(T alpha, const FullMatrix* a); /*! \brief Return the Frobenius norm of the matrix. \return the matrix norm. */ double norm() const; /*! \brief Write the matrix to a binary file. \param filename output filename */ void toFile(const char *filename) const; /** Simpler accessors for the data. There are 2 types to allow matrix modification or not. */ T& get(size_t i, size_t j) { return m[i + lda * j]; } T get(size_t i, size_t j) const { return m[i + lda * j]; } /** Simpler accessors for the diagonal. \warning This will only work if the matrix has been factored with \a FullMatrix::ldltDecomposition() beforehand. */ T getD(size_t i) const { return diagonal->v[i]; } T& getD(size_t i) { return diagonal->v[i]; } /*! Check the matrix for the presence of NaN numbers. If a NaN is found, an assertion is triggered. */ void checkNan() const; private: /// Disallow the copy FullMatrix(const FullMatrix& o); }; /** \brief Wraps a FullMatrix backed by a file (using mmap()). */ template class MmapedFullMatrix { public: FullMatrix m; private: void* mmapedFile; int fd; size_t size; public: /** \brief Maps a .res file into memory. The mapping is read-only. \param filename The filename \return an instance of MMapedFullMatrix mapping the file. */ static MmapedFullMatrix* fromFile(const char* filename); /** \brief Creates a FullMatrix backed by a file, wrapped into a MMapedFullMatrix. \param rows number of rows of the matrix. \param cols number of columns of the matrix. \param filename Filename. The file is not destroyed with the object. */ MmapedFullMatrix(size_t rows, size_t cols, const char* filename); ~MmapedFullMatrix(); private: MmapedFullMatrix() : m(NULL, 0, 0), mmapedFile(NULL), fd(-1), size(0) {}; }; /*! \brief Templated Vector class. As for \a FullMatrix, the template parameter is the scalar type. */ template class Vector { private: /// True if the vector owns its memory bool ownsMemory; public: /// Pointer to the data T* v; /// Rows count size_t rows; public: Vector(T* _v, size_t _rows); Vector(size_t _rows); ~Vector(); /** Create a vector filled with 0s. */ static Vector* Zero(size_t rows); /** this = alpha * A * x + beta * this Wrapper around the 'GEMV' BLAS call. \param trans 'N' or 'T', as in BLAS \param alpha alpha \param a A \param x X \param beta beta */ void gemv(char trans, T alpha, const FullMatrix* a, const Vector* x, T beta); /** \brief this += v */ void addToMe(const Vector* x); /** \brief this -= v */ void subToMe(const Vector* x); /** L2 norm of the vector. */ double norm() const; /** Set the vector to 0. */ void clear(); /** This *= alpha. */ void scale(T alpha); /** Compute This += alpha X, X a vector. */ void axpy(T alpha, const Vector* x); /** \brief Return the index of the maximum absolute value element. This function is similar to iXamax() in BLAS. \return the index of the maximum absolute value element in the vector. */ int absoluteMaxIndex() const; /** Compute the dot product of two \Vector. For real-valued vectors, this is the usual dot product. For complex-valued ones, this is defined as: = \bar{x}^t \times y as in BLAS \warning DOES NOT work with vectors with >INT_MAX elements \param x \param y \return */ static T dot(const Vector* x, const Vector* y); private: /// Disallow the copy Vector(const Vector& o); }; /*! \brief Wrapper around BLAS copy function. \param n Number of elements to copy \param from Source \param incFrom increment between elements in from \param to Destination \paarm incTo increment between elenents in to */ template void blasCopy(int n, T* from, int incFrom, T* to, int incTo); #endif hmat-oss-1.0.4/src/h_matrix.cpp000066400000000000000000001717551245574051100164010ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "config.h" /*! \file \ingroup HMatrix \brief HMatrix type. */ #include #include #include #include #include "h_matrix.hpp" #include "cluster_tree.hpp" #include "rk_matrix.hpp" #include "data_types.hpp" #include "compression.hpp" #include "postscript.hpp" #include "common/context.hpp" #include "common/my_assert.h" #include "fromdouble.hpp" using namespace std; // The default values below will be overwritten in default_engine.cpp by HMatSettings values template bool HMatrix::coarsening = false; template bool HMatrix::recompress = false; template bool HMatrix::validateCompression = false; template bool HMatrix::validationReRun = false; template bool HMatrix::validationDump = false; template double HMatrix::validationErrorThreshold = 0; template HMatrixData::~HMatrixData() { if (rk) { delete rk; } if (m) { delete m; } } template void reorderVector(FullMatrix* v, int* indices) { DECLARE_CONTEXT; const size_t n = v->rows; Vector tmp(n); for (int col = 0; col < v->cols; col++) { T* column = v->m + (n * col); for (size_t i = 0; i < n; i++) { tmp.v[i] = column[indices[i]]; } memcpy(column, tmp.v, sizeof(T) * n); } } template void restoreVectorOrder(FullMatrix* v, int* indices) { DECLARE_CONTEXT; const size_t n = v->rows; Vector tmp(n); for (int col = 0; col < v->cols; col++) { T* column = v->m + (n * col); for (size_t i = 0; i < n; i++) { tmp.v[indices[i]] = column[i]; } memcpy(column, tmp.v, sizeof(T) * n); } } template HMatrix::HMatrix(ClusterTree* _rows, ClusterTree* _cols, SymmetryFlag symFlag) : Tree<4>(NULL), data(HMatrixData(_rows, _cols)), isUpper(false), isLower(false), isTriUpper(false), isTriLower(false) { if (_rows->isLeaf() || _cols->isLeaf() || _rows->isAdmissibleWith(_cols)) { if (_rows->isAdmissibleWith(_cols)) { data.rk = new RkMatrix(NULL, rows(), NULL, cols(), NoCompression); } return; } else { isUpper = false; isLower = (symFlag == kLowerSymmetric ? true : false); isTriUpper = false; isTriLower = false; for (int i = 0; i < 2; ++i) { ClusterTree* rowChild = static_cast(_rows->getChild(i)); for (int j = 0; j < 2; ++j) { if ((symFlag == kNotSymmetric) || (isUpper && (i <= j)) || (isLower && (i >= j))) { ClusterTree* colChild = static_cast(_cols->getChild(j)); this->insertChild(i + j * 2, new HMatrix(rowChild, colChild, (i == j ? symFlag : kNotSymmetric))); } } } } } template HMatrix::HMatrix() : Tree<4>(NULL), data(HMatrixData()), isUpper(false), isLower(false) {} template HMatrix* HMatrix::copyStructure() const { HMatrix* h = new HMatrix(); h->data.rows = data.rows; h->data.cols = data.cols; h->isUpper = isUpper; h->isLower = isLower; h->isTriUpper = isTriUpper; h->isTriLower = isTriLower; if (isLeaf()) { h->data.rk = NULL; h->data.m = NULL; if (isRkMatrix()) { // We have to create a RkMatrix because // h->isRkMatrix () returns false otherwise, // which may cause trouble for some operations. h->data.rk = new RkMatrix(NULL, data.rk->rows, NULL, data.rk->cols, data.rk->method); } } else { for (int i = 0; i < 4; ++i) { if (getChild(i)) { const HMatrix* child = static_cast*>(getChild(i)); h->insertChild(i, child->copyStructure()); } } } return h; } template HMatrix* HMatrix::Zero(const HMatrix* o) { // leaves are filled by 0 HMatrix *h = new HMatrix(); h->data.rows = o->data.rows; h->data.cols = o->data.cols; h->isLower = o->isLower; h->isUpper = o->isUpper; h->isTriUpper = o->isTriUpper; h->isTriLower = o->isTriLower; if (o->isLeaf()) { if (o->isRkMatrix()) { h->data.rk = new RkMatrix(NULL, h->rows(), NULL, h->cols(), o->data.rk->method); h->data.m = NULL; } else { h->data.rk = NULL; h->data.m = FullMatrix::Zero(h->rows()->n, h->cols()->n); } } else { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 2; ++j) { if (o->get(i, j)) { h->insertChild(i + j * 2, HMatrix::Zero(o->get(i, j))); } } } } return h; } template HMatrix* HMatrix::Zero(const ClusterTree* rows, const ClusterTree* cols) { // Leaves are filled by 0 HMatrix *h = new HMatrix(); h->data.rows = (ClusterTree *) rows; h->data.cols = (ClusterTree *) cols; if (rows->isLeaf() || cols->isLeaf() || h->data.isAdmissibleLeaf()) { if (h->data.isAdmissibleLeaf()) { h->data.rk = new RkMatrix(NULL, h->rows(), NULL, h->cols(), NoCompression); h->data.m = NULL; } else { h->data.rk = NULL; h->data.m = FullMatrix::Zero(h->rows()->n, h->cols()->n); } } else { for (int i = 0; i < 2; ++i) { const ClusterTree* rowChild = static_cast(h->data.rows->getChild(i)); for (int j = 0; j < 2; ++j) { const ClusterTree* colChild = static_cast(h->data.cols->getChild(j)); h->insertChild(i + j * 2, HMatrix::Zero(rowChild, colChild)); } } } return h; } template void HMatrix::assemble(const AssemblyFunction& f) { if (isLeaf()) { // If the leaf is admissible, matrix assembly and compression. // if not we keep the matrix. if (data.isAdmissibleLeaf()) { RkMatrix::dp>* rkDp = compress(RkMatrix::approx.method, f, rows(), cols()); if (recompress) { rkDp->truncate(); } RkMatrix* rk = fromDoubleRk(rkDp); data.m = NULL; data.rk->swap(*rk); delete rk; } else { FullMatrix* mat = fromDoubleFull(f.assemble(rows(), cols())); data.rk = NULL; data.m = mat; } } else { data.m = NULL; data.rk = NULL; for (int i = 0; i < 4; i++) { HMatrix *child = static_cast(getChild(i)); child->assemble(f); } if (coarsening) { // If all children are Rk leaves, then we try to merge them into a single Rk-leaf. // This is done if the memory of the resulting leaf is less than the sum of the initial // leaves. Note that this operation could be used hierarchically. bool allRkLeaves = true; const RkMatrix* childrenArray[4]; size_t childrenElements = 0; for (int i = 0; i < 4; i++) { HMatrix *child = static_cast(getChild(i)); if (!child->isRkMatrix()) { allRkLeaves = false; break; } else { childrenArray[i] = child->data.rk; childrenElements += (childrenArray[i]->rows->n + childrenArray[i]->cols->n) * childrenArray[i]->k; } } if (allRkLeaves) { RkMatrix dummy(NULL, rows(), NULL, cols(), NoCompression); T alpha[4] = {Constants::pone, Constants::pone, Constants::pone, Constants::pone}; RkMatrix* candidate = dummy.formattedAddParts(alpha, childrenArray, 4); size_t elements = (candidate->rows->n + candidate->cols->n) * candidate->k; if (elements < childrenElements) { cout << "Coarsening ! " << elements << " < " << childrenElements << endl; for (int i = 0; i < 4; i++) { removeChild(i); } delete[] children; children = NULL; data.m = NULL; data.rk = candidate; myAssert(isLeaf()); myAssert(isRkMatrix()); } else { delete candidate; } } } } } template void HMatrix::assembleSymmetric(const AssemblyFunction& f, HMatrix* upper, bool onlyLower) { if (!onlyLower) { if (!upper){ upper = this; } myAssert(*this->rows() == *upper->cols()); myAssert(*this->cols() == *upper->rows()); } if (isLeaf()) { // If the leaf is admissible, matrix assembly and compression. // if not we keep the matrix. if (data.isAdmissibleLeaf()) { this->assemble(f); if ((!onlyLower) && (upper != this)) { // Admissible leaf: a matrix represented by AB^t is transposed by exchanging A and B. RkMatrix* rk = new RkMatrix(NULL, upper->rows(), NULL, upper->cols(), this->data.rk->method); rk->k = this->data.rk->k; rk->a = (data.rk->b ? data.rk->b->copy() : NULL); rk->b = (data.rk->a ? data.rk->a->copy() : NULL); upper->data.rk = rk; upper->data.m = NULL; } } else { this->assemble(f); if ((!onlyLower) && ( upper != this)) { upper->data.rk = NULL; upper->data.m = data.m->copyAndTranspose(); } } } else { if (onlyLower) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { if ((*rows() == *cols()) && (j > i)) { continue; } get(i,j)->assembleSymmetric(f, NULL, true); } } } else { if (this == upper) { for (int i = 0; i < 2; i++) { for (int j = 0; j <= i; j++) { HMatrix *child = get(i, j); HMatrix *upperChild = get(j, i); myAssert(child != NULL); child->assembleSymmetric(f, upperChild); } } } else { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { HMatrix *child = get(i, j); HMatrix *upperChild = upper->get(j, i); child->assembleSymmetric(f, upperChild); } } if (coarsening) { // If all children are Rk leaves, then we try to merge them into a single Rk-leaf. // This is done if the memory of the resulting leaf is less than the sum of the initial bool allRkLeaves = true; const RkMatrix* childrenArray[4]; size_t childrenElements = 0; for (int i = 0; i < 4; i++) { HMatrix *child = static_cast(getChild(i)); if (!child->isRkMatrix()) { allRkLeaves = false; break; } else { childrenArray[i] = child->data.rk; childrenElements += (childrenArray[i]->rows->n + childrenArray[i]->cols->n) * childrenArray[i]->k; } } if (allRkLeaves) { T alpha[4] = {Constants::pone, Constants::pone, Constants::pone, Constants::pone}; RkMatrix dummy(NULL, rows(), NULL, cols(), NoCompression); RkMatrix* candidate = dummy.formattedAddParts(alpha, childrenArray, 4); size_t elements = (candidate->rows->n + candidate->cols->n) * candidate->k; if (elements < childrenElements) { cout << "Coarsening ! " << elements << " < " << childrenElements << endl; for (int i = 0; i < 4; i++) { removeChild(i); upper->removeChild(i); } delete[] children; children = NULL; delete[] upper->children; upper->children = NULL; data.m = NULL; data.rk = candidate; upper->data.m = NULL; upper->data.rk = new RkMatrix(NULL, upper->rows(), NULL, upper->cols(), data.rk->method); upper->data.rk->k = data.rk->k; upper->data.rk->a = data.rk->b->copy(); upper->data.rk->b = data.rk->a->copy(); myAssert(isLeaf() && upper->isLeaf()); myAssert(isRkMatrix() && upper->isRkMatrix()); } else { delete candidate; } } } } } } } template pair HMatrix::compressionRatio() const { pair result = pair(0, 0); if (isLeaf()) { if (data.m) { result.first = data.m->rows * data.m->cols; result.second = result.first; } else { if (data.rk) { result = data.rk->compressionRatio(); } } return result; } for (int i = 0; i < 4; i++) { HMatrix *child = static_cast*>(getChild(i)); if (child) { pair p = child->compressionRatio(); result.first += p.first; result.second += p.second; } } return result; } template void HMatrix::eval(FullMatrix* result, bool renumber) const { if (isLeaf()) { FullMatrix *mat = data.m; if (data.rk) { mat = data.rk->eval(); } int *rowIndices = rows()->indices + rows()->offset; size_t rowCount = rows()->n; int *colIndices = cols()->indices + cols()->offset; size_t colCount = cols()->n; for (size_t i = 0; i < rowCount; i++) { for (size_t j = 0; j < colCount; j++) { if(renumber) result->get(rowIndices[i], colIndices[j]) = mat->get(i, j); else result->get(rows()->offset + i, cols()->offset + j) = mat->get(i, j); } } if (data.rk) { delete mat; } } else { for (int i = 0; i < 4; i++) { if (getChild(i)) { static_cast*>(getChild(i))->eval(result, renumber); } } } } template void HMatrix::evalPart(FullMatrix* result, const ClusterData* _rows, const ClusterData* _cols) const { if (isLeaf()) { FullMatrix *mat = data.m; if (data.rk) { mat = data.rk->eval(); } int rowOffset = rows()->offset - _rows->offset; int rowCount = rows()->n; int colOffset = cols()->offset - _cols->offset; int colCount = cols()->n; for (int i = 0; i < rowCount; i++) { for (int j = 0; j < colCount; j++) { result->get(i + rowOffset, j + colOffset) = mat->get(i, j); } } if (data.rk) { delete mat; } } else { for (int i = 0; i < 4; i++) { if (getChild(i)) { static_cast*>(getChild(i))->evalPart(result, _rows, _cols); } } } } template double HMatrix::norm() const { double result = 0.; if (isLeaf()) { FullMatrix* mat = data.m; if (data.rk) { // TODO: This is not optimized and problematic for // RKMatrix of too big size. mat = data.rk->eval(); result = mat->norm(); delete mat; } else { result = mat->norm(); } } else { for (int i = 0; i < 4; i++) { if (getChild(i)) { result += pow(static_cast*>(getChild(i))->norm(), 2.) ; } } result = sqrt(result) ; } return result; } template void HMatrix::scale(T alpha) { if (isLeaf()) { if (isRkMatrix()) { data.rk->scale(alpha); } else { myAssert(isFullMatrix()); data.m->scale(alpha); } } else { for (int i = 0; i < 4; i++) { if (getChild(i)) { HMatrix* child = static_cast*>(getChild(i)); child->scale(alpha); } } } } template void HMatrix::gemv(char trans, T alpha, const Vector* x, T beta, Vector* y) const { FullMatrix mx(x->v, x->rows, 1); FullMatrix my(y->v, y->rows, 1); gemv(trans, alpha, &mx, beta, &my); } template void HMatrix::gemv(char trans, T alpha, const FullMatrix* x, T beta, FullMatrix* y) const { if (beta != Constants::pone) { y->scale(beta); } beta = Constants::pone; if (!isLeaf()) { const ClusterData* myRows = rows(); const ClusterData* myCols = cols(); // TODO: make this work with symmetric lower-stored matrices. for (int i = 0; i < 4; i++) { HMatrix* child = static_cast*>(getChild(i)); if(isTriLower && !child) continue; const ClusterData* childRows = child->rows(); const ClusterData* childCols = child->cols(); size_t rowsOffset = childRows->offset - myRows->offset; size_t colsOffset = childCols->offset - myCols->offset; if (trans == 'N') { myAssert(colsOffset + childCols->n <= (size_t) x->rows); myAssert(rowsOffset + childRows->n <= (size_t) y->rows); FullMatrix subX(x->m + colsOffset, childCols->n, x->cols, x->lda); FullMatrix subY(y->m + rowsOffset, childRows->n, y->cols, y->lda); child->gemv(trans, alpha, &subX, beta, &subY); } else { myAssert(trans == 'T'); myAssert(rowsOffset + childRows->n <= (size_t) x->rows); myAssert(colsOffset + childCols->n <= (size_t) y->rows); FullMatrix subX(x->m + rowsOffset, childRows->n, x->cols, x->lda); FullMatrix subY(y->m + colsOffset, childCols->n, y->cols, y->lda); child->gemv(trans, alpha, &subX, beta, &subY); } } } else { if (data.m) { y->gemm(trans, 'N', alpha, data.m, x, beta); } else { data.rk->gemv(trans, alpha, x, beta, y); } } } template void HMatrix::axpy(T alpha, const HMatrix* b) { myAssert(*rows() == *b->rows()); myAssert(*cols() == *b->cols()); if (isLeaf()) { if (isRkMatrix()) { myAssert(b->isRkMatrix()); if (b->data.rk->k == 0) { return; } data.rk->axpy(alpha, b->data.rk); } else { myAssert(b->isFullMatrix()); data.m->axpy(alpha, b->data.m); } } else { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { HMatrix* child = get(i, j); HMatrix* bChild = b->get(i, j); child->axpy(alpha, bChild); } } } } template void HMatrix::axpy(T alpha, const RkMatrix* b) { DECLARE_CONTEXT; // this += alpha * b myAssert(b->rows->isSuperSet(*rows())); myAssert(b->cols->isSuperSet(*cols())); if (b->k == 0) { return; } if (!isLeaf()) { for (int i = 0; i < 4; i++) { if (getChild(i)) { static_cast*>(getChild(i))->axpy(alpha, b); } } } else { // To add-up a leaf to a RkMatrix, resizing may be necessary. bool needResizing = b->rows->isStrictSuperSet(*rows()) || b->cols->isStrictSuperSet(*cols()); const RkMatrix* rk = b; if (needResizing) { rk = b->subset(rows(), cols()); } if (isFullMatrix()) { // In this case, the matrix has small size // then evaluating the Rk-matrix is cheaper FullMatrix* rkMat = rk->eval(); data.m->axpy(alpha, rkMat); delete rkMat; } else { myAssert(isRkMatrix()); data.rk->axpy(alpha, rk); } if (needResizing) { delete rk; } } } template void HMatrix::axpy(T alpha, const FullMatrix* b, const ClusterData* rows, const ClusterData* cols) { DECLARE_CONTEXT; // this += alpha * b myAssert(rows->isSuperSet(*this->rows()) && cols->isSuperSet(*this->cols())); if (!isLeaf()) { for (int i = 0; i < 4; i++) { HMatrix* child = static_cast*>(getChild(i)); if (!child) { continue; } if (child->rows()->intersects(*rows) && child->cols()->intersects(*cols)) { const ClusterData *childRows = child->rows()->intersection(*rows); const ClusterData *childCols = child->cols()->intersection(*cols); int rowOffset = childRows->offset - rows->offset; int colOffset = childCols->offset - cols->offset; FullMatrix subB(b->m + rowOffset + colOffset * b->lda, childRows->n, childCols->n, b->lda); child->axpy(alpha, &subB, childRows, childCols); delete childRows; delete childCols; } } } else { size_t rowOffset = this->rows()->offset - rows->offset; size_t colOffset = this->cols()->offset - cols->offset; FullMatrix subMat(b->m + rowOffset + (colOffset * b->lda), this->rows()->n, this->cols()->n, b->lda); if (isFullMatrix()) { data.m->axpy(alpha, &subMat); } else { myAssert(isRkMatrix()); data.rk->axpy(alpha, &subMat); } } } template void HMatrix::gemm(char transA, char transB, T alpha, const HMatrix* a, const HMatrix* b, T beta, int depth) { myAssert((transA == 'N' ? *a->cols() : *a->rows()) == ( transB == 'N' ? *b->rows() : *b->cols())); // pour le produit A*B if ((transA == 'T') && (transB == 'T')) { strongAssert(false); // This assertion is only a warning, this code has *not* been tested. const ClusterData& tmp_rows = *this->rows(); const ClusterData& tmp_cols = *this->cols(); this->transpose(); myAssert(tmp_rows == *cols()); myAssert(tmp_cols == *rows()); this->gemm('N', 'N', alpha, b, a, beta); this->transpose(); myAssert(tmp_rows == *rows()); myAssert(tmp_cols == *cols()); return; } myAssert((transA == 'N' ? *a->rows() : *a->cols()) == *this->rows()); // compatibility of A*B + this : Rows myAssert((transB == 'N' ? *b->cols() : *b->rows()) == *this->cols()); // compatibility of A*B + this : columns // Scaling this if (beta != Constants::pone) { if (beta == Constants::zero) { this->clear(); } else { this->scale(beta); } } // Once the scaling is done, beta is reset to 1 // to avoid an other scaling. beta = Constants::pone; // None of the matrices is a leaf if (!isLeaf() && !a->isLeaf() && !b->isLeaf()) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { HMatrix* child = get(i, j); if (!child) { // symmetric or triangular case continue; } for (int k = 0; k < 2; k++) { char tA = transA, tB = transB; // childA states : // if A is symmetric and childA_ik is NULL // then childA_ki^T is used and transA is changed accordingly. // However A may be triangular( upper/lower ) so childA_ik is NULL // and must be taken as 0. const HMatrix* childA = (tA == 'N' ? a->get(i, k) : a->get(k, i)); const HMatrix* childB = (tB == 'N' ? b->get(k, j) : b->get(j, k)); if (!childA && (a->isTriUpper || a->isTriLower)) { myAssert(*a->rows() == *a->cols()); continue; } if (!childB && (b->isTriUpper || b->isTriLower)) { myAssert(*b->rows() == *b->cols()); continue; } if (!childA) { tA = (tA == 'N' ? 'T' : 'N'); childA = (tA == 'N' ? a->get(i, k) : a->get(k, i)); } if (!childB) { tB = (tB == 'N' ? 'T' : 'N'); childB = (tB == 'N' ? b->get(k, j) : b->get(j, k)); } child->gemm(tA, tB, alpha, childA, childB, beta); } } } return; } // One of the matrices is a leaf myAssert(isLeaf() || a->isLeaf() || b->isLeaf()); // the resulting matrix is not a leaf. if (!isLeaf()) { // If the resulting matrix is subdivided then at least one of the matrices of the product is a leaf. RkMatrix* rkMat = NULL; FullMatrix* fullMat = NULL; // One matrix is a RkMatrix if (a->isRkMatrix() || b->isRkMatrix()) { if ((a->isRkMatrix() && (a->data.rk->k == 0)) || (b->isRkMatrix() && (b->data.rk->k == 0))) { return; } rkMat = HMatrix::multiplyRkMatrix(transA, transB, a, b); } else { // None of the matrices of the product is a Rk-matrix so one of them is // a full matrix so as the result. myAssert(a->isFullMatrix() || b->isFullMatrix()); fullMat = HMatrix::multiplyFullMatrix(transA, transB, a, b); } // The resulting matrix is added to a H-matrix with the H-matrix class operations. if (rkMat) { axpy(alpha, rkMat); delete rkMat; } else { myAssert(fullMat); axpy(alpha, fullMat, rows(), cols()); delete fullMat; } return; } if (isRkMatrix()) { // The resulting matrix is a RkMatrix leaf. // At least one of the matrix is not a leaf. // The different cases are : // a. R += H * H // b. R += H * R // c. R += R * H // d. R += H * M // e. R += M * H // f. R += M * M // Cases a, b and c give an Hmatrix which has to be hierarchically converted into a Rkmatrix. // Cases c, d, e and f give a RkMatrix myAssert(isRkMatrix()); myAssert((transA == 'N' ? *a->cols() : *a->rows()) == (transB == 'N' ? *b->rows() : *b->cols())); myAssert(*rows() == (transA == 'N' ? *a->rows() : *a->cols())); myAssert(*cols() == (transB == 'N' ? *b->cols() : *b->rows())); data.rk->gemmRk(transA, transB, alpha, a, b, beta); return; } // The resulting matrix is a full matrix myAssert(isFullMatrix()); if (a->isRkMatrix() || b->isRkMatrix()) { myAssert(a->isRkMatrix() || b->isRkMatrix()); if ((a->isRkMatrix() && (a->data.rk->k == 0)) || (b->isRkMatrix() && (b->data.rk->k == 0))) { return; } RkMatrix* rkMat = HMatrix::multiplyRkMatrix(transA, transB, a, b); FullMatrix* fullMat = rkMat->eval(); delete rkMat; data.m->axpy(alpha, fullMat); delete fullMat; } else { myAssert(a->isFullMatrix() || b->isFullMatrix()); FullMatrix* fullMat = HMatrix::multiplyFullMatrix(transA, transB, a, b); this->data.m->axpy(alpha, fullMat); delete fullMat; } } template FullMatrix* HMatrix::multiplyFullH(char transM, char transH, const FullMatrix* mat, const HMatrix* h) { // R = M * H = (H^t * M^t*)^t FullMatrix* resultT = multiplyHFull(transH == 'N' ? 'T' : 'N', transM == 'N' ? 'T' : 'N', h, mat); resultT->transpose(); return resultT; } template FullMatrix* HMatrix::multiplyHFull(char transH, char transM, const HMatrix* h, const FullMatrix* mat) { myAssert((transH == 'N' ? h->cols()->n : h->rows()->n) == (transM == 'N' ? mat->rows : mat->cols)); FullMatrix* result = new FullMatrix((transH == 'N' ? h->rows()->n : h->cols()->n), (transM == 'N' ? mat->cols : mat->rows)); if (transM == 'N') { h->gemv(transH, Constants::pone, mat, Constants::zero, result); } else { FullMatrix* matT = mat->copyAndTranspose(); h->gemv(transH, Constants::pone, matT, Constants::zero, result); delete matT; } return result; } template RkMatrix* HMatrix::multiplyRkMatrix(char transA, char transB, const HMatrix* a, const HMatrix* b){ // We know that one of the matrices is a RkMatrix assert((transA == 'N') || (transB == 'N')); // Exclusion of the case At * Bt myAssert(a->isRkMatrix() || b->isRkMatrix()); RkMatrix *rk = NULL; // Matrices range compatibility if((transA == 'N') && (transB == 'N')) assert(a->cols()->n == b->rows()->n); if((transA == 'T') && (transB == 'N')) assert(a->rows()->n == b->rows()->n); if((transA == 'N') && (transB == 'T')) assert(a->cols()->n == b->cols()->n); // The cases are: // - A Rk, B H // - A H, B Rk // - A Rk, B Rk // - A Rk, B F // - A F, B Rk if (a->isRkMatrix() && b->isHMatrix()) { rk = RkMatrix::multiplyRkH(transA, transB, a->data.rk, b); strongAssert(rk); } else if (a->isHMatrix() && b->isRkMatrix()) { rk = RkMatrix::multiplyHRk(transA, transB, a, b->data.rk); strongAssert(rk); } else if (a->isRkMatrix() && b->isRkMatrix()) { rk = RkMatrix::multiplyRkRk(transA, transB, a->data.rk, b->data.rk); strongAssert(rk); } else if (a->isRkMatrix() && b->isFullMatrix()) { rk = RkMatrix::multiplyRkFull(transA, transB, a->data.rk, b->data.m, (transB == 'N' ? b->cols() : b->rows())); strongAssert(rk); } else if (a->isFullMatrix() && b->isRkMatrix()) { rk = RkMatrix::multiplyFullRk(transA, transB, a->data.m, b->data.rk, (transA == 'N' ? a->rows() : a->cols())); strongAssert(rk); } else { // None of the above cases, impossible. strongAssert(false); } return rk; } template FullMatrix* HMatrix::multiplyFullMatrix(char transA, char transB, const HMatrix* a, const HMatrix* b) { // At least one full matrix, and not RkMatrix. assert((transA == 'N') || (transB == 'N'));// Not for the products At*Bt myAssert(a->isFullMatrix() || b->isFullMatrix()); myAssert(!(a->isRkMatrix() || b->isRkMatrix())); FullMatrix *result = NULL; // The cases are: // - A H, B F // - A F, B H // - A F, B F if (a->isHMatrix() && b->isFullMatrix()) { result = HMatrix::multiplyHFull(transA, transB, a, b->data.m); strongAssert(result); } else if (a->isFullMatrix() && b->isHMatrix()) { result = HMatrix::multiplyFullH(transA, transB, a->data.m, b); strongAssert(result); } else if (a->isFullMatrix() && b->isFullMatrix()) { int aRows = ((transA == 'N')? a->data.m->rows : a->data.m->cols); int bCols = ((transB == 'N')? b->data.m->cols : b->data.m->rows); result = new FullMatrix(aRows, bCols); result->gemm(transA, transB, Constants::pone, a->data.m, b->data.m, Constants::zero); strongAssert(result); } else { // None of above, impossible strongAssert(false); } return result; } template void HMatrix::multiplyWithDiag(const HMatrix* d, bool left) { DECLARE_CONTEXT; multiplyWithDiagOrDiagInv(d, false, left); } template void HMatrix::multiplyWithDiagOrDiagInv(const HMatrix* d, bool inverse, bool left) { myAssert(*d->rows() == *d->cols()); myAssert(left || (*cols() == *d->rows())); myAssert(!left || (*rows() == *d->cols())); // The symmetric matrix must be taken into account: lower or upper if (isHMatrix()) { get(0,0)->multiplyWithDiagOrDiagInv(d->get(0,0), inverse, left); get(1,1)->multiplyWithDiagOrDiagInv(d->get(1,1), inverse, left); if (get(0, 1)) { get(0, 1)->multiplyWithDiagOrDiagInv(left ? d->get(0, 0) : d->get(1, 1), inverse, left); } if (get(1, 0)) { get(1, 0)->multiplyWithDiagOrDiagInv(left ? d->get(1, 1) : d->get(0, 0), inverse, left); } } else if (isRkMatrix()) { myAssert(!data.rk->a->isTriUpper && !data.rk->b->isTriUpper); myAssert(!data.rk->a->isTriLower && !data.rk->b->isTriLower); data.rk->multiplyWithDiagOrDiagInv(d, inverse, left); } else { myAssert(isFullMatrix()); if (d->isFullMatrix()) { data.m->multiplyWithDiagOrDiagInv(d->data.m->diagonal, inverse, left); } else { Vector diag(d->rows()->n); d->getDiag(&diag); data.m->multiplyWithDiagOrDiagInv(&diag, inverse, left); } } } template void HMatrix::transpose() { if (!isLeaf()) { if (isLower || isUpper) { // if the matrix is symmetric, inverting it(Upper/Lower) isLower = !isLower; isUpper = !isUpper; } if (isTriLower || isTriUpper) { // if the matrix is triangular, on inverting it (isTriUpper/isTriLower) isTriLower = !isTriLower; isTriUpper = !isTriUpper; } get(1, 1)->transpose(); get(0, 0)->transpose(); swap(children[1 + 0 * 2], children[0 + 1 * 2]); if (get(1, 0)) get(1, 0)->transpose(); if (get(0, 1)) get(0, 1)->transpose(); swap(data.rows, data.cols); } else { swap(data.rows, data.cols); if (isRkMatrix()) { // To transpose an Rk-matrix, simple exchange A and B : (AB^T)^T = (BA^T) swap(data.rk->a, data.rk->b); swap(data.rk->rows, data.rk->cols); } else if (isFullMatrix()) { myAssert(data.m->lda == data.m->rows); data.m->transpose(); } } } template void HMatrix::copyAndTranspose(const HMatrix* o) { myAssert(o); myAssert(*this->rows() == *o->cols()); myAssert(*this->cols() == *o->rows()); myAssert(isLeaf() == o->isLeaf()); if (isLeaf()) { if (o->isRkMatrix()) { myAssert(!isFullMatrix()); if (data.rk) { delete data.rk; } const RkMatrix* oRk = o->data.rk; FullMatrix* newA = oRk->b->copy(); FullMatrix* newB = oRk->a->copy(); data.rk = new RkMatrix(newA, oRk->cols, newB, oRk->rows, oRk->method); } else { myAssert(o->isFullMatrix()); if (data.m) { delete data.m; } const FullMatrix* oF = o->data.m; data.m = oF->copyAndTranspose(); if (oF->diagonal) { if (!data.m->diagonal) { data.m->diagonal = new Vector(oF->rows); strongAssert(data.m->diagonal); } memcpy(data.m->diagonal->v, oF->diagonal->v, oF->rows * sizeof(T)); } } } else { get(0, 0)->copyAndTranspose(o->get(0, 0)); get(1, 1)->copyAndTranspose(o->get(1, 1)); if (get(0, 1)) { get(0, 1)->copyAndTranspose(o->get(1, 0)); } if (get(1, 0)) { get(1, 0)->copyAndTranspose(o->get(0, 1)); } } } template const ClusterData* HMatrix::rows() const { return &(data.rows->data); } template const ClusterData* HMatrix::cols() const { return &(data.cols->data); } template void HMatrix::createPostcriptFile(const char* filename) const { PostscriptDumper dumper; dumper.write(this, filename); } template void HMatrix::dumpTreeToFile(const char* filename) const { ofstream file; const vector* points = rows()->points; int* indices = rows()->indices; string delimiter; file.open(filename); // Points file << "{" << endl << " \"points\": [" << endl; delimiter = ""; for (size_t i = 0; i < points->size(); i++) { file << " " << delimiter << "[" << (*points)[i].x << ", " << (*points)[i].y << ", " << (*points)[i].z << "]" << endl; delimiter = " ,"; } // Mapping file << " ]," << endl << " \"mapping\": [" << endl << " "; delimiter = ""; for (size_t i = 0; i < points->size(); i++) { file << delimiter << indices[i]; delimiter = " ,"; } file << "]," << endl; file << " \"tree\":" << endl; dumpSubTree(file, 0); file << "}" << endl; } template void HMatrix::dumpSubTree(ofstream& f, int depth) const { string prefix(" "); for (int i = 0; i < depth; i++) { prefix += " "; } f << prefix << "{\"isLeaf\": " << (isLeaf() ? "true" : "false") << "," << endl << prefix << " \"depth\": " << depth << "," << endl << prefix << " \"rows\": " << "{\"offset\": " << rows()->offset << ", \"n\": " << rows()->n << ", " << "\"boundingBox\": [[" << data.rows->boundingBox[0].x << ", " << data.rows->boundingBox[0].y << ", " << data.rows->boundingBox[0].z << "], [" << data.rows->boundingBox[1].x << ", " << data.rows->boundingBox[1].y << ", " << data.rows->boundingBox[1].z << "]]}," << endl << prefix << " \"cols\": " << "{\"offset\": " << cols()->offset << ", \"n\": " << cols()->n << ", " << "\"boundingBox\": [[" << data.cols->boundingBox[0].x << ", " << data.cols->boundingBox[0].y << ", " << data.cols->boundingBox[0].z << "], [" << data.cols->boundingBox[1].x << ", " << data.cols->boundingBox[1].y << ", " << data.cols->boundingBox[1].z << "]]},"; if (!isLeaf()) { f << endl << prefix << " \"children\": [" << endl; string delimiter(""); for (int i = 0; i < 4; i++) { const HMatrix* child = static_cast*>(getChild(i)); if (!child) continue; f << delimiter; child->dumpSubTree(f, depth + 1); f << endl; delimiter = ","; } f << prefix << " ]"; } else { // It's a leaf if (isFullMatrix()) { f << endl << prefix << " \"leaf_type\": \"Full\""; } else { myAssert(isRkMatrix()); f << endl << prefix << " \"leaf_type\": \"Rk\", \"k\": " << data.rk->k << ","; f << endl << prefix << " \"eta\": " << this->data.rows->getEta(this->data.cols) << ","; f << endl << prefix << " \"method\": " << this->data.rk->method; } } f << "}"; } template void HMatrix::copy(const HMatrix* o) { DECLARE_CONTEXT; myAssert(*rows() == *o->rows()); myAssert(*cols() == *o->cols()); isLower = o->isLower; isUpper = o->isUpper; isTriUpper = o->isTriUpper; isTriLower = o->isTriLower; if (isLeaf()) { myAssert(o->isLeaf()); if ((!data.m && !o->data.m) && (!data.rk && !o->data.rk)) { return; } // When the matrix has not allocated but only the structure if (o->isFullMatrix() && (!data.m)) { data.m = FullMatrix::Zero(o->data.m->rows, o->data.m->cols); } else if (o->isRkMatrix() && (!data.rk)) { data.rk = new RkMatrix(NULL, o->data.rk->rows, NULL, o->data.rk->cols, o->data.rk->method); } myAssert((isRkMatrix() == o->isRkMatrix()) && (isFullMatrix() == o->isFullMatrix())); if (o->isRkMatrix()) { data.rk->copy(o->data.rk); } else { myAssert(isFullMatrix()); if (o->data.m->diagonal) { if (!data.m->diagonal) { data.m->diagonal = new Vector(o->data.m->rows); strongAssert(data.m->diagonal); } memcpy(data.m->diagonal->v, o->data.m->diagonal->v, o->data.m->rows * sizeof(T)); } data.m->isTriLower = o->data.m->isTriLower; data.m->isTriUpper = o->data.m->isTriUpper; data.m->copyMatrixAtOffset(o->data.m, 0, 0); myAssert(data.m->m); } } else { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { if (o->get(i, j)) { myAssert(get(i, j)); get(i, j)->copy(o->get(i, j)); } else { myAssert(!get(i, j)); } } } } } template void HMatrix::clear() { if (isLeaf()) { if (isFullMatrix()) { data.m->clear(); } else { myAssert(isRkMatrix()); delete data.rk; data.rk = new RkMatrix(NULL, rows(), NULL, cols(), NoCompression); } } else { for (int i = 0; i < 4; i++) { HMatrix* child = static_cast*>(getChild(i)); if (child) child->clear(); } } } template void HMatrix::inverse(HMatrix* tmp, int depth) { DECLARE_CONTEXT; bool hasToFreeTmp = false; if (!tmp) { hasToFreeTmp = true; tmp = HMatrix::Zero(this); } myAssert(*(rows()) == *(tmp->rows())); myAssert(*(cols()) == *(tmp->cols())); if (this->isLeaf()) { myAssert(isFullMatrix()); data.m->inverse(); } else { myAssert(!tmp->isLeaf()); HMatrix* m11 = static_cast*>(getChild(0)); HMatrix* m21 = static_cast*>(getChild(1)); HMatrix* m12 = static_cast*>(getChild(2)); HMatrix* m22 = static_cast*>(getChild(3)); HMatrix* x11 = static_cast*>(tmp->getChild(0)); HMatrix* x21 = static_cast*>(tmp->getChild(1)); HMatrix* x12 = static_cast*>(tmp->getChild(2)); HMatrix* x22 = static_cast*>(tmp->getChild(3)); // cout << prefix << "X11 <- X11^-1" << endl; // Destroy x11 // X11 <- X11^-1 m11->inverse(x11, depth + 1); // cout << prefix << "X12 <- -M11^-1 M12" << endl; // X12 <- - M11^-1 M12 x12->gemm('N', 'N', Constants::mone, m11, m12, Constants::zero); // cout << prefix << "X21 <- M21 M11^-1" << endl; // X21 <- M21 * M11^-1 x21->gemm('N', 'N', Constants::pone, m21, m11, Constants::zero); // cout << prefix << "M22 <- M22 - M21 M11^-1 M12" << endl; // M22 <- M22 - M21 M11^-1 M12 = S m22->gemm('N', 'N', Constants::pone, m21, x12, Constants::pone); // cout << prefix << "M22 < S^-1" << endl; // M22 <- S^-1 // The contents of X22 is deleted m22->inverse(x22, depth + 1); // cout << prefix << "M12 <- -M11^-1 M12 S^-1" << endl; // M12 <- -M11^-1 M12 S^-1 m12->gemm('N', 'N', Constants::pone, x12, m22, Constants::zero); // cout << prefix << "M11 <- M11 + M11^-1 M12 S^-1 M21 M11^-1" << endl; // M11 <- M11 + M11^-1 M12 S^-1 M21 M11^-1 m11->gemm('N', 'N', Constants::mone, m12, x21, Constants::pone); // cout << prefix << "M21 <- S^-1 M21 M11^-1" << endl; // M21 <- -S^-1 M21 M11^-1 m21->gemm('N', 'N', Constants::mone, m22, x21, Constants::zero); } if (hasToFreeTmp) { delete tmp; } } template void HMatrix::solveLowerTriangular(HMatrix* b) const { DECLARE_CONTEXT; // At first, the recursion one (simple case) if (!isLeaf() && !b->isLeaf()) { // Forward substitution: // [ L11 | 0 ] [ X11 | X12 ] [ b11 | b12 ] // [ ----+---- ] * [-----+-----] = [ ----+---- ] // [ L21 | L22 ] [ X21 | X22 ] [ b21 | b22 ] // // L11 * X11 = b11 (by recursive forward substitution) // L11 * X12 = b12 (by recursive forward substitution) // L21 * X11 + L22 * X21 = b21 (forward substitution of L22*X21=b21-L21*X11) // L21 * X12 + L22 * X22 = b22 (forward substitution of L22*X22=b22-L21*X12) // const HMatrix* l11 = get(0, 0); const HMatrix* l21 = get(1, 0); const HMatrix* l22 = get(1, 1); HMatrix* b11 = b->get(0, 0); HMatrix* b21 = b->get(1, 0); HMatrix* b12 = b->get(0, 1); HMatrix* b22 = b->get(1, 1); l11->solveLowerTriangular(b11); l11->solveLowerTriangular(b12); b21->gemm('N', 'N', Constants::mone, l21, b11, Constants::pone); l22->solveLowerTriangular(b21); b22->gemm('N', 'N', Constants::mone, l21, b12, Constants::pone); l22->solveLowerTriangular(b22); } else { // if B is a leaf, the resolve is done by column if (b->isLeaf()) { if (b->isFullMatrix()) { this->solveLowerTriangular(b->data.m); } else { myAssert(b->isRkMatrix()); if (b->data.rk->k == 0) { return; } this->solveLowerTriangular(b->data.rk->a); } } else { // B isn't a leaf, then 'this' is one myAssert(this->isLeaf()); // Evaluate B, solve by column, and restore in the matrix // TODO: check if it's not too bad FullMatrix* bFull = new FullMatrix(b->rows()->n, b->cols()->n); b->evalPart(bFull, b->rows(), b->cols()); this->solveLowerTriangular(bFull); b->clear(); b->axpy(Constants::pone, bFull, b->rows(), b->cols()); delete bFull; } } } template void HMatrix::solveLowerTriangular(FullMatrix* b) const { DECLARE_CONTEXT; myAssert(*rows() == *cols()); myAssert(cols()->n == b->rows); // Here : the change : OK or not ??????? : cols <-> rows if (this->isLeaf()) { myAssert(this->isFullMatrix()); // LAPACK resolution this->data.m->solveLowerTriangular(b); } else { const HMatrix* l11 = get(0, 0); const HMatrix* l21 = get(1, 0); const HMatrix* l22 = get(1, 1); FullMatrix b1(b->m, l11->cols()->n, b->cols, b->lda); FullMatrix b2(b->m + l11->cols()->n, l22->cols()->n, b->cols, b->lda); l11->solveLowerTriangular(&b1); l21->gemv('N', Constants::mone, &b1, Constants::pone, &b2); l22->solveLowerTriangular(&b2); } } template void HMatrix::solveUpperTriangular(HMatrix* b, bool lowerStored) const { DECLARE_CONTEXT; myAssert(*b->cols() == *this->rows()); myAssert(*this->rows() == *this->cols()); myAssert(*b->cols() == *this->cols()); // The recursion one (simple case) if (!isLeaf() && !b->isLeaf()) { const HMatrix* u11 = get(0, 0); const HMatrix* u12 = lowerStored ? get(1, 0) : get(0, 1); const HMatrix* u22 = get(1, 1); HMatrix* b11 = b->get(0, 0); HMatrix* b21 = b->get(1, 0); HMatrix* b12 = b->get(0, 1); HMatrix* b22 = b->get(1, 1); u11->solveUpperTriangular(b11, lowerStored); u11->solveUpperTriangular(b21, lowerStored); // B12 <- -B11*U12 + B12 b12->gemm('N', lowerStored ? 'T' : 'N', Constants::mone, b11, u12, Constants::pone); // B12 <- the solution of X * U22 = B12 u22->solveUpperTriangular(b12, lowerStored); // B22 <- - B21*U12 + B22 b22->gemm('N', lowerStored ? 'T' : 'N', Constants::mone, b21, u12, Constants::pone); // B22 <- the solution of X*U22 = B22 u22->solveUpperTriangular(b22, lowerStored); } else { // if B is a leaf, the resolve is done by row if (b->isLeaf()) { if (b->isFullMatrix()) { b->data.m->transpose(); this->solveUpperTriangular(b->data.m, lowerStored); b->data.m->transpose(); } else { // Xa Xb^t U = Ba Bb^t // - Xa = Ba // - Xb^t U = Bb^t // Xb is stored without been transposed // it become again a resolution by column of Bb myAssert(b->isRkMatrix()); if (b->data.rk->k == 0) { return; } this->solveUpperTriangular(b->data.rk->b, lowerStored); } } else { // B is not a leaf, then so is L myAssert(isLeaf()); myAssert(isFullMatrix()); // Evaluate B, solve by column and restore all in the matrix // TODO: check if it's not too bad FullMatrix* bFull = new FullMatrix(b->rows()->n, b->cols()->n); b->evalPart(bFull, b->rows(), b->cols()); bFull->transpose(); this->solveUpperTriangular(bFull, lowerStored); bFull->transpose(); // int bRows = b->rows()->n; // int bCols = b->cols()->n; // Vector bRow(bCols); // for (int row = 0; row < bRows; row++) { // blasCopy(bCols, bFull->m + row, bRows, bRow.v, 1); // FullMatrix tmp(bRow.v, bRow.rows, 1); // this->solveUpperTriangular(&tmp); // blasCopy(bCols, bRow.v, 1, bFull->m + row, bRows); // } // } b->clear(); b->axpy(Constants::pone, bFull, b->rows(), b->cols()); delete bFull; } } } /* Resolve U.X=B, solution saved in B, with B Hmat */ template void HMatrix::solveUpperTriangularLeft(HMatrix* b) const { DECLARE_CONTEXT; // At first, the recursion one (simple case) if (!isLeaf() && !b->isLeaf()) { const HMatrix* u11 = get(0, 0); const HMatrix* u12 = get(0, 1); const HMatrix* u22 = get(1, 1); HMatrix* b11 = b->get(0, 0); HMatrix* b21 = b->get(1, 0); HMatrix* b12 = b->get(0, 1); HMatrix* b22 = b->get(1, 1); u22->solveUpperTriangularLeft(b21); u22->solveUpperTriangularLeft(b22); b12->gemm('N', 'N', Constants::mone, u12, b22, Constants::pone); u11->solveUpperTriangularLeft(b12); b11->gemm('N', 'N', Constants::mone, u12, b21, Constants::pone); u11->solveUpperTriangularLeft(b11); } else { // if B is a leaf, the resolve is done by column if (b->isLeaf()) { if (b->isFullMatrix()) { this->solveUpperTriangularLeft(b->data.m); } else { myAssert(b->isRkMatrix()); if (b->data.rk->k != 0) { this->solveUpperTriangularLeft(b->data.rk->a); } } } else { // B isn't a leaf, then so is L myAssert(this->isLeaf()); // Evaluate B, solve by column, and restore in the matrix // TODO: check if it's not too bad FullMatrix* bFull = new FullMatrix(b->rows()->n, b->cols()->n); b->evalPart(bFull, b->rows(), b->cols()); this->solveUpperTriangularLeft(bFull); b->clear(); b->axpy(Constants::pone, bFull, b->rows(), b->cols()); delete bFull; } } } template void HMatrix::solveUpperTriangular(FullMatrix* b, bool loweredStored) const { DECLARE_CONTEXT; myAssert(*rows() == *cols()); // B is supposed given in form of a row vector, but transposed // so we can deal it with a subset as usual. if (this->isLeaf()) { myAssert(this->isFullMatrix()); FullMatrix* bCopy = b->copyAndTranspose(); this->data.m->solveUpperTriangular(bCopy, loweredStored); bCopy->transpose(); b->copyMatrixAtOffset(bCopy, 0, 0); delete bCopy; } else { const HMatrix* u11 = get(0, 0); const HMatrix* u12 = loweredStored ? get(1, 0) : get(0, 1); const HMatrix* u22 = get(1, 1); FullMatrix b1(b->m, u11->rows()->n, b->cols, b->lda); FullMatrix b2(b->m + u11->rows()->n, u22->rows()->n, b->cols, b->lda); u11->solveUpperTriangular(&b1, loweredStored); // b2 <- -x1 U12 + b2 = -U12^t x1^t + b2 u12->gemv(loweredStored ? 'N' : 'T', Constants::mone, &b1, Constants::pone, &b2); u22->solveUpperTriangular(&b2, loweredStored); } } template void HMatrix::solveUpperTriangularLeft(FullMatrix* b, bool lowerStored) const { DECLARE_CONTEXT; myAssert(*rows() == *cols()); if (this->isLeaf()) { this->data.m->solveUpperTriangularLeft(b, lowerStored); } else { const HMatrix* u11 = get(0, 0); const HMatrix* u12 = (lowerStored ? get(1, 0) : get(0, 1)); const HMatrix* u22 = get(1, 1); FullMatrix b1(b->m, u11->cols()->n, b->cols, b->lda); FullMatrix b2(b->m + u11->cols()->n, u22->cols()->n, b->cols, b->lda); u22->solveUpperTriangularLeft(&b2, lowerStored); // b1 <- -U12 b2 + b1 u12->gemv(lowerStored ? 'T' : 'N', Constants::mone, &b2, Constants::pone, &b1); u11->solveUpperTriangularLeft(&b1, lowerStored); } } template void HMatrix::lltDecomposition() { // | | | | | | | | | // | h11 | h21 | | L1 | | | L1t | Lt | // |-----|-----| = |-----|-----| * |-----|-----| // | h21 | h22 | | L | L2 | | | L2t | // | | | | | | | | | // // h11 = L1 * L1t => L1 = h11.lltDecomposition // h21 = L*L1t => L = L1t.solve(h21) => trsm R with L1 lower stored // h22 = L*Lt + L2 * L2t => L2 = (h22 - L*Lt).lltDecomposition() // // #ifdef DEBUG_LDLT assertLower(); #endif if(isLeaf()) { data.m->lltDecomposition(); } else { strongAssert(isLower); strongAssert(!get(0,1)); HMatrix* h11 = get(0,0); HMatrix* h21 = get(1,0); HMatrix* h22 = get(1,1); h11->lltDecomposition(); h11->solveUpperTriangular(h21, true); h22->gemm('N', 'T', Constants::mone, h21, h21, Constants::pone); h22->lltDecomposition(); } isTriLower = true; } template void HMatrix::luDecomposition() { DECLARE_CONTEXT; if (isLeaf()) { myAssert(isFullMatrix()); data.m->luDecomposition(); data.m->checkNan(); } else { HMatrix* h11 = get(0, 0); HMatrix* h21 = get(1, 0); HMatrix* h12 = get(0, 1); HMatrix* h22 = get(1, 1); // H11 <- L11 * U11 h11->luDecomposition(); const HMatrix* l11 = h11; const HMatrix* u11 = h11; // Solve L11 U12 = H12 (get U12) l11->solveLowerTriangular(h12); // Solve L21 U11 = H21 (get L21) u11->solveUpperTriangular(h21); // H22 <- H22 - L21 U12 h22->gemm('N', 'N', Constants::mone, h21, h12, Constants::pone); h22->luDecomposition(); } } template void HMatrix::mdmtProduct(const HMatrix* m, const HMatrix* d) { DECLARE_CONTEXT; // this <- this - M * D * M^T // // D is stored separately in full matrix of diagonal leaves (see full_matrix.hpp). // this is symmetric and stored as lower triangular. // Warning: d must be the result of an ldlt factorization #ifdef DEBUG_LDLT assertLower(); #endif myAssert(*d->rows() == *d->cols()); // D is square myAssert(*this->rows() == *this->cols()); // this is square myAssert(*m->cols() == *d->rows()); // Check if we can have the produit M*D and D*M^T myAssert(*this->rows() == *m->rows()); if(!isLeaf()) { HMatrix* h11 = get(0,0); HMatrix* h21 = get(1,0); HMatrix* h22 = get(1,1); if (!m->isLeaf()) { HMatrix* m11 = m->get(0,0); HMatrix* m21 = m->get(1,0); HMatrix* m12 = m->get(0,1); HMatrix* m22 = m->get(1,1); HMatrix* d11 = d->get(0,0); HMatrix* d22 = d->get(1,1); h11->mdmtProduct(m11, d11); h11->mdmtProduct(m12, d22); HMatrix* x = Zero(m21); x->copy(m21); x->multiplyWithDiag(d11); h21->gemm('N', 'T', Constants::mone, x, m11, Constants::pone); delete x; HMatrix* y = Zero(m22); y->copy(m22); myAssert(*y->cols() == *d22->rows()); y->multiplyWithDiag(d22, false); h21->gemm('N', 'T', Constants::mone, y, m12, Constants::pone); delete y; h22->mdmtProduct(m21, d11); h22->mdmtProduct(m22, d22); } else if (m->isRkMatrix()) { HMatrix* m_copy = Zero(m); m_copy->copy(m); myAssert(*m->cols() == *d->rows()); myAssert(*m_copy->data.rk->cols == *d->rows()); m_copy->multiplyWithDiagOrDiagInv(d, false, false); // right multiplication by D RkMatrix* rkMat = RkMatrix::multiplyRkRk('N', 'T', m_copy->data.rk, m->data.rk); delete m_copy; this->axpy(Constants::mone, rkMat); delete rkMat; } else { myAssert(m->isFullMatrix()); HMatrix* copy_m = Zero(m); strongAssert(copy_m); copy_m->copy(m); copy_m->multiplyWithDiagOrDiagInv(d, false, false); // right multiplication by D FullMatrix* fullMat = HMatrix::multiplyFullMatrix('N', 'T', copy_m, m); strongAssert(fullMat); delete copy_m; this->axpy(Constants::mone, fullMat, rows(), cols()); delete fullMat; } } else { myAssert(isFullMatrix()); if (m->isRkMatrix()) { // this : full // m : rk // Strategy: compute mdm^T as FullMatrix and then do this<-this - mdm^T // 1) copy m = AB^T : m_copy // 2) m_copy <- m_copy * D (multiplyWithDiag) // 3) rkMat <- multiplyRkRk ( m_copy , m^T) // 4) fullMat <- evaluation as a FullMatrix of the product rkMat = (A*(D*B)^T) * (A*B^T)^T // 5) this <- this - fullMat HMatrix* m_copy = Zero(m); m_copy->copy(m); m_copy->multiplyWithDiagOrDiagInv(d, false, false); RkMatrix* rkMat = RkMatrix::multiplyRkRk('N', 'T', m_copy->data.rk, m->data.rk); FullMatrix* fullMat = rkMat->eval(); delete m_copy; delete rkMat; data.m->axpy(Constants::mone, fullMat); delete fullMat; } else if (m->isFullMatrix()) { // S <- S - M*D*M^T myAssert(!data.m->isTriUpper); myAssert(!data.m->isTriLower); myAssert(!m->data.m->isTriUpper); myAssert(!m->data.m->isTriLower); FullMatrix mTmp(m->data.m->rows, m->data.m->cols); mTmp.copyMatrixAtOffset(m->data.m, 0, 0); if (d->isFullMatrix()) { mTmp.multiplyWithDiagOrDiagInv(d->data.m->diagonal, false, false); } else { Vector diag(d->cols()->n); d->getDiag(&diag); mTmp.multiplyWithDiagOrDiagInv(&diag, false, false); } data.m->gemm('N', 'T', Constants::mone, &mTmp, m->data.m, Constants::pone); } } } #ifdef DEBUG_LDLT template bool HMatrix::assertLdlt() const { if (isLeaf()) { myAssert(this->isFullMatrix()); myAssert(data.m->diagonal); myAssert(data.m->isTriLower); return ((data.m->diagonal != NULL) && data.m->isTriLower); } else assert(isTriLower); return (get(0,0)->assertLdlt()) && (get(1,1)->assertLdlt()); } template void HMatrix::assertLower() { if (this->isLeaf()) { return; } else { myAssert(isLower); myAssert(!get(0,1)); get(0,0)->assertLower(); //isLower || get(0,0)->isLeaf()); get(1,1)->assertLower(); // isLower || get(1,1)->isLeaf()); } } template void HMatrix::assertUpper() { if (this->isLeaf()) { return; } else { myAssert(isUpper); myAssert(!get(1,0)); get(0,0)->assertUpper(); //isLower || get(0,0)->isLeaf()); get(1,1)->assertUpper(); // isLower || get(1,1)->isLeaf()); get(0,1)->assertAllocFull(); } } template void HMatrix::assertAllocFull() { if (this->isLeaf()) { if (isFullMatrix()) myAssert(data.m->m); } else { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) if (get(i,j)) get(i,j)->assertAllocFull(); } } #endif template void HMatrix::ldltDecomposition() { DECLARE_CONTEXT; #ifdef DEBUG_LDLT this->assertLower(); #endif if (isLeaf()) { //The basic case of the recursion is necessarily a full matrix leaf //since the recursion is done with *rows() == *cols(). myAssert(isFullMatrix()); this->data.m->ldltDecomposition(); myAssert(this->data.m->diagonal); } else { HMatrix* h11 = get(0,0); HMatrix* h21 = get(1,0); HMatrix* h22 = get(1,1); // H11 <- L11 and D11 is stored additionally to each diagonal leaf h11->ldltDecomposition(); #ifdef DEBUG_LDLT myAssert(h11->assertLdlt()); #endif HMatrix* y = Zero(h11); // H11 is lower triangular, therefore either upper or lower part is NULL y->copy(h11); // X <- copy(L11) // Y <- Y*D11 , D11 is stocked in h11 (in diagonal leaves) y->multiplyWithDiag(h11); // MultiplyWithDiag takes into account the fact that "y" is Upper or Lower // Y <- Y^T #ifdef DEBUG_LDLT y->assertLower(); #endif // The transpose function keeps the Upper or Lower matrix but // reverse storage. y->transpose(); // H21 <- solution of X*Y = H21, with Y = (L11 * D11)^T give L21 // stored in H21 myAssert(y->isUpper || y->isLeaf()); myAssert(!y->isLower); y->solveUpperTriangular(h21); #ifdef DEBUG_LDLT y->assertUpper(); y->assertAllocFull(); #endif delete y; // H22 <- H22 - L21 * D11 * L21^T // D11 is contained on the diagonal leaves (which are full) h22->mdmtProduct(h21, h11); #ifdef DEBUG_LDLT h22->assertAllocFull(); h22->assertLower(); #endif h22->ldltDecomposition(); } isTriLower = true; } template void HMatrix::solve(FullMatrix* b) const { DECLARE_CONTEXT; this->solveLowerTriangular(b); this->solveUpperTriangularLeft(b, false); } template void HMatrix::getDiag(Vector* diag, int start) const { DECLARE_CONTEXT; if(isLeaf()) { myAssert(isFullMatrix()); if(data.m->diagonal) { // LDLt memcpy(diag->v + start, data.m->diagonal->v, data.m->rows * sizeof(T)); } else { // LLt for (int i = 0; i < data.m->rows; ++i) diag->v[start + i] = data.m->m[i*data.m->rows + i]; } } else { get(0,0)->getDiag(diag, start); get(1,1)->getDiag(diag, start + get(0,0)->rows()->n); } } /* Solve M.X=B with M hmat LU factorized*/ template void HMatrix::solve(HMatrix* b) const { DECLARE_CONTEXT; /* Solve LX=B, result in B */ this->solveLowerTriangular(b); /* Solve UX=B, result in B */ this->solveUpperTriangularLeft(b); } template void HMatrix::solveDiagonal(FullMatrix* b) const { // Diagonal extraction Vector diag(cols()->n); getDiag(&diag); // TODO: use BLAS for that for (int j = 0; j < b->cols; j++) { for (int i = 0; i < b->rows; i++) { b->get(i, j) = b->get(i, j) / diag.v[i]; } } } template void HMatrix::solveLdlt(FullMatrix* b) const { DECLARE_CONTEXT; #ifdef DEBUG_LDLT assertLdlt(); #endif // L*D*L^T * X = B // B <- solution of L * Y = B : Y = D*L^T * X this->solveLowerTriangular(b); // B <- D^{-1} Y : solution of D*Y = B : Y = L^T * X this->solveDiagonal(b); // B <- solution of L^T X = B : the solution X we are looking for is stored in B this->solveUpperTriangularLeft(b, true); } template void HMatrix::checkNan() const { return; if (isLeaf()) { if (isFullMatrix()) { data.m->checkNan(); } if (isRkMatrix()) { data.rk->checkNan(); } } else { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { if (get(i, j)) { get(i, j)->checkNan(); } } } } } template void HMatrix::setTriLower(bool value) { isTriLower = value; if(!isLeaf()) { get(0, 0)->setTriLower(value); get(1, 1)->setTriLower(value); } } // Templates declaration template class HMatrix; template class HMatrix; template class HMatrix; template class HMatrix; template void reorderVector(FullMatrix* v, int* indices); template void reorderVector(FullMatrix* v, int* indices); template void reorderVector(FullMatrix* v, int* indices); template void reorderVector(FullMatrix* v, int* indices); template void restoreVectorOrder(FullMatrix* v, int* indices); template void restoreVectorOrder(FullMatrix* v, int* indices); template void restoreVectorOrder(FullMatrix* v, int* indices); template void restoreVectorOrder(FullMatrix* v, int* indices); hmat-oss-1.0.4/src/h_matrix.hpp000066400000000000000000000402021245574051100163640ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Implementation of the HMatrix class. */ #ifndef _H_MATRIX_HPP #define _H_MATRIX_HPP #include "tree.hpp" #include "interaction.hpp" #include "data_types.hpp" #include "full_matrix.hpp" #include "cluster_tree.hpp" #include #include #include template class Vector; template class RkMatrix; /** Flag used to describe the symmetry of a matrix. */ enum SymmetryFlag {kNotSymmetric, kLowerSymmetric}; /** Degrees of freedom permutation of a vector required in HMatrix context. In order that the subsets of rows and columns are contiguous in HMatrix, we must reorder the elements of the vector. This order is induced by the array of indices after the construction of ClusterTree, which must be passed as a parameter. \param v Vector to reorder. \param indices Array of indices after construction ClusterTree. */ template void reorderVector(FullMatrix* v, int* indices); /** Inverse permutation of a vector. See \a reorderVector () for more details. \param v Vector to reorder of the problem. \param indices Array of indices after construction ClusterTree. */ template void restoreVectorOrder(FullMatrix* v, int *indices); /*! \brief Data held by an HMatrix tree node. */ template class HMatrixData { public: /// Rows of this HMatrix block ClusterTree * rows; /// Columns of this HMatrix block ClusterTree * cols; /// Compressed block, or NULL if the block is not a leaf or is full. RkMatrix *rk; /// Full block, or NULL if the block is not a leaf or is compressed. FullMatrix *m; public: HMatrixData() : rows(NULL), cols(NULL), rk(NULL), m(NULL) {} HMatrixData(ClusterTree* _rows, ClusterTree* _cols) : rows(_rows), cols(_cols), rk(NULL), m(NULL) {} ~HMatrixData(); /*! \brief Return true if the block is admissible. */ bool isAdmissibleLeaf() const { return (rows->isAdmissibleWith(cols)); } }; /*! \brief The HMatrix class, representing a HMatrix. It is a tree of arity arity(ClusterTree)^2, 4 in this case. An HMatrix is a tree-like structure that is: - a Leaf : in this case the node is either really a RkMatrix (compressed block), or a small dense block. - an internal node : in this case, it has 4 children that form a partition of the HMatrix Dofs, and the node doesn't carry data itself. */ template class HMatrix : public Tree<4> { friend class RkMatrix; public: /*! \brief Create a HMatrix based on a row and column ClusterTree. \param _rows The row cluster tree \param _cols The column cluster tree */ HMatrix(ClusterTree* _rows, ClusterTree* _cols, SymmetryFlag symFlag = kNotSymmetric); /*! \brief HMatrix assembly. */ void assemble(const AssemblyFunction& f); /*! \brief HMatrix assembly. \param f the assembly function \param upper the upper part of the matrix. If NULL, it is assumed that upper=this (that is, the current block is on the diagonal) \param onlyLower if true, only assemble the lower part of the matrix, ie don't copy. */ void assembleSymmetric(const AssemblyFunction& f, HMatrix* upper=NULL, bool onlyLower=false); /*! \brief Evaluate the HMatrix, ie converts it to a full matrix. This conversion does the reorderng of the unknowns such that the resulting matrix can directly be used or compared with a full matrix assembled otherwise. \param result a FullMatrix that has to be preallocated at the same size than this. */ void eval(FullMatrix* result, bool renumber = true) const; /*! \brief Evaluate this as a subblock of the larger matrix result. _rows and _cols are the rows and columns of the result matrix. This has to be a subset of _rows and _cols, and it is put at its place inside the result matrix. This function does not do any reodering. \param result Result matrix, of size (_rows->n, _cols->n) \param _rows Rows of the result matrix \param _cols Columns of the result matrix */ void evalPart(FullMatrix* result, const ClusterData* _rows, const ClusterData* _cols) const; /*! Compute the compression ratio of the HMatrix. \return the pair (elements_stored, total_elements). */ std::pair compressionRatio() const; /** This *= alpha \param alpha scaling factor */ void scale(T alpha); /** Compute y <- alpha * op(A) * y + beta * y. The arguments are similar to BLAS GEMV. */ void gemv(char trans, T alpha, const Vector* x, T beta, Vector* y) const; /** Compute y <- alpha * op(A) * y + beta * y. The arguments are similar to BLAS GEMV. This version is in principle similar to GEMM(), but if you take enough drug, it is not the same thing at all. */ void gemv(char trans, T alpha, const FullMatrix* x, T beta, FullMatrix* y) const; /*! \brief this <- alpha * A * B + beta * C \param transA 'N' or 'T', as in BLAS \param transB 'N' or 'T', as in BLAS \param alpha alpha \param a the matrix A \param b the matrix B \param beta beta */ void gemm(char transA, char transB, T alpha, const HMatrix* a, const HMatrix*b, T beta, int depth=0); /*! \brief S <- S - M * D * M^T, where S is symmetric (Lower stored), D diagonal, this = S \warning D has to be reduced in ldlt form with d->ldltDecomposition() before \param m M \param d D : only the diagonal of this matrix is considered */ void mdmtProduct(const HMatrix * m, const HMatrix * d); /** Create a matrix filled with 0s, with the same structure as H. \param h the model matrix, \return a 0 matrix with the same structure as H. */ static HMatrix* Zero(const HMatrix* h); /** Create a matrix filled with 0s, based on 2 ClusterTree. \param rows the row ClusterTree. \param cols the column ClusterTree. \return a 0 HMatrix. */ static HMatrix* Zero(const ClusterTree* rows, const ClusterTree* cols); /*! \brief Create a Postscript file representing the HMatrix. The result .ps file shows the matrix structure and the compression ratio. In the output, red = full block, green = compressed. The darker the green, the worst the compression ration is. There is saturation at black when the block size is divided by less than 5. \param filename output filename. */ void createPostcriptFile(const char* filename) const; /*! \brief Dump some HMatrix metadata to a Python-readable file. This function create a file that is readable by Python's eval() function, which contains a dictionnary with the following data: {'points': [(x1, y1, z1), ...], 'mapping': [indices[0], indices[1], ...], 'tree': { 'isLeaf': False, 'depth': 0, 'rows': {'offset': 0, 'n': 15243, 'boundingBox': [(-0.0249617, -0.0249652, -0.0249586), (0.0962927, 0.0249652, 0.0249688)]}, 'cols': {'offset': 0, 'n': 15243, 'boundingBox': [(-0.0249617, -0.0249652, -0.0249586), (0.0962927, 0.0249652, 0.0249688)]}, 'children': [child1, child2, child3, child4] } } \param filename path to the output file. */ void dumpTreeToFile(const char* filename) const; /** this <- o (copy) \param o The HMatrix t copy */ void copy(const HMatrix* o); /** Copy the structure of an HMatrix without copying its content. \return an empty HMatrix (not even the Full leaves are allocated) mirroring the structure of this. */ HMatrix* copyStructure() const; /*! \brief Return the Frobenius norm of the matrix. */ double norm() const; /** Set a matrix to 0. */ void clear(); /** Inverse an HMatrix in place. \param tmp temporary HMatrix used in the inversion. If set, it must have the same structure as this. Otherwise, it is allocated at the start of the computation (and will be freed at the end). \param depth The depth, used for pretty printing purposes */ void inverse(HMatrix* tmp=NULL, int depth=0); /*! \brief Transpose the H-matrix in place */ void transpose(); /*! \brief this <- o^t \param o */ void copyAndTranspose(const HMatrix* o); /*! \brief LU decomposition in place. \warning Do not use. Doesn't work */ void luDecomposition(); /* \brief LDL^t decomposition in place \warning this has to be created with the flag lower \warning this has to be assembled with assembleSymmetric with onlyLower = true */ void ldltDecomposition(); void lltDecomposition(); private: /*! \brief Auxiliary function used by HMatrix::dumpTreeToFile(). */ void dumpSubTree(std::ofstream& f, int depth) const; public: /*! \brief Build a "fake" HMatrix for internal use only */ HMatrix(); /** This <- This + alpha * b \param alpha \param b */ void axpy(T alpha, const HMatrix* b); /** This <- This + alpha * b \param alpha \param b */ void axpy(T alpha, const RkMatrix* b); /** This <- This + alpha * b \param alpha \param b \param rows \param cols */ void axpy(T alpha, const FullMatrix* b, const ClusterData* rows, const ClusterData* cols); /*! Return true if this is a full block. */ inline bool isFullMatrix() const { return isLeaf() && data.m; }; /* Return the full matrix corresponding to the current leaf */ FullMatrix* getFullMatrix() const { assert(isFullMatrix()); return data.m; } /*! Return true if this is a compressed block. */ inline bool isRkMatrix() const { return isLeaf() && data.rk; }; /*! Return true if this is not a leaf. */ inline bool isHMatrix() const { return !isLeaf(); }; /*! \brief Return F * H (F Full, H divided) */ static FullMatrix* multiplyFullH(char transM, char transH, const FullMatrix* m, const HMatrix* h); /*! \brief Return H * F (F Full, H divided) */ static FullMatrix* multiplyHFull(char transH, char transM, const HMatrix* h, const FullMatrix* m); /*! \brief Multiplication de deux HMatrix dont au moins une est une RkMatrix. Le resultat est alors une RkMatrix. \param transA 'T' ou 'N' selon si A est transposee ou non \param transB 'T' ou 'N' selon si B est transposee ou non \param a A \param b B */ static RkMatrix* multiplyRkMatrix(char transA, char transB, const HMatrix* a, const HMatrix* b); /** Multiplication de deux HMatrix dont au moins une est une matrice pleine, et aucune n'est une RkMatrix. Le resultat est alors une matrice pleine. */ static FullMatrix* multiplyFullMatrix(char transA, char transB, const HMatrix* a, const HMatrix* b); /*! \brief B <- B*D : ou B = this et D est en argument \warning D doit avoir ete decomposee en LDL^T avant \param d matrice D */ void multiplyWithDiag(const HMatrix* d, bool left = false); /*! \brief B <- B*D ou B <- B*D^-1 (ou idem a gauche) : ou B = this et D est en argument \param d matrice D \param inverse true : B<-B*D^-1, false B<-B*D \param left true : B<-D*B, false B<-B*D */ void multiplyWithDiagOrDiagInv(const HMatrix* d, bool inverse, bool left = false); /*! \brief Resolution du systeme L X = B, avec this = L, et X = B. \param b la matrice B en entree, et X en sortie. */ void solveLowerTriangular(HMatrix* b) const; /*! \brief Resolution du systeme L x = x, avec this = L, et x = b vecteur. B est un vecteur a plusieurs colonnes, donc une FullMatrix. \param b Le vecteur b en entree, et x en sortie. */ void solveLowerTriangular(FullMatrix* b) const; /*! Resolution de X U = B, avec U = this, et X = B. \param b la matrice B en entree, X en sortie */ void solveUpperTriangular(HMatrix* b, bool loweredStored = false) const; /*! Resolution de U X = B, avec U = this, et X = B. \param b la matrice B en entree, X en sortie */ void solveUpperTriangularLeft(HMatrix* b) const; /*! Resolution de x U = b, avec U = this, et x = b. \warning b est un vecteur ligne et non colonne. \param b Le vecteur b en entree, x en sortie. */ void solveUpperTriangular(FullMatrix* b, bool loweredStored = false) const; /*! Resolution de U x = b, avec U = this, et x = b. U peut etre en fait L^T ou L est une matrice stockee inferieurement en precisant lowerStored = true \param b Le vecteur b en entree, x en sortie. \param indice les indices portes par le vecteur \param lowerStored indique le stockage de la matrice U ou L^T */ void solveUpperTriangularLeft(FullMatrix* b, bool lowerStored=false) const; /* Solve D x = b, in place with D a diagonal matrix. \param b Input: B, Output: X */ void solveDiagonal(FullMatrix* b) const; /*! Resolution de This * x = b. \warning This doit etre factorisee avec \a HMatrix::luDecomposition() avant. */ void solve(FullMatrix* b) const; /*! Resolution de This * X = b. \warning This doit etre factorisee avec \a HMatrix::luDecomposition() avant. */ void solve(HMatrix* b) const; /*! Resolution de This * x = b. \warning This doit etre factorisee avec \a HMatrix::ldltDecomposition() avant. */ void solveLdlt(FullMatrix* b) const ; /*! Triggers an assertion is the HMatrix contains any NaN. */ void checkNan() const; /** Recursively set the isTriLower flag on this matrix */ void setTriLower(bool value); public: const ClusterData* rows() const; const ClusterData* cols() const; /*! Return the child (i, j) of this. \warning do not use on a leaf ! \param i row \param j column \return the (i,j) child of this. */ inline HMatrix* get(int i, int j) const { return static_cast*>(getChild(i + j * 2)); } public: /// Should try to coarsen the matrix at assembly static bool coarsening; /// Should recompress the matrix after assembly static bool recompress; /// Validate the rk-matrices after compression static bool validateCompression; /// For blocks above error threshold, re-run the compression algorithm static bool validationReRun; /// For blocks above error threshold, dump the faulty block to disk static bool validationDump; /// Error threshold for the compression validation static double validationErrorThreshold; HMatrixData data; bool isUpper, isLower; /// symmetric, upper or lower stored bool isTriUpper, isTriLower; /// upper/lower triangular private: /* \brief Resolution de X * D = B, avec D = this (matrice dont on ne tient compte que de la diagonale) et B <- X \param b la HMatrix en entree qui contiendra la solution en sortie */ void getDiag(Vector* diag, int start=0) const; #ifdef DEBUG_LDLT /* \brief verifie que la matrice est bien Lower i.e. avec des fils NULL au-dessus */ void assertLower(); /* \brief verifie que la matrice est bien Upper i.e. avec des fils NULL au-dessous */ void assertUpper(); /* \brief verifie que toutes les feuilles full de la hmatrice sont bien allouees */ void assertAllocFull(); /* \brief verifie juste que les matrices diagonales full ont bien une "diagonale" et sont isTriLower */ bool assertLdlt() const; /* \brief fait le produit LDL^T pour verifier la decomposition Calcule la norme de L2 de deux matrices \param originalCopy hmatrice avant decomposition LDL^T */ void testLdlt(HMatrix * originalCopy) ; #endif }; #endif hmat-oss-1.0.4/src/hmat_cpp_interface.cpp000066400000000000000000000145541245574051100203720ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "hmat_cpp_interface.hpp" #include "h_matrix.hpp" #include "rk_matrix.hpp" #include "cluster_tree.hpp" #include "common/context.hpp" #include "disable_threading.hpp" // HMatInterface template class E> bool HMatInterface::initialized = false; template class E> int HMatInterface::init() { if (initialized) return 0; if (0 != E::init()) return 1; initialized = true; return 0; } template class E> void HMatInterface::finalize() { if (!initialized) return; initialized = false; E::finalize(); } template class E> HMatInterface::HMatInterface(ClusterTree* _rows, ClusterTree* _cols) : rows(_rows), cols(_cols) { DECLARE_CONTEXT; const HMatSettings& settings = HMatSettings::getInstance(); SymmetryFlag sym = (settings.useLdlt ? kLowerSymmetric : kNotSymmetric); engine.hmat = new HMatrix(rows, cols, sym); } template class E> HMatInterface::~HMatInterface() { const ClusterTree* rows = engine.hmat->data.rows; const ClusterTree* cols = engine.hmat->data.cols; bool deleteCols = !(rows == cols); engine.destroy(); delete engine.hmat; delete[] rows->data.indices; delete rows->data.points; delete rows; if (deleteCols) { delete[] cols->data.indices; delete cols->data.points; delete cols; } } template class E> HMatInterface::HMatInterface(HMatrix* h) : rows(h->data.rows), cols(h->data.cols), engine(h) {} //TODO remove the synchronize parameter which is parallel specific template class E> void HMatInterface::assemble(AssemblyFunction& f, SymmetryFlag sym, bool synchronize) { DISABLE_THREADING_IN_BLOCK; engine.assembly(f, sym, synchronize); } template class E> void HMatInterface::factorize() { DISABLE_THREADING_IN_BLOCK; engine.factorization(); } template class E> void HMatInterface::gemv(char trans, T alpha, FullMatrix& x, T beta, FullMatrix& y) const { DISABLE_THREADING_IN_BLOCK; reorderVector(&x, trans == 'N' ? engine.hmat->cols()->indices : engine.hmat->rows()->indices); reorderVector(&y, trans == 'N' ? engine.hmat->rows()->indices : engine.hmat->cols()->indices); engine.gemv(trans, alpha, x, beta, y); restoreVectorOrder(&x, trans == 'N' ? engine.hmat->cols()->indices : engine.hmat->rows()->indices); restoreVectorOrder(&y, trans == 'N' ? engine.hmat->rows()->indices : engine.hmat->cols()->indices); } template class E> void HMatInterface::gemm(char transA, char transB, T alpha, const HMatInterface* a, const HMatInterface* b, T beta) { DISABLE_THREADING_IN_BLOCK; engine.gemm(transA, transB, alpha, a->engine, b->engine, beta); } template class E> void HMatInterface::gemm(FullMatrix& c, char transA, char transB, T alpha, FullMatrix& a, const HMatInterface& b, T beta) { // C <- AB + C <=> C^t <- B^t A^t + C^t // On fait les operations dans ce sens pour etre dans le bon sens // pour la memoire, et pour reordonner correctement les "vecteurs" A // et C. if (transA == 'N') { a.transpose(); } c.transpose(); b.gemv(transB == 'N' ? 'T' : 'N', alpha, a, beta, c); c.transpose(); if (transA == 'N') { a.transpose(); } } template class E> void HMatInterface::solve(FullMatrix& b) const { DISABLE_THREADING_IN_BLOCK; reorderVector(&b, engine.hmat->cols()->indices); engine.solve(b); restoreVectorOrder(&b, engine.hmat->cols()->indices); } template class E> void HMatInterface::solve(HMatInterface& b) const { DISABLE_THREADING_IN_BLOCK; engine.solve(b.engine); } template class E> HMatInterface* HMatInterface::copy() const { ClusterTree* rowsCopy = rows->copy(); ClusterTree* colsCopy = (rows == cols ? rowsCopy : cols->copy()); HMatrix* hCopy = HMatrix::Zero(rowsCopy, colsCopy); hCopy->copy(engine.hmat); HMatInterface* result = new HMatInterface(hCopy); engine.copy(result->engine); return result; } template class E> void HMatInterface::transpose() { engine.hmat->transpose(); engine.transpose(); } template class E> double HMatInterface::norm() const { DISABLE_THREADING_IN_BLOCK; return engine.norm(); } template class E> void HMatInterface::scale(T alpha) { DISABLE_THREADING_IN_BLOCK; engine.hmat->scale(alpha); } template class E> std::pair HMatInterface::compressionRatio() const { return engine.hmat->compressionRatio(); } template class E> void HMatInterface::createPostcriptFile(const char* filename) const { engine.createPostcriptFile(filename); } template class E> void HMatInterface::dumpTreeToFile(const char* filename) const { engine.dumpTreeToFile(filename); } template class E> int HMatInterface::nodesCount() const { DISABLE_THREADING_IN_BLOCK; return engine.hmat->nodesCount(); } hmat-oss-1.0.4/src/hmat_cpp_interface.hpp000066400000000000000000000257411245574051100203770ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /** @file @ingroup HMatrix @brief C++ interface to the HMatrix library. */ #ifndef HMAT_CPP_INTERFACE_HPP #define HMAT_CPP_INTERFACE_HPP #include "hmat/hmat.h" #include "compression.hpp" #include "h_matrix.hpp" #include "default_engine.hpp" class ClusterTree; /** Type of ClusterTree */ enum ClusteringType {kGeometric, kMedian, kHybrid}; /** Settings for the HMatrix library. A single static instance of this class exist, but settings the values is not sufficient for the settings to take effect. One must call \a HMatSettings::setParameters(). */ class HMatSettings { public: double assemblyEpsilon; ///< Tolerance for the assembly. double recompressionEpsilon; ///< Tolerance for the recompression (using SVD) CompressionMethod compressionMethod; ///< Compression method /** \f$\eta\f$ in the admissiblity condition for two clusters \f$\sigma\f$ and \f$\tau\f$: \f[ \min(diam(\sigma), diam(\tau)) < \eta \cdot d(\sigma, \tau) \f] */ double admissibilityFactor; ClusteringType clustering; ///< Type of ClusterTree int maxLeafSize; ///< Maximum size of a leaf in a ClusterTree (and of a non-admissible block in an HMatrix) int maxParallelLeaves; ///< max(|L0|) int elementsPerBlock; ///< Maximum size of an admissible block. Should be size_t ! bool useLu; ///< Use an LU decomposition bool useLdlt; ///< Use an LDL^t decomposition if possible bool cholesky; bool coarsening; ///< Coarsen the matrix structure after assembly. bool recompress; ////< Recompress the matrix after assembly. bool validateCompression; ///< Validate the rk-matrices after compression bool validationReRun; ///< For blocks above error threshold, re-run the compression algorithm bool validationDump; ///< For blocks above error threshold, dump the faulty block to disk double validationErrorThreshold; ///< Error threshold for the compression validation private: /** This constructor sets the default values. */ HMatSettings() : assemblyEpsilon(1e-4), recompressionEpsilon(1e-4), compressionMethod(Svd), admissibilityFactor(2.), clustering(kMedian), maxLeafSize(100), maxParallelLeaves(5000), elementsPerBlock(2000000000), useLu(true), useLdlt(false), cholesky(false), coarsening(false), recompress(true), validateCompression(false), validationReRun(false), validationDump(false), validationErrorThreshold(0.) {} // Disable the copy. HMatSettings(const HMatSettings&); void operator=(const HMatSettings&); public: // Classic Singleton pattern. static HMatSettings& getInstance() { static HMatSettings instance; return instance; } /** Change the settings of the HMatrix library. This method has to be called for the settings to take effect. */ void setParameters() const; /** Output a textual representation of the settings to out. @param out The output stream, std::cout by default. */ void printSettings(std::ostream& out = std::cout) const; }; /** Create a ClusterTree. The exact type of the returned ClusterTree depends on the global HMatrix library settings (\a HMatSettings::clustering). Passing the returned ClusterTree pointer to the constructor of \a HMatInterface transfers its ownership to this instance. It is then automatically freed at the destrution of its owner. @note This is the only proper way to dispose of a ClusterTree instance. @param dls Array of DofCoordinate, of length n @param n number of elements in dls @return a ClusterTree instance. */ ClusterTree* createClusterTree(DofCoordinate* dls, int n); /** C++ interface to the HMatrix library. This is the sole entry point to the HMatrix library. This interface is templated over the scalar type, T. This type has to be one of {S_t, D_t, C_t, Z_t}, using the standard BLAS notation. For the complex types, the C++ complex<> type is used. It is guaranteed to have the same layout as the equivalent FORTRAN types. The user code *has* to call \a HMatInterface::init() before using any other function, and \a HMatInterface::finalize() at the end. */ template class E = DefaultEngine> class HMatInterface { private: static bool initialized; ///< True if the library has been initialized. public: ClusterTree* rows; ///< Row ClusterTree ClusterTree* cols; ///< Column ClusterTree private: E engine; public: /** Initialize the library. @warning This *must* be called before using the HMatrix library. */ static int init(); /** Finalize the library. @warning The library cannot be used after this has been called. */ static void finalize(); /** Build a new HMatrix from two cluster sets. @note The ownership of the two ClusterTree instances (which don't need to be different) is transfered to the returned HMatInterface instance, and will be disposed at destruction time. @param _rows The row ClusterTree instance, built with \a createClusterTree() @param _cols The column ClusterTree instance, built with \a createClusterTree() @return a new HMatInterface instance. */ HMatInterface(ClusterTree* _rows, ClusterTree* _cols); /** Destroy an HMatInterface instance. @note This destructor is *not* virtual, as this class is not meant to be subclassed. */ ~HMatInterface(); /** Assemble an HMatrix. This builds an HMatrix using a provided AssemblyFunction. The compression method is determined by \a HMatSettings::compressionMethod, and the tolerance by \a HMatSettings::assemblyEpsilon. A recompression is done when \a HMatSettings::recompress is true. @param f The assembly function used to compute various matrix sub-parts @param sym Set the symmetry of the HMatrix @param synchronize */ void assemble(AssemblyFunction& f, SymmetryFlag sym, bool synchronize=true); /** Compute a \f$LU\f$ or \f$LDL^T\f$ decomposition of the HMatrix, in place. An LDL^T decomposition is done if the HMatrix is symmetric and has been assembled as such (with sym = kLowerSymmetric in HMatInterface::assemble()), and if HMatSettings::useLdlt is true. Otherwise an LU decomposition is done. */ void factorize(); /** Matrix-Vector product. This computes \f$ y \gets \alpha . op(A) x + \beta y\f$, with A = this, x and y FullMatrix. If trans == 'N', then op(A) = A, if trans == 'T', then op(A) = A^T, as in BLAS. @param trans 'N' or 'T' @param alpha @param x @param beta @param y */ void gemv(char trans, T alpha, FullMatrix& x, T beta, FullMatrix& y) const; /** Matrix-Matrix product. This computes \f$ C \gets \alpha . op(A) \times op(B) + \beta C\f$ with A, B, C three HMatInterface instancem, and C = this. If trans* == 'N' then op(*) = *, if trans* == 'T', then op(*) = *^T, as in BLAS. @note transA == transB == 'T' is not supported. @param transA 'N' or 'T' @param transB 'N' or 'T' @param alpha @param a @param b @param beta */ void gemm(char transA, char transB, T alpha, const HMatInterface* a, const HMatInterface* b, T beta); /** Full <- Full x HMatrix product. This computes the product \f$ C_F \gets \alpha . op(A_F) \times op(B_H) + \beta C_F\f$, with \f$A_F\f$, \f$C_F\f$ two FullMatrix, and \f$B_H\f$ an HMatrixInterface instance. The meaning of the arguments is as in \a HMatInterface::gemm(), and in BLAS. */ static void gemm(FullMatrix& c, char transA, char transB, T alpha, FullMatrix& a, const HMatInterface& b, T beta); /** Return a new copy of this. */ HMatInterface* copy() const; /** Transpose this in place. */ void transpose(); /** Solve the system \f$A x = b\f$ in place, with A = this, and b a FullMatrix. @warning A has to be factored first with \a HMatInterface::factorize(). */ void solve(FullMatrix& b) const; /** Solve the system \f$A x = B\f$ in place, with A = this, and B a HMatInterface. @warning A has to be factored first with \a HMatInterface::factorize(). */ void solve(HMatInterface& b) const; /** Return an approximation of the Frobenius norm of this. */ double norm() const; /** this <- alpha * this */ void scale(T alpha); /** Return the compression ratio of the HMatrix. @note This is only meaningful once the HMatrix has been assembled. @return The pair (element_stored, total_element). */ std::pair compressionRatio() const; /** Create a Postscript file representing the HMatrix. The result .ps file shows the matrix structure and the compression ratio. In the output, red = full block, green = compressed. The darker the green, the worst the compression ration is. There is saturation at black when the block size is divided by less than 5. @param filename output filename. */ void createPostcriptFile(const char* filename) const; /*! \brief Dump some HMatrix metadata to a Python-readable file. This function create a file that is readable by Python's eval() function, which contains a dictionnary with the following data: {'points': [(x1, y1, z1), ...], 'mapping': [indices[0], indices[1], ...], 'tree': { 'isLeaf': False, 'depth': 0, 'rows': {'offset': 0, 'n': 15243, 'boundingBox': [(-0.0249617, -0.0249652, -0.0249586), (0.0962927, 0.0249652, 0.0249688)]}, 'cols': {'offset': 0, 'n': 15243, 'boundingBox': [(-0.0249617, -0.0249652, -0.0249586), (0.0962927, 0.0249652, 0.0249688)]}, 'children': [child1, child2, child3, child4] } } \param filename path to the output file. */ void dumpTreeToFile(const char* filename) const; /** Return the number of block cluster tree nodes. */ int nodesCount() const; typename E::Settings & engineSettings() { return engine.settings; } private: HMatInterface(HMatrix* h); private: /// Disallow the copy HMatInterface(const HMatInterface& o); }; #endif hmat-oss-1.0.4/src/interaction.cpp000066400000000000000000000167471245574051100171040ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /* Interactions between the elements of the matrix */ #include "interaction.hpp" #include "full_matrix.hpp" #include "common/context.hpp" #include "common/my_assert.h" #include template FullMatrix::dp>* SimpleAssemblyFunction::assemble(const ClusterData* rows, const ClusterData* cols) const { FullMatrix::dp>* result = new FullMatrix::dp>(rows->n, cols->n); for (int i = 0; i < rows->n; ++i) { int row = rows->indices[i + rows->offset]; for (int j = 0; j < cols->n; ++j) { int col = cols->indices[j + cols->offset]; result->get(i, j) = interaction(row, col); } } return result; } template Vector::dp>* SimpleAssemblyFunction::getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle) const { Vector::dp>* result = new Vector::dp>(cols->n); getRow(rows, cols, rowIndex, handle, result); return result; } template void SimpleAssemblyFunction::getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle, Vector::dp>* result) const { int row = rows->indices[rowIndex + rows->offset]; for (int j = 0; j < cols->n; j++) { int col = cols->indices[j + cols->offset]; result->v[j] = interaction(row, col); } } template Vector::dp>* SimpleAssemblyFunction::getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle) const { Vector::dp>* result = new Vector::dp>(rows->n); getCol(rows, cols, colIndex, handle, result); return result; } template void SimpleAssemblyFunction::getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle, Vector::dp>* result) const { int col = cols->indices[colIndex + cols->offset]; for (int i = 0; i < rows->n; i++) { int row = rows->indices[i + rows->offset]; result->v[i] = interaction(row, col); } } template BlockAssemblyFunction::BlockAssemblyFunction(const ClusterData* rowData, const ClusterData* colData, void* _user_context, prepare_func _prepare, compute_func _compute, release_func _free_data) : prepare(_prepare), compute(_compute), free_data(_free_data), user_context(_user_context) { rowMapping = rowData->indices; colMapping = colData->indices; rowReverseMapping = new int[rowData->n]; colReverseMapping = new int[colData->n]; for (int i = 0; i < (int) rowData->n; i++) { rowReverseMapping[rowMapping[i]] = i; } for (int i = 0; i < (int) colData->n; i++) { colReverseMapping[colMapping[i]] = i; } } template BlockAssemblyFunction::~BlockAssemblyFunction() { delete[] rowReverseMapping; delete[] colReverseMapping; } template typename Types::dp BlockAssemblyFunction::interaction(int i, int j) const { //TODO: FIXME strongAssert(false); return Constants::dp>::zero; } template FullMatrix::dp>* BlockAssemblyFunction::assemble(const ClusterData* rows, const ClusterData* cols) const { DECLARE_CONTEXT; FullMatrix::dp>* result = FullMatrix::dp>::Zero(rows->n, cols->n); void* data; prepare(rows->offset, rows->n, cols->offset, cols->n, rowMapping, rowReverseMapping, colMapping, colReverseMapping, user_context, &data); compute(data, 0, rows->n, 0, cols->n, (void*) result->m); free_data(data); return result; } template void BlockAssemblyFunction::prepareBlock(const ClusterData* rows, const ClusterData* cols, void** handle) const { prepare(rows->offset, rows->n, cols->offset, cols->n, rowMapping, rowReverseMapping, colMapping, colReverseMapping, user_context, handle); } template void BlockAssemblyFunction::releaseBlock(void* handle) const { free_data(handle); } template Vector::dp>* BlockAssemblyFunction::getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle) const { Vector::dp>* result = Vector::dp>::Zero(cols->n); strongAssert(result); getRow(rows, cols, rowIndex, handle, result); return result; } template void BlockAssemblyFunction::getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle, Vector::dp>* result) const { DECLARE_CONTEXT; myAssert(handle); compute(handle, rowIndex, 1, 0, cols->n, (void*) result->v); } template Vector::dp>* BlockAssemblyFunction::getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle) const { DECLARE_CONTEXT; Vector::dp>* result = Vector::dp>::Zero(rows->n); strongAssert(result); getCol(rows, cols, colIndex, handle, result); return result; } template void BlockAssemblyFunction::getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle, Vector::dp>* result) const { DECLARE_CONTEXT; myAssert(handle); compute(handle, 0, rows->n, colIndex, 1, (void*) result->v); // for (int i = 0; i < rows->n; i++) { // if (result->v[i] == Constants::zero) { // myAssert(false); // } // } } // Template declaration template class SimpleAssemblyFunction; template class SimpleAssemblyFunction; template class SimpleAssemblyFunction; template class SimpleAssemblyFunction; template class BlockAssemblyFunction; template class BlockAssemblyFunction; template class BlockAssemblyFunction; template class BlockAssemblyFunction; hmat-oss-1.0.4/src/interaction.hpp000066400000000000000000000201171245574051100170730ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _INTERACTION_HPP #define _INTERACTION_HPP #include #include "data_types.hpp" #include "hmat/hmat.h" class ClusterData; class ClusterTree; template class FullMatrix; template class Vector; /** Abstract base class representing an assembly function. */ template class AssemblyFunction { public: /** Return the element (i, j) of the matrix. This function has to ignore any mapping. */ virtual typename Types::dp interaction(int i, int j) const = 0; virtual ~AssemblyFunction() {}; virtual FullMatrix::dp>* assemble(const ClusterData* rows, const ClusterData* cols) const = 0; /*! \brief Prepare the Assembly function to optimize getRow() and getCol(). In some cases, it is more efficient to tell the client code that a given block is going to be assembled. The data needed to track this are stored in \a handle. \param rows the rows of the block \param cols the columns of the block \param handle The handle that is storing the associated data. */ virtual void prepareBlock(const ClusterData* rows, const ClusterData* cols, void** handle) const {} /*! \brief Release a block prepared with \a AssemblyFunction::releaseBlock(). \param handle the handle passed to \a AssemblyFunction::releaseBlock(). */ virtual void releaseBlock(void* handle) const {} /*! \brief Return a row of a matrix block. This functions returns a \a Vector representing the row of index \a rowIndex in the subblock defined by its \a rows and \a cols. \param rows the rows of the subblock \param cols the columns of the subblock \param rowIndex the row index in the subblock \param handle the optional handle created by \a AssemblyFunction::prepareBlock() \return the row as a \a Vector */ virtual Vector::dp>* getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle=NULL) const = 0; /*! \brief Return a row of a matrix block. This functions returns a \a Vector representing the row of index \a rowIndex in the subblock defined by its \a rows and \a cols. \param rows the rows of the subblock \param cols the columns of the subblock \param rowIndex the row index in the subblock \param handle the optional handle created by \a AssemblyFunction::prepareBlock() \param result the computed row \return the row as a \a Vector */ virtual void getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle, Vector::dp>* result) const = 0; /*! \brief Return a column of a matrix block. This functions returns a \a Vector representing the column of index \a rowIndex in the subblock defined by its \a rows and \a cols. \param rows the rows of the subblock \param cols the columns of the subblock \param colIndex the row index in the subblock \param handle the optional handle created by \a AssemblyFunction::prepareBlock() \return the column as a \a Vector */ virtual Vector::dp>* getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle=NULL) const = 0; /*! \brief Return a column of a matrix block. This functions returns a \a Vector representing the column of index \a rowIndex in the subblock defined by its \a rows and \a cols. \param rows the rows of the subblock \param cols the columns of the subblock \param colIndex the row index in the subblock \param handle the optional handle created by \a AssemblyFunction::prepareBlock() \param result the computed column \return the column as a \a Vector */ virtual void getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle, Vector::dp>* result) const = 0; }; /** Simple \a AssemblyFunction that allows to only redefine \a AssemblyFunction::interaction(). The rest of the function work by executing a trivial loop on \a SimpleAssemblyFunction::interaction(). */ template class SimpleAssemblyFunction : public AssemblyFunction { public: virtual typename Types::dp interaction(int i, int j) const = 0; virtual ~SimpleAssemblyFunction() {}; virtual FullMatrix::dp>* assemble(const ClusterData* rows, const ClusterData* cols) const; virtual Vector::dp>* getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle=NULL) const; virtual void getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle, Vector::dp>* result) const; virtual Vector::dp>* getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle=NULL) const; virtual void getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle, Vector::dp>* result) const; }; template class BlockAssemblyFunction : public AssemblyFunction { private: prepare_func prepare; compute_func compute; release_func free_data; void* user_context; int* rowMapping; int* rowReverseMapping; int* colMapping; int* colReverseMapping; public: BlockAssemblyFunction(const ClusterData* _rowData, const ClusterData* _colData, void* _user_context, prepare_func _prepare, compute_func _compute, release_func _free_data); ~BlockAssemblyFunction(); typename Types::dp interaction(int i, int j) const; FullMatrix::dp>* assemble(const ClusterData* rows, const ClusterData* cols) const; virtual void prepareBlock(const ClusterData* rows, const ClusterData* cols, void** handle) const; virtual void releaseBlock(void* handle) const; virtual Vector::dp>* getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle=NULL) const; virtual void getRow(const ClusterData* rows, const ClusterData* cols, int rowIndex, void* handle, Vector::dp>* result) const; virtual Vector::dp>* getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle=NULL) const; virtual void getCol(const ClusterData* rows, const ClusterData* cols, int colIndex, void* handle, Vector::dp>* result) const; }; #endif hmat-oss-1.0.4/src/lapack_exception.hpp000066400000000000000000000031021245574051100200600ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #pragma once #ifndef LAPACK_EXCEPTION_HPP #define LAPACK_EXCEPTION_HPP #include #include namespace hmat { class LapackException: public std::runtime_error { public: LapackException(const char * primitive, int info) : runtime_error("Lapack error"), primitive_(primitive), info_(info) {} const char * primitive() const { return primitive_; } int info() const { return info_; } virtual const char* what() const throw() { std::stringstream sstm; sstm << runtime_error::what() << " in "<< primitive_ << ", info=" << info_; return sstm.str().c_str(); } private: const char * primitive_; int info_; }; } #endif /* LAPACK_EXCEPTION_HPP */ hmat-oss-1.0.4/src/lapack_operations.cpp000066400000000000000000000234711245574051100202530ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include #include // memset #include #include "lapack_operations.hpp" #include "blas_overloads.hpp" #include "full_matrix.hpp" #include "common/context.hpp" #include "common/my_assert.h" #include "lapack_overloads.hpp" #include "blas_overloads.hpp" #include "lapack_exception.hpp" using namespace std; // Implementation template int svdCall(int order, char jobu, char jobv, int m, int n, T* a, int lda, double* sigma, T* u, int ldu, T* vt, int ldvt); template<> int svdCall(int order, char jobu, char jobv, int m, int n, S_t* a, int lda, double* sigma, S_t* u, int ldu, S_t* vt, int ldvt) { int result; int p = min(m, n); float* sigmaFloat = new float[p]; int workSize; S_t workSize_S; result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigmaFloat, u, ldu, vt, ldvt, &workSize_S, -1); if(result != 0) throw new hmat::LapackException("gesvd", result); workSize = (int) workSize_S + 1; S_t* work = new S_t[workSize]; strongAssert(work) ; result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigmaFloat, u, ldu, vt, ldvt, work, workSize); if(result != 0) throw new hmat::LapackException("gesvd", result); delete[] work; for (int i = 0; i < p; i++) { sigma[i] = sigmaFloat[i]; } delete[] sigmaFloat; return result; } template<> int svdCall(int order, char jobu, char jobv, int m, int n, D_t* a, int lda, double* sigma, D_t* u, int ldu, D_t* vt, int ldvt) { int workSize; D_t workSize_D; int result; // We request the right size for WORK result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigma, u, ldu, vt, ldvt, &workSize_D, -1); if(result != 0) throw new hmat::LapackException("gesvd", result); workSize = (int) workSize_D + 1; D_t* work = new D_t[workSize]; strongAssert(work) ; result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigma, u, ldu, vt, ldvt, work, workSize); if(result != 0) throw new hmat::LapackException("gesvd", result); delete[] work; return result; } template<> int svdCall(int order, char jobu, char jobv, int m, int n, C_t* a, int lda, double* sigma, C_t* u, int ldu, C_t* vt, int ldvt) { int result; int workSize; C_t workSize_C; int p = min(m, n); float* sigmaFloat = new float[p]; // We request the right size for WORK result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigmaFloat, u, ldu, vt, ldvt, &workSize_C, -1); if(result != 0) throw new hmat::LapackException("gesvd", result); workSize = (int) workSize_C.real() + 1; C_t* work = new C_t[workSize]; strongAssert(work) ; result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigmaFloat, u, ldu, vt, ldvt, work, workSize); if(result != 0) throw new hmat::LapackException("gesvd", result); delete[] work; for (int i = 0; i < p; i++) { sigma[i] = sigmaFloat[i]; } delete[] sigmaFloat; return result; } template<> int svdCall(int order, char jobu, char jobv, int m, int n, Z_t* a, int lda, double* sigma, Z_t* u, int ldu, Z_t* vt, int ldvt) { int result; int workSize; Z_t workSize_Z; // We request the right size for WORK result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigma, u, ldu, vt, ldvt, &workSize_Z, -1); if(result != 0) throw new hmat::LapackException("gesvd", result); workSize = (int) workSize_Z.real() + 1; Z_t* work = new Z_t[workSize]; strongAssert(work) ; result = proxy_lapack::gesvd(jobu, jobv, m, n, a, lda, sigma, u, ldu, vt, ldvt, work, workSize); if(result != 0) throw new hmat::LapackException("gesvd", result); delete[] work; return result; } template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt) { DECLARE_CONTEXT; // Allocate free space for U, S, V int rows = m->rows; int cols = m->cols; int p = min(rows, cols); *u = FullMatrix::Zero(rows, p); *sigma = Vector::Zero(p); *vt = FullMatrix::Zero(p, cols); myAssert(m->lda >= m->rows); char jobz = 'S'; int mm = rows; int n = cols; T* a = m->m; int lda = rows; int info; { const size_t _m = mm, _n = n; // Warning: These quantities are a rough approximation. // What's wrong with these estimates: // - Golub only gives 14 * M*N*N + 8 N*N*N // - This is for real numbers // - We assume the same number of * and + size_t adds = 7 * _m * _n * _n + 4 * _n * _n * _n; size_t muls = 7 * _m * _n * _n + 4 * _n * _n * _n; increment_flops(Multipliers::add * adds + Multipliers::mul * muls); } info = svdCall(0, jobz, jobz, mm, n, a, lda, (*sigma)->v, (*u)->m, (*u)->lda, (*vt)->m, (*vt)->lda); if (info) { cerr << "Erreur dans xGESVD: " << info << endl; } strongAssert(!info); return info; } // Explicit instantiations template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt); template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt); template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt); template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt); template T* qrDecomposition(FullMatrix* m) { DECLARE_CONTEXT; // SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) int rows = m->rows; int cols = m->cols; T* tau = (T*) calloc(min(rows, cols), sizeof(T)); { size_t mm = max(rows, cols); size_t n = min(rows, cols); size_t multiplications = mm * n * n - (n * n * n) / 3 + mm * n + (n * n) / 2 + (29 * n) / 6; size_t additions = mm * n * n + (n * n * n) / 3 + 2 * mm * n - (n * n) / 2 + (5 * n) / 6; increment_flops(Multipliers::mul * multiplications + Multipliers::add * additions); } int info; int workSize; T workSize_S; // int info = LAPACKE_sgeqrf(LAPACK_COL_MAJOR, rows, cols, m->m, rows, *tau); info = proxy_lapack::geqrf(rows, cols, m->m, rows, tau, &workSize_S, -1); strongAssert(!info); workSize = (int) hmat::real(workSize_S) + 1; T* work = new T[workSize]; strongAssert(work) ; info = proxy_lapack::geqrf(rows, cols, m->m, rows, tau, work, workSize); delete[] work; strongAssert(!info); return tau; } // templates declaration template S_t* qrDecomposition(FullMatrix* m); template D_t* qrDecomposition(FullMatrix* m); template C_t* qrDecomposition(FullMatrix* m); template Z_t* qrDecomposition(FullMatrix* m); template void myTrmm(FullMatrix* aFull, FullMatrix* bTri) { DECLARE_CONTEXT; int mm = aFull->rows; int n = aFull->rows; T alpha = Constants::pone; T *aData = bTri->m; int lda = bTri->rows; T *bData = aFull->m; int ldb = aFull->rows; { size_t m_ = mm; size_t nn = n; size_t multiplications = m_ * nn * (nn + 1) / 2; size_t additions = m_ * nn * (nn - 1) / 2; increment_flops(Multipliers::mul * multiplications + Multipliers::add * additions); } proxy_cblas::trmm('R', 'U', 'T', 'N', mm, n, alpha, aData, lda, bData, ldb); } // Explicit instantiations template void myTrmm(FullMatrix* aFull, FullMatrix* bTri); template void myTrmm(FullMatrix* aFull, FullMatrix* bTri); template void myTrmm(FullMatrix* aFull, FullMatrix* bTri); template void myTrmm(FullMatrix* aFull, FullMatrix* bTri); template int productQ(char side, char trans, FullMatrix* qr, T* tau, FullMatrix* c) { DECLARE_CONTEXT; int m = c->rows; int n = c->cols; int k = qr->cols; T* a = qr->m; myAssert((side == 'L') ? qr->rows == m : qr->rows == n); int ldq = qr->lda; int ldc = c->lda; int info; int workSize; T workSize_req; { size_t _m = m, _n = n, _k = k; size_t muls = 2 * _m * _n * _k - _n * _k * _k + 2 * _n * _k; size_t adds = 2 * _m * _n * _k - _n * _k * _k + _n * _k; increment_flops(Multipliers::mul * muls + Multipliers::add * adds); } info = proxy_lapack_convenience::or_un_mqr(side, trans, m, n, k, a, ldq, tau, c->m, ldc, &workSize_req, -1); strongAssert(!info); workSize = (int) hmat::real(workSize_req) + 1; T* work = new T[workSize]; strongAssert(work); info = proxy_lapack_convenience::or_un_mqr(side, trans, m, n, k, a, ldq, tau, c->m, ldc, work, workSize); strongAssert(!info); delete[] work; return 0; } // Explicit instantiations template int productQ(char side, char trans, FullMatrix* qr, S_t* tau, FullMatrix* c); template int productQ(char side, char trans, FullMatrix* qr, D_t* tau, FullMatrix* c); template int productQ(char side, char trans, FullMatrix* qr, C_t* tau, FullMatrix* c); template int productQ(char side, char trans, FullMatrix* qr, Z_t* tau, FullMatrix* c); hmat-oss-1.0.4/src/lapack_operations.hpp000066400000000000000000000044071245574051100202560ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /* Contient des definitions permettant d'acceder a certaines parties de LAPACK dans la MKL. */ #ifndef _LAPACK_OPERATIONS_HPP #define _LAPACK_OPERATIONS_HPP #include "data_types.hpp" #include "full_matrix.hpp" /** Makes an SVD with LAPACK in MKL. \param m \param u \param sigma \param vt \return */ template int truncatedSvd(FullMatrix* m, FullMatrix** u, Vector** sigma, FullMatrix** vt); /** QR matrix decomposition. Warning: m is modified! \param m \param tau \return */ template T* qrDecomposition(FullMatrix* m); /** Do the product by Q. qr has to be factored using \a qrDecomposition. The arguments side and trans have the same meaning as in the LAPACK xORMQR function. Beware, only the 'L', 'N' case has been tested ! \param side either 'L' or 'R', as in xORMQR \param trans either 'N' or 'T' as in xORMQR \param qr the matrix factored using \a qrDecomposition \param tau as created by \a qrDecomposition \param c as in xORMQR \return 0 for success */ template int productQ(char side, char trans, FullMatrix* qr, T* tau, FullMatrix* c); /** Multiplication used in RkMatrix::truncate() A B -> computing "AB^t" with A and B full upper triangular (non-unitary diagonal) */ template void myTrmm(FullMatrix* aFull, FullMatrix* bTri); #endif hmat-oss-1.0.4/src/lapack_overloads.hpp000066400000000000000000000461571245574051100201010ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /* Declarations of LAPACK functions used by hmat*/ #ifndef _LAPACK_OVERLOADS_HPP #define _LAPACK_OVERLOADS_HPP #include "data_types.hpp" #define F77_FUNC(a, b) a ##_ namespace proxy_lapack { #ifndef DOXYGEN_SHOULD_SKIP_THIS /* SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )*/ /* SGETRF COMPUTES AN LU FACTORIZATION OF A GENERAL M-BY-N MATRIX A USING PARTIAL PIVOTING WITH ROW INTERCHANGES.*/ #define _SGETRF_ F77_FUNC(sgetrf,SGETRF) #define _DGETRF_ F77_FUNC(dgetrf,DGETRF) #define _CGETRF_ F77_FUNC(cgetrf,CGETRF) #define _ZGETRF_ F77_FUNC(zgetrf,ZGETRF) extern "C" { void _SGETRF_(int*, int*, S_t*, int*, int*, int*); void _DGETRF_(int*, int*, D_t*, int*, int*, int*); void _CGETRF_(int*, int*, C_t*, int*, int*, int*); void _ZGETRF_(int*, int*, Z_t*, int*, int*, int*); } template int getrf(int m, int n, T* a, int lda, int* ipiv); template <> inline int getrf(int m, int n, S_t* a, int lda, int* ipiv) { int info = 0; _SGETRF_(&m, &n, a, &lda, ipiv, &info); return info; } template <> inline int getrf(int m, int n, D_t* a, int lda, int* ipiv) { int info = 0; _DGETRF_(&m, &n, a, &lda, ipiv, &info); return info; } template <> inline int getrf(int m, int n, C_t* a, int lda, int* ipiv) { int info = 0; _CGETRF_(&m, &n, a, &lda, ipiv, &info); return info; } template <> inline int getrf(int m, int n, Z_t* a, int lda, int* ipiv) { int info = 0; _ZGETRF_(&m, &n, a, &lda, ipiv, &info); return info; } #undef _SGETRF_ #undef _DGETRF_ #undef _CGETRF_ #undef _ZGETRF_ /* SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )*/ /* SGETRI COMPUTES THE INVERSE OF A MATRIX USING THE LU FACTORIZATION COMPUTED BY SGETRF.*/ #define _SGETRI_ F77_FUNC(sgetri,SGETRI) #define _DGETRI_ F77_FUNC(dgetri,DGETRI) #define _CGETRI_ F77_FUNC(cgetri,CGETRI) #define _ZGETRI_ F77_FUNC(zgetri,ZGETRI) extern "C" void _SGETRI_(int*, S_t*, int*, const int*, S_t*, int*, int*); extern "C" void _DGETRI_(int*, D_t*, int*, const int*, D_t*, int*, int*); extern "C" void _CGETRI_(int*, C_t*, int*, const int*, C_t*, int*, int*); extern "C" void _ZGETRI_(int*, Z_t*, int*, const int*, Z_t*, int*, int*); template int getri(int n, T* a, int lda, const int* ipiv, T* work, int lwork); template<> inline int getri(int n, S_t* a, int lda, const int* ipiv, S_t* work, int lwork) { int info = 0; _SGETRI_(&n, a, &lda, ipiv, work, &lwork, &info); return info; } template<> inline int getri(int n, D_t* a, int lda, const int* ipiv, D_t* work, int lwork) { int info = 0; _DGETRI_(&n, a, &lda, ipiv, work, &lwork, &info); return info; } template<> inline int getri(int n, C_t* a, int lda, const int* ipiv, C_t* work, int lwork) { int info = 0; _CGETRI_(&n, a, &lda, ipiv, work, &lwork, &info); return info; } template<> inline int getri(int n, Z_t* a, int lda, const int* ipiv, Z_t* work, int lwork) { int info = 0; _ZGETRI_(&n, a, &lda, ipiv, work, &lwork, &info); return info; } #undef _SGETRI_ #undef _DGETRI_ #undef _CGETRI_ #undef _ZGETRI_ /* SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)*/ /* Solves a system of linear equations with an LU-factored square matrix, with multiple right-hand sides */ #define _SGETRS_ F77_FUNC(sgetrs,SGETRS) #define _DGETRS_ F77_FUNC(dgetrs,DGETRS) #define _CGETRS_ F77_FUNC(cgetrs,CGETRS) #define _ZGETRS_ F77_FUNC(zgetrs,ZGETRS) extern "C" void _SGETRS_(char*, int*, int*, const S_t*, int*, const int*, S_t*, int*, int*); extern "C" void _DGETRS_(char*, int*, int*, const D_t*, int*, const int*, D_t*, int*, int*); extern "C" void _CGETRS_(char*, int*, int*, const C_t*, int*, const int*, C_t*, int*, int*); extern "C" void _ZGETRS_(char*, int*, int*, const Z_t*, int*, const int*, Z_t*, int*, int*); template int getrs(char trans, int n, int nrhs, const T* a, int lda, const int* ipiv, T* b, int ldb); template<> inline int getrs(char trans, int n, int nrhs, const S_t* a, int lda, const int* ipiv, S_t* b, int ldb) { int info = 0; _SGETRS_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, &info); return info; } template<> inline int getrs(char trans, int n, int nrhs, const D_t* a, int lda, const int* ipiv, D_t* b, int ldb) { int info = 0; _DGETRS_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, &info); return info; } template<> inline int getrs(char trans, int n, int nrhs, const C_t* a, int lda, const int* ipiv, C_t* b, int ldb) { int info = 0; _CGETRS_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, &info); return info; } template<> inline int getrs(char trans, int n, int nrhs, const Z_t* a, int lda, const int* ipiv, Z_t* b, int ldb) { int info = 0; _ZGETRS_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, &info); return info; } #undef _SGETRS_ #undef _DGETRS_ #undef _CGETRS_ #undef _ZGETRS_ // SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) // ZGEQRF computes a QR factorization of a real M-by-N matrix A: // A = Q * R. #define _SGEQRF_ F77_FUNC(sgeqrf,SGEQRF) #define _DGEQRF_ F77_FUNC(dgeqrf,DGEQRF) #define _CGEQRF_ F77_FUNC(cgeqrf,CGEQRF) #define _ZGEQRF_ F77_FUNC(zgeqrf,ZGEQRF) extern "C" void _SGEQRF_(int*, int*, S_t*, int*, S_t*, S_t*, int*, int*); extern "C" void _DGEQRF_(int*, int*, D_t*, int*, D_t*, D_t*, int*, int*); extern "C" void _CGEQRF_(int*, int*, C_t*, int*, C_t*, C_t*, int*, int*); extern "C" void _ZGEQRF_(int*, int*, Z_t*, int*, Z_t*, Z_t*, int*, int*); template int geqrf(int m, int n, T* a, int lda, T* tau, T* work, int lwork); template<> inline int geqrf(int m, int n, S_t* a, int lda, S_t* tau, S_t* work, int lwork) { int info = 0; _SGEQRF_(&m, &n, a, &lda, tau, work, &lwork, &info); return info; } template<> inline int geqrf(int m, int n, D_t* a, int lda, D_t* tau, D_t* work, int lwork) { int info = 0; _DGEQRF_(&m, &n, a, &lda, tau, work, &lwork, &info); return info; } template<> inline int geqrf(int m, int n, C_t* a, int lda, C_t* tau, C_t* work, int lwork) { int info = 0; _CGEQRF_(&m, &n, a, &lda, tau, work, &lwork, &info); return info; } template<> inline int geqrf(int m, int n, Z_t* a, int lda, Z_t* tau, Z_t* work, int lwork) { int info = 0; _ZGEQRF_(&m, &n, a, &lda, tau, work, &lwork, &info); return info; } #undef _SGEQRF_ #undef _DGEQRF_ #undef _CGEQRF_ #undef _ZGEQRF_ /* SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,*/ /* WORK, LWORK, INFO ) */ /* ZGESVD computes the singular value decomposition (SVD) of a complex */ /* M-by-N matrix A, optionally computing the left and/or right singular */ /* vectors */ #define _SGESVD_ F77_FUNC(sgesvd,SGESVD) #define _DGESVD_ F77_FUNC(dgesvd,DGESVD) #define _CGESVD_ F77_FUNC(cgesvd,CGESVD) #define _ZGESVD_ F77_FUNC(zgesvd,ZGESVD) extern "C" void _SGESVD_(char*, char*, int*, int*, S_t*, int*, float*, S_t*, int*, S_t*, int*, S_t*, int*, int*); extern "C" void _DGESVD_(char*, char*, int*, int*, D_t*, int*, double*, D_t*, int*, D_t*, int*, D_t*, int*, int*); extern "C" void _CGESVD_(char*, char*, int*, int*, C_t*, int*, float*, C_t*, int*, C_t*, int*, C_t*, int*, float*, int*); extern "C" void _ZGESVD_(char*, char*, int*, int*, Z_t*, int*, double*, Z_t*, int*, Z_t*, int*, Z_t*, int*, double*, int*); template int gesvd(char jobu, char jobvt, int m, int n, T* a, int lda, Treal* s, T* u, int ldu, T* vt, int ldvt, T* work, int lwork); template<> inline int gesvd(char jobu, char jobvt, int m, int n, S_t* a, int lda, S_t* s, S_t* u, int ldu, S_t* vt, int ldvt, S_t* work, int lwork) { int info = 0; _SGESVD_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); return info; } template<> inline int gesvd(char jobu, char jobvt, int m, int n, D_t* a, int lda, D_t* s, D_t* u, int ldu, D_t* vt, int ldvt, D_t* work, int lwork) { int info = 0; _DGESVD_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); return info; } template<> inline int gesvd(char jobu, char jobvt, int m, int n, C_t* a, int lda, S_t* s, C_t* u, int ldu, C_t* vt, int ldvt, C_t* work, int lwork) { int info = 0; const int mn = (m < n ? m : n); S_t* rwork = (lwork == -1 ? NULL : new S_t[5 * mn]); _CGESVD_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); if (rwork) delete [] rwork; return info; } template<> inline int gesvd(char jobu, char jobvt, int m, int n, Z_t* a, int lda, D_t* s, Z_t* u, int ldu, Z_t* vt, int ldvt, Z_t* work, int lwork) { int info = 0; const int mn = (m < n ? m : n); D_t* rwork = (lwork == -1 ? NULL : new D_t[5 * mn]); _ZGESVD_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); if (rwork) delete [] rwork; return info; } #undef _SGESVD_ #undef _DGESVD_ #undef _CGESVD_ #undef _ZGESVD_ // SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) // SORGQR generates an M-by-N real matrix Q with orthonormal columns, // which is defined as the first N columns of a product of K elementary // reflectors of order M // // Q = H(1) H(2) . . . H(k) // // as returned by SGEQRF. #define _SORGQR_ F77_FUNC(sorgqr,SORGQR) #define _DORGQR_ F77_FUNC(dorgqr,DORGQR) extern "C" void _SORGQR_(int*, int*, int*, S_t*, int*, const S_t*, S_t*, int*, int*); extern "C" void _DORGQR_(int*, int*, int*, D_t*, int*, const D_t*, D_t*, int*, int*); template int orgqr(int m, int n, int k, T* a, int lda, const T* tau, T* work, int lwork); template<> inline int orgqr(int m, int n, int k, S_t* a, int lda, const S_t* tau, S_t* work, int lwork) { int info = 0; _SORGQR_(&m, &n, &k, a, &lda, tau, work, &lwork, &info); return info; } template<> inline int orgqr(int m, int n, int k, D_t* a, int lda, const D_t* tau, D_t* work, int lwork) { int info = 0; _DORGQR_(&m, &n, &k, a, &lda, tau, work, &lwork, &info); return info; } #undef _SORGQR_ #undef _DORGQR_ // SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) // CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, // which is defined as the first N columns of a product of K elementary // reflectors of order M // // Q = H(1) H(2) . . . H(k) // // as returned by CGEQRF. #define _CUNGQR_ F77_FUNC(cungqr,CUNGQR) #define _ZUNGQR_ F77_FUNC(zungqr,ZUNGQR) extern "C" void _CUNGQR_(int*, int*, int*, C_t*, int*, const C_t*, C_t*, int*, int*); extern "C" void _ZUNGQR_(int*, int*, int*, Z_t*, int*, const Z_t*, Z_t*, int*, int*); template int ungqr(int m, int n, int k, T* a, int lda, const T* tau, T* work, int lwork); template<> inline int ungqr(int m, int n, int k, C_t* a, int lda, const C_t* tau, C_t* work, int lwork) { int info = 0; _CUNGQR_(&m, &n, &k, a, &lda, tau, work, &lwork, &info); return info; } template<> inline int ungqr(int m, int n, int k, Z_t* a, int lda, const Z_t* tau, Z_t* work, int lwork) { int info = 0; _ZUNGQR_(&m, &n, &k, a, &lda, tau, work, &lwork, &info); return info; } #undef _CUNGQR_ #undef _ZUNGQR_ // SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, // $ WORK, LWORK, INFO ) // DORMQR overwrites the general real M-by-N matrix C with // // SIDE = 'L' SIDE = 'R' // TRANS = 'N': Q * C C * Q // TRANS = 'T': Q**T * C C * Q**T // // where Q is a real orthogonal matrix defined as the product of k // elementary reflectors // // Q = H(1) H(2) . . . H(k) // // as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N // if SIDE = 'R'. #define _SORMQR_ F77_FUNC(sormqr,SORMQR) #define _DORMQR_ F77_FUNC(dormqr,DORMQR) extern "C" void _SORMQR_(char*, char*, int*, int*, int*, const S_t*, int*, const S_t*, S_t*, int*, S_t*, int*, int*); extern "C" void _DORMQR_(char*, char*, int*, int*, int*, const D_t*, int*, const D_t*, D_t*, int*, D_t*, int*, int*); template int ormqr(char side, char trans, int m, int n, int k, const T* a, int lda, const T* tau, T* c, int ldc, T* work, int lwork); template<> inline int ormqr(char side, char trans, int m, int n, int k, const S_t* a, int lda, const S_t* tau, S_t* c, int ldc, S_t* work, int lwork) { int info = 0; _SORMQR_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); return info; } template<> inline int ormqr(char side, char trans, int m, int n, int k, const D_t* a, int lda, const D_t* tau, D_t* c, int ldc, D_t* work, int lwork) { int info = 0; _DORMQR_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); return info; } #undef _SORMQR_ #undef _DORMQR_ // SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, // $ WORK, LWORK, INFO ) // CUNMQR overwrites the general complex M-by-N matrix C with // // SIDE = 'L' SIDE = 'R' // TRANS = 'N': Q * C C * Q // TRANS = 'C': Q**H * C C * Q**H // // where Q is a complex unitary matrix defined as the product of k // elementary reflectors // // Q = H(1) H(2) . . . H(k) // // as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N // if SIDE = 'R'. #define _CUNMQR_ F77_FUNC(cunmqr,CUNMQR) #define _ZUNMQR_ F77_FUNC(zunmqr,ZUNMQR) extern "C" void _CUNMQR_(char*, char*, int*, int*, int*, const C_t*, int*, const C_t*, C_t*, int*, C_t*, int*, int*); extern "C" void _ZUNMQR_(char*, char*, int*, int*, int*, const Z_t*, int*, const Z_t*, Z_t*, int*, Z_t*, int*, int*); template int unmqr(char side, char trans, int m, int n, int k, const T* a, int lda, const T* tau, T* c, int ldc, T* work, int lwork); template<> inline int unmqr(char side, char trans, int m, int n, int k, const C_t* a, int lda, const C_t* tau, C_t* c, int ldc, C_t* work, int lwork) { int info = 0; _CUNMQR_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); return info; } template<> inline int unmqr(char side, char trans, int m, int n, int k, const Z_t* a, int lda, const Z_t* tau, Z_t* c, int ldc, Z_t* work, int lwork) { int info = 0; _ZUNMQR_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); return info; } #undef _CUNMQR_ #undef _ZUNMQR_ #define _SLASWP_ F77_FUNC(slaswp,SLASWP) #define _DLASWP_ F77_FUNC(dlaswp,DLASWP) #define _CLASWP_ F77_FUNC(claswp,CLASWP) #define _ZLASWP_ F77_FUNC(zlaswp,ZLASWP) extern "C" void _SLASWP_(int*, S_t*, int*, int*, int*, const int*, int*); extern "C" void _DLASWP_(int*, D_t*, int*, int*, int*, const int*, int*); extern "C" void _CLASWP_(int*, C_t*, int*, int*, int*, const int*, int*); extern "C" void _ZLASWP_(int*, Z_t*, int*, int*, int*, const int*, int*); template void laswp(int n, T* a, int lda, int k1, int k2, const int* ipiv, int incx); template<> inline void laswp(int n, S_t* a, int lda, int k1, int k2, const int* ipiv, int incx) { _SLASWP_(&n, a, &lda, &k1, &k2, ipiv, &incx); } template<> inline void laswp(int n, D_t* a, int lda, int k1, int k2, const int* ipiv, int incx) { _DLASWP_(&n, a, &lda, &k1, &k2, ipiv, &incx); } template<> inline void laswp(int n, C_t* a, int lda, int k1, int k2, const int* ipiv, int incx) { _CLASWP_(&n, a, &lda, &k1, &k2, ipiv, &incx); } template<> inline void laswp(int n, Z_t* a, int lda, int k1, int k2, const int* ipiv, int incx) { _ZLASWP_(&n, a, &lda, &k1, &k2, ipiv, &incx); } #undef _SLASWP_ #undef _DLASWP_ #undef _CLASWP_ #undef _ZLASWP_ #define _SPOTRF_ F77_FUNC(spotrf,SPOTRF) #define _DPOTRF_ F77_FUNC(dpotrf,DPOTRF) #define _CPOTRF_ F77_FUNC(cpotrf,CPOTRF) #define _ZPOTRF_ F77_FUNC(zpotrf,ZPOTRF) extern "C" { void _SPOTRF_(char * uplo, int* n, S_t* a, int* lda, int* info); void _DPOTRF_(char * uplo, int* n, D_t* a, int* lda, int* info); void _CPOTRF_(char * uplo, int* n, C_t* a, int* lda, int* info); void _ZPOTRF_(char * uplo, int* n, Z_t* a, int* lda, int* info); } template int potrf(char uplo, int n, T* a, int lda); template <> inline int potrf(char uplo, int n, S_t* a, int lda) { int info = 0; _SPOTRF_(&uplo, &n, a, &lda, &info); return info; } template <> inline int potrf(char uplo, int n, D_t* a, int lda) { int info = 0; _DPOTRF_(&uplo, &n, a, &lda, &info); return info; } template <> inline int potrf(char uplo, int n, C_t* a, int lda) { int info = 0; _CPOTRF_(&uplo, &n, a, &lda, &info); return info; } template <> inline int potrf(char uplo, int n, Z_t* a, int lda) { int info = 0; _ZPOTRF_(&uplo, &n, a, &lda, &info); return info; } #undef _SPOTRF_ #undef _DPOTRF_ #undef _CPOTRF_ #undef _ZPOTRF_ } // end namespace proxy_lapack // Some LAPACK routines only exist for real or complex matrices; for instance, // ORMQR is for real orthogonal matrices, and UNMQR for unitary complex matrices. namespace proxy_lapack_convenience { // WARNING: trans equals N or T for real matrices, but N or C for complex matrices. // or_un_mqr accepts N/T and replace T by C for complex matrices. template int or_un_mqr(char side, char trans, int m, int n, int k, const T* a, int lda, const T* tau, T* c, int ldc, T* work, int lwork); template<> inline int or_un_mqr(char side, char trans, int m, int n, int k, const S_t* a, int lda, const S_t* tau, S_t* c, int ldc, S_t* work, int lwork) { return proxy_lapack::ormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork); } template<> inline int or_un_mqr(char side, char trans, int m, int n, int k, const D_t* a, int lda, const D_t* tau, D_t* c, int ldc, D_t* work, int lwork) { return proxy_lapack::ormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork); } template<> inline int or_un_mqr(char side, char trans, int m, int n, int k, const C_t* a, int lda, const C_t* tau, C_t* c, int ldc, C_t* work, int lwork) { const char t = (trans == 'N' ? 'N' : 'C'); return proxy_lapack::unmqr(side, t, m, n, k, a, lda, tau, c, ldc, work, lwork); } template<> inline int or_un_mqr(char side, char trans, int m, int n, int k, const Z_t* a, int lda, const Z_t* tau, Z_t* c, int ldc, Z_t* work, int lwork) { const char t = (trans == 'N' ? 'N' : 'C'); return proxy_lapack::unmqr(side, t, m, n, k, a, lda, tau, c, ldc, work, lwork); } } // end namespace proxy_lapack_convenience namespace hmat { template double real(const T x); template<> inline double real(const C_t x) { return x.real(); } template<> inline double real(const Z_t x) { return x.real(); } template inline double real(const T x) { return x; } } // end namespace hmat #endif // DOXYGEN_SHOULD_SKIP_THIS #endif // _LAPACK_OVERLOADS_HPP hmat-oss-1.0.4/src/postscript.cpp000066400000000000000000000162751245574051100167730ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "postscript.hpp" #include "h_matrix.hpp" #include "rk_matrix.hpp" #include #include using namespace std; static double writeHeader(ofstream & file, int maxDim) { file << "%!" << endl << "% Fichier postscript representant une matrice" << endl; file << "/redrectangle {" << endl << " newpath" << endl << " moveto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " closepath" << endl << " 1 0 0 setrgbcolor" << endl << " fill" << endl << "} def" << endl << "/greenrectangle {" << endl << " newpath" << endl << " setrgbcolor" << endl << " moveto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " closepath" << endl << " fill" << endl << "} def" << endl << "" << endl << "/emptyrectangle {" << endl << " newpath" << endl << " setlinewidth" << endl << " 0 0 0 setrgbcolor" << endl << " moveto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " closepath" << endl << " stroke" << endl << "} def" << endl << endl << "/emptybluerectangle {" << endl << " newpath" << endl << " setlinewidth" << endl << " 0 0 255 setrgbcolor" << endl << " moveto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " rlineto" << endl << " closepath" << endl << " stroke" << endl << "} def" << endl << "/cross {" << endl << " newpath" << endl << "setlinewidth" << endl << " 0 0 0 setrgbcolor" << endl << " moveto" << endl << " rlineto" << endl << " moveto" << endl << " rlineto" << endl << " stroke" << endl << "} def" << endl << "/showrank {" << endl << " /data exch def" << endl << " /xfont exch def" << endl << " /Times-Roman findfont" << endl << " xfont scalefont" << endl << " setfont" << endl << " 0 0 0 setrgbcolor" << endl << " /y1 exch def" << endl << " /x1 exch def" << endl << " x1 y1 moveto" << endl << " data show" << endl << "} def" << endl << " " << endl; double scale = (612. / maxDim) * 0.95; file << scale << " " << scale << " scale" << endl; file << maxDim / 40. << " " << maxDim / 40. << " translate" << endl; return scale; } template void PostscriptDumper::write(const Tree<4> * tree, const char * filename) const { ofstream file; file.open(filename); const HMatrix * m = cast(tree); double scale = writeHeader(file, max(m->rows()->n, m->cols()->n)); recursiveDrawing(tree, file, 0, scale); file << "showpage" << endl; } template void PostscriptDumper::recursiveDrawing(const Tree<4> * tree, ofstream& f, int depth, double scale) const { if (!tree->isLeaf()) { for (int i = 0; i < 4; i++) { const Tree<4>* child = tree->getChild(i); if (child) { recursiveDrawing(child, f, depth + 1, scale); } } } const HMatrix * m = cast(tree); if (depth == 0) { int n = m->rows()->points->size(); int startX = m->cols()->offset; int lengthX = m->cols()->n; int startY = n - m->rows()->offset; int lengthY = -m->rows()->n; f << 0 << " "<< -lengthY << " " << -lengthX << " " << 0 << " " << 0 << " " << lengthY << " " << lengthX << " " << 0 << " " << startX << " " << startY; f << " " << 30 - depth << " emptyrectangle" << endl; } drawMatrix(tree, m, f, depth, scale); } template const HMatrix * PostscriptDumper::cast(const Tree<4> * tree) const { return static_cast *>(tree); } template void PostscriptDumper::drawMatrix(const Tree<4> * tree, const HMatrix * m, ofstream& f, int depth, double scale, bool cross) const { int n = m->rows()->points->size(); int startX = m->cols()->offset; int lengthX = m->cols()->n; int startY = n - m->rows()->offset; int lengthY = -m->rows()->n; if (m->isLeaf()) { if (m->isRkMatrix() && m->data.rk->a) { pair p = m->data.rk->compressionRatio(); double ratio = p.first / ((double) p.second); double color = 0; if (ratio < .20) { color = 1 - 5 * ratio; } f << 0 << " "<< -lengthY << " " << -lengthX << " " << 0 << " " << 0 << " " << lengthY << " " << lengthX << " " << 0 << " " << startX << " " << startY; f << " 0 " << color << " 0 " << " greenrectangle" << endl; f << startX << " " << startY + (lengthY * .95) << " " << -.7 * lengthY << " (" << m->data.rk->k << ") showrank" << endl; } else if (m->isFullMatrix()) { f << 0 << " "<< -lengthY << " " << -lengthX << " " << 0 << " " << 0 << " " << lengthY << " " << lengthX << " " << 0 << " " << startX << " " << startY; f << " redrectangle" << endl; } } else if(cross){ int n = m->rows()->points->size(); int startX = m->cols()->offset; int startY = m->rows()->offset; int colOffset = m->get(1, 1)->cols()->offset; int rowsCount = m->rows()->n; int rowOffset = m->get(1, 1)->rows()->offset; int colsCount = m->cols()->n; f << 0 << " " << -rowsCount << " " << colOffset << " " << n - startY << " " << colsCount << " " << 0 << " " << startX << " " << n - rowOffset << " " << 30 - depth << " cross" << endl; } } template class PostscriptDumper; template class PostscriptDumper; template class PostscriptDumper; template class PostscriptDumper; hmat-oss-1.0.4/src/postscript.hpp000066400000000000000000000034001245574051100167620ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _POSTSCRIPT_HPP #define _POSTSCRIPT_HPP #include "tree.hpp" #include template class HMatrix; /*! \brief Create a Postscript file representing the HMatrix. The result .ps file shows the matrix structure and the compression ratio. In the output, red = full block, green = compressed. The darker the green, the worst the compression ration is. There is saturation at black when the block size is divided by less than 5. */ template class PostscriptDumper { public: void write(const Tree<4> * tree, const char * filename) const; protected: virtual const HMatrix * cast(const Tree<4> * tree) const; virtual void drawMatrix(const Tree<4> * tree, const HMatrix *, std::ofstream& f, int depth, double scale, bool cross=true) const; private: void recursiveDrawing(const Tree<4> * tree, std::ofstream& f, int depth, double scale) const; }; #endif // _POSTSCRIPT_HPP hmat-oss-1.0.4/src/rk_matrix.cpp000066400000000000000000000565311245574051100165600ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #include "rk_matrix.hpp" #include "h_matrix.hpp" #include "cluster_tree.hpp" #include // memcpy #include // DBL_MAX #include "data_types.hpp" #include "lapack_operations.hpp" #include "common/context.hpp" #include "common/my_assert.h" using namespace std; /** RkApproximationControl */ template RkApproximationControl RkMatrix::approx; int RkApproximationControl::findK(double *sigma, int maxK, double epsilon) { // Control of approximation for fixed approx.k != 0 int newK = k; if (newK != 0) { newK = min(newK, maxK); } else { myAssert(epsilon >= 0.); double sumSingularValues = 0.; for (int i = 0; i < maxK; i++) { sumSingularValues += sigma[i]; } int i = 0; for (i = 0; i < maxK; i++) { if (sigma[i] <= epsilon * sumSingularValues) { break; } } newK = i; } return newK; } /** RkMatrix */ template RkMatrix::RkMatrix(FullMatrix* _a, const ClusterData* _rows, FullMatrix* _b, const ClusterData* _cols, CompressionMethod _method) : rows(_rows), cols(_cols), a(_a), b(_b), k(0), method(_method) { // We make a special case for empty matrices. if ((!a) && (!b)) { return; } k = _a->cols; myAssert((size_t) a->rows == rows->n); myAssert(a->cols == k); myAssert((size_t) b->rows == cols->n); myAssert(b->cols == k); } template RkMatrix::~RkMatrix() { if (a) { delete a; a = NULL; } if (b) { delete b; b = NULL; } } template FullMatrix* RkMatrix::eval() const { // Special case of the empty matrix, assimilated to the zero matrix. if (k == 0) { return FullMatrix::Zero(rows->n, cols->n); } FullMatrix* result = new FullMatrix(a->rows, b->rows); result->gemm('N', 'T', Constants::pone, a, b, Constants::zero); return result; } template void RkMatrix::scale(T alpha) { // We need just to scale the first matrix, A. if (a) { a->scale(alpha); } } template void RkMatrix::gemv(char trans, T alpha, const FullMatrix* x, T beta, FullMatrix* y) const { if (k == 0) { if (beta != Constants::pone) { y->scale(beta); } return; } if (trans == 'N') { FullMatrix z(b->cols, x->cols); z.gemm('T', 'N', Constants::pone, b, x, Constants::zero); y->gemm('N', 'N', alpha, a, &z, beta); } else { myAssert(trans == 'T'); FullMatrix z(a->cols, x->cols); z.gemm('T', 'N', Constants::pone, a, x, Constants::zero); y->gemm('N', 'N', alpha, b, &z, beta); } } template const RkMatrix* RkMatrix::subset(const ClusterData* subRows, const ClusterData* subCols) const { myAssert(subRows->isSubset(*rows)); myAssert(subCols->isSubset(*cols)); // The offset in the matrix, and not in all the indices size_t rowsOffset = subRows->offset - rows->offset; size_t colsOffset = subCols->offset - cols->offset; FullMatrix* subA = new FullMatrix(a->m + rowsOffset, subRows->n, k, a->lda); FullMatrix* subB = new FullMatrix(b->m + colsOffset, subCols->n, k, b->lda); return new RkMatrix(subA, subRows, subB, subCols, method); } template pair RkMatrix::compressionRatio() { pair result = pair(0, 0); result.first = rows->n * k + cols->n * k; result.second = rows->n * cols->n; return result; } template void RkMatrix::truncate() { DECLARE_CONTEXT; if (k == 0) { myAssert(!(a || b)); return; } myAssert(rows->n >= (size_t) k); // Case: more columns than one dimension of the matrix. // In this case, the calculation of the SVD of the matrix "R_a R_b^t" is more // expensive than computing the full SVD matrix. We make then a full matrix conversion, // and compress it with RkMatrix::fromMatrix(). // TODO: in this case, the epsilon of recompression is not respected if ((size_t) k > min(rows->n, cols->n)) { FullMatrix* tmp = eval(); RkMatrix* rk = compressMatrix(tmp, rows, cols); // "Move" rk into this, and delete the old "this". swap(*rk); delete rk; } /* To recompress an Rk-matrix to Rk-matrix, we need : - A = Q_a R_A (QR decomposition) - B = Q_b R_b (QR decomposition) - Calculate the SVD of R_a R_b^t = U S V^t - Make truncation U, S, and V in the same way as for compression of a full rank matrix, ie: - Restrict U to its newK first columns U_tilde - Restrict S to its newK first values (diagonal matrix) S_tilde - Restrict V to its newK first columns V_tilde - Put A = Q_a U_tilde SQRT (S_tilde) B = Q_b V_tilde SQRT(S_tilde) The sizes of the matrices are: - Qa : rows x k - Ra : k x k - Qb : cols x k - Rb : k x k So: - Ra Rb^t: k x k - U : k * k - S : k (diagonal) - V^t: k * k Hence: - newA: rows x newK - newB: cols x newK */ int ierr; // QR decomposition of A and B T* tauA = qrDecomposition(a); // A contient QaRa strongAssert(tauA); T* tauB = qrDecomposition(b); // B contient QbRb strongAssert(tauB); // Matrices created by the SVD FullMatrix *u = NULL, *vt = NULL; Vector *sigma = NULL; { // The scope is to automatically delete rAFull. // For computing Ra*Rb^t, we would like to use directly a BLAS function // because Ra and Rb are triangular matrices. // // However, there is no multiplication of two triangular matrices in BLAS, // left matrix must be converted into full matrix first. FullMatrix rAFull(k, k); for (int col = 0; col < k; col++) { for (int row = 0; row <= col; row++) { rAFull.get(row, col) = a->get(row, col); } } // Ra Rb^t myTrmm(&rAFull, b); // SVD of Ra Rb^t ierr = truncatedSvd(&rAFull, &u, &sigma, &vt); strongAssert(!ierr); } // Control of approximation int newK = approx.findK(sigma->v, k, approx.recompressionEpsilon); // We put the root of singular values in sigma for (int i = 0; i < k; i++) { sigma->v[i] = sqrt(sigma->v[i]); } // We need to calculate Qa * Utilde * SQRT (SigmaTilde) // For that we first calculated Utilde * SQRT (SigmaTilde) FullMatrix* newA = FullMatrix::Zero(rows->n, newK); for (int col = 0; col < newK; col++) { T alpha = sigma->v[col]; for (int row = 0; row < k; row++) { newA->get(row, col) = u->get(row, col) * alpha; } } delete u; u = NULL; // newA <- Qa * newA (et newA = Utilde * SQRT(SigmaTilde)) productQ('L', 'N', a, tauA, newA); free(tauA); // newB = Qb * VTilde * SQRT(SigmaTilde) FullMatrix* newB = FullMatrix::Zero(cols->n, newK); // Copy with transposing for (int col = 0; col < newK; col++) { T alpha = sigma->v[col]; for (int row = 0; row < k; row++) { newB->get(row, col) = vt->get(col, row) * alpha; } } delete vt; delete sigma; productQ('L', 'N', b, tauB, newB); free(tauB); delete a; a = newA; delete b; b = newB; k = newK; } // Swap members with members from another instance. // Since rows and cols are constant, they must not be swapped. template void RkMatrix::swap(RkMatrix& other) { myAssert(rows == other.rows); myAssert(cols == other.cols); std::swap(a, other.a); std::swap(b, other.b); std::swap(k, other.k); std::swap(method, other.method); } template void RkMatrix::axpy(T alpha, const FullMatrix* mat) { RkMatrix* tmp = formattedAddParts(&alpha, &mat, &rows, &cols, 1); swap(*tmp); delete tmp; } template void RkMatrix::axpy(T alpha, const RkMatrix* mat) { RkMatrix* tmp = formattedAddParts(&alpha, &mat, 1); swap(*tmp); delete tmp; } template RkMatrix* RkMatrix::formattedAdd(const RkMatrix* o) const { const RkMatrix* parts[1] = {o}; T alpha[1] = {Constants::pone}; return formattedAddParts(alpha, parts, 1); } template RkMatrix* RkMatrix::formattedAdd(const FullMatrix* o) const { const FullMatrix* parts[1] = {o}; const ClusterData* rowsList[1] = {rows}; const ClusterData* colsList[1] = {cols}; T alpha[1] = {Constants::pone}; return formattedAddParts(alpha, parts, rowsList, colsList, 1); } template RkMatrix* RkMatrix::formattedAddParts(T* alpha, const RkMatrix** parts, int n) const { DECLARE_CONTEXT; // If only one of the parts is non-zero, then the recompression is not necessary to // get exactly the same result. int notNullParts = (k == 0 ? 0 : 1); int kTotal = k; CompressionMethod minMethod = method; for (int i = 0; i < n; i++) { // Check that partial RkMatrix indices are subsets of their global indices set. // According to the indices organization, it is necessary to check that the indices // of the matrix are such that: // - parts[i].rows->offset >= rows->offset // - offset + n <= this.offset + this.n // - same for cols myAssert(parts[i]->rows->isSubset(*rows)); myAssert(parts[i]->cols->isSubset(*cols)); kTotal += parts[i]->k; minMethod = std::min(minMethod, parts[i]->method); if (parts[i]->k != 0) { notNullParts += 1; } } // In case the sum of the ranks of the sub-matrices is greater than // the matrix size, it is more efficient to put everything in a // full matrix. if ((size_t) kTotal >= min(rows->n, cols->n)) { const FullMatrix** fullParts = new const FullMatrix*[n]; const ClusterData** rowsParts = new const ClusterData*[n]; const ClusterData** colsParts = new const ClusterData*[n]; for (int i = 0; i < n; i++) { fullParts[i] = parts[i]->eval(); rowsParts[i] = parts[i]->rows; colsParts[i] = parts[i]->cols; } RkMatrix* result = formattedAddParts(alpha, fullParts, rowsParts, colsParts, n); for (int i = 0; i < n; i++) { delete fullParts[i]; } delete[] fullParts; delete[] rowsParts; delete[] colsParts; return result; } FullMatrix* resultA = new FullMatrix(rows->n, kTotal); resultA->clear(); FullMatrix* resultB = new FullMatrix(cols->n, kTotal); resultB->clear(); // Special case if the original matrix is empty. if (k > 0) { resultA->copyMatrixAtOffset(a, 0, 0); resultB->copyMatrixAtOffset(b, 0, 0); } // According to the indices organization, the sub-matrices are // contiguous blocks in the "big" matrix whose columns offset is // this-> k + parts [0] -> k + ... + parts [i - 1] -> k // and rows offset is // parts [i] -> rows-> offset - rows-> offset // for rows, and size // parts [i] -> rows-> nx parts [i] -> k (rows x columns) // Same for columns. int kOffset = k; for (int i = 0; i < n; i++) { size_t rowOffset = parts[i]->rows->offset - rows->offset; size_t rowCount = parts[i]->rows->n; size_t colCount = parts[i]->k; if ((rowCount == 0) || (colCount == 0)) { continue; } resultA->copyMatrixAtOffset(parts[i]->a, rowOffset, kOffset); // Scaling the matrix already in place if (alpha[i] != Constants::pone) { FullMatrix tmp(resultA->m + rowOffset + (kOffset * resultA->lda), parts[i]->a->rows, parts[i]->a->cols, resultA->lda); tmp.scale(alpha[i]); } rowOffset = parts[i]->cols->offset - cols->offset; resultB->copyMatrixAtOffset(parts[i]->b, rowOffset, kOffset); kOffset += parts[i]->k; } RkMatrix* rk = new RkMatrix(resultA, rows, resultB, cols, minMethod); if (notNullParts > 1) { rk->truncate(); } return rk; } template RkMatrix* RkMatrix::formattedAddParts(T* alpha, const FullMatrix** parts, const ClusterData **rowsList, const ClusterData **colsList, int n) const { DECLARE_CONTEXT; FullMatrix* me = eval(); strongAssert(me); for (int i = 0; i < n; i++) { myAssert(rowsList[i]->isSubset(*rows)); myAssert(colsList[i]->isSubset(*cols)); int rowOffset = rowsList[i]->offset - rows->offset; int colOffset = colsList[i]->offset - cols->offset; int maxCol = colsList[i]->n; int maxRow = rowsList[i]->n; for (int col = 0; col < maxCol; col++) { for (int row = 0; row < maxRow; row++) { me->get(row + rowOffset, col + colOffset) += alpha[i] * parts[i]->get(row, col); } } } RkMatrix* result = compressMatrix(me, rows, cols); delete me; return result; } template RkMatrix* RkMatrix::multiplyRkFull(char transR, char transM, const RkMatrix* rk, const FullMatrix* m, const ClusterData* mCols) { DECLARE_CONTEXT; myAssert((transR == 'N') || (transM == 'N'));// we do not manage the case R^T*M^T myAssert(((transR == 'N') ? rk->cols->n : rk->rows->n) == ((transM == 'N') ? m->rows : m->cols)); RkMatrix* rkCopy = (transR == 'N' ? new RkMatrix(rk->a, rk->rows, rk->b, rk->cols, rk->method) : new RkMatrix(rk->b, rk->cols, rk->a, rk->rows, rk->method)); FullMatrix* newB = new FullMatrix((transM == 'N')? m->cols : m->rows, rkCopy->b->cols); if (transM == 'N') { myAssert(m->rows == rkCopy->b->rows); myAssert(newB->rows == m->cols); myAssert(newB->cols == rkCopy->b->cols); newB->gemm('T', 'N', Constants::pone, m, rkCopy->b, Constants::zero); } else { myAssert(m->cols == rkCopy->b->rows); myAssert(newB->rows == m->rows); myAssert(newB->cols == rkCopy->b->cols); newB->gemm('N','N',Constants::pone, m, rkCopy->b, Constants::zero); } FullMatrix* newA = rkCopy->a->copy(); RkMatrix* result = new RkMatrix(newA, rkCopy->rows, newB, mCols, rkCopy->method); rkCopy->a = NULL; rkCopy->b = NULL; delete rkCopy; /* R M = A B^t M = A (B^t M) = A (M^t B)^t */ // MatrixXd b((rk->b.transpose() * (*m)).transpose()); return result; } template RkMatrix* RkMatrix::multiplyFullRk(char transM, char transR, const FullMatrix* m, const RkMatrix* rk, const ClusterData* mRows) { DECLARE_CONTEXT; myAssert((transR == 'N') || (transM == 'N')); // we do not manage the case R^T*M^T FullMatrix* a = rk->a; FullMatrix* b = rk->b; if (transR == 'T') { // permutation to transpose the matrix Rk std::swap(a, b); } const ClusterData *rkCols = ((transR == 'N')? rk->cols : rk->rows); /* M R = M (A B^t) = (MA) B^t */ myAssert(((transM == 'N') ? m->rows : m->cols) == mRows->n); FullMatrix* newA = new FullMatrix((transM == 'N')? m->rows:m->cols,(transR == 'N')? a->cols:b->cols); if (transM == 'N') { newA->gemm('N', 'N', Constants::pone, m, a, Constants::zero); } else { newA->gemm('T', 'N',Constants::pone, m, a, Constants::zero); } FullMatrix* newB = b->copy(); return new RkMatrix(newA, mRows, newB, rkCols, rk->method); } template RkMatrix* RkMatrix::multiplyRkH(char transRk, char transH, const RkMatrix* rk, const HMatrix* h) { DECLARE_CONTEXT; myAssert((transRk == 'N') || (transH == 'N')); // we do not manage the case R^T*M^T myAssert(((transRk == 'N') ? *rk->cols : *rk->rows) == ((transH == 'N')? *h->rows() : *h->cols())); FullMatrix* a = (transRk == 'N')? rk->a : rk->b; FullMatrix* b = (transRk == 'N')? rk->b : rk->a; const ClusterData* rkRows = ((transRk == 'N')? rk->rows : rk->cols); // R M = A (M^t B)^t // Size of the HMatrix is n x m, with // n = h.data.bt->data.first->data.n // m =h.data.bt->data.second->data.n // So H^t size is m x n and the product is m x cols(B) // and the number of columns of B is k. int p = rk->k; myAssert(b->cols == p); FullMatrix* resB = new FullMatrix(transH == 'N' ? h->cols()->n : h->rows()->n, p); resB->clear(); h->gemv(transH == 'N' ? 'T' : 'N', Constants::pone, b, Constants::zero, resB); FullMatrix* newA = a->copy(); const ClusterData *newCols = ((transH == 'N' )? h->cols() : h->rows()); return new RkMatrix(newA, rkRows, resB, newCols, rk->method); } template RkMatrix* RkMatrix::multiplyHRk(char transH, char transR, const HMatrix* h, const RkMatrix* rk) { DECLARE_CONTEXT; if (rk->k == 0) { const ClusterData* newRows = ((transH == 'N') ? h-> rows() : h->cols()); const ClusterData* newCols = ((transR == 'N') ? rk->cols : rk->rows); return new RkMatrix(NULL, newRows, NULL, newCols, rk->method); } // M R = (M A) B^t // The size of the HMatrix is n x m // m = h.data.bt->data.second->data.n // Therefore the product is n x cols(A) // and the number of columns of A is k. myAssert((transR == 'N') || (transH == 'N')); // we do not manage the case of product transposee*transposee FullMatrix* a = rk->a; FullMatrix* b = rk->b; if (transR == 'T') { // permutation of a and b to transpose the matrix Rk std::swap(a, b); } const ClusterData *rkCols = ((transR == 'N' )? rk->cols : rk->rows); int n = ((transH == 'N')? h->rows()->n : h->cols()->n); myAssert(a->cols == rk->k); int p = rk->k; FullMatrix* resA = new FullMatrix(n, p); resA->clear(); h->gemv(transH, Constants::pone, a, Constants::zero, resA); FullMatrix* newB = b->copy(); const ClusterData* newRows = ((transH == 'N')? h-> rows() : h->cols()); // If this base been transposed earlier, back in the right direction. if (transR == 'T') { std::swap(a, b); } return new RkMatrix(resA, newRows, newB, rkCols, rk->method); } template RkMatrix* RkMatrix::multiplyRkRk(char transA, char transB, const RkMatrix* a, const RkMatrix* b) { DECLARE_CONTEXT; myAssert(((transA == 'N') ? *a->cols : *a->rows) == ((transB == 'N') ? *b->rows : *b->cols)); // It is possible to do the computation differently, yielding a // different rank and a different amount of computation. // TODO: choose the best order. FullMatrix* Aa = (transA == 'N' ? a->a : a->b); FullMatrix* Ab = (transA == 'N' ? a->b : a->a); FullMatrix* Ba = (transB == 'N' ? b->a : b->b); FullMatrix* Bb = (transB == 'N' ? b->b : b->a); myAssert(Ab->rows == Ba->rows); // compatibility of the multiplication FullMatrix* tmp = new FullMatrix(a->k, b->k); FullMatrix* newA = new FullMatrix(Aa->rows, b->k); myAssert(tmp->rows == Ab->cols); myAssert(tmp->cols == Ba->cols); myAssert(Ab->rows == Ba->rows); tmp->gemm('T', 'N', Constants::pone, Ab, Ba, Constants::zero); myAssert(Ab->cols == tmp->rows); newA->gemm('N', 'N', Constants::pone, Aa, tmp, Constants::zero); delete tmp; FullMatrix* newB = Bb->copy(); CompressionMethod combined = std::min(a->method, b->method); return new RkMatrix(newA, ((transA == 'N') ? a->rows : a->cols), newB, ((transB == 'N') ? b->cols : b->rows), combined); } template void RkMatrix::multiplyWithDiagOrDiagInv(const HMatrix * d, bool inverse, bool left) { myAssert(*d->rows() == *d->cols()); myAssert(!left || (*rows == *d->cols())); myAssert(left || (*cols == *d->rows())); // extracting the diagonal Vector diag(d->cols()->n); d->getDiag(&diag, 0); if (inverse) { for (int i = 0; i < d->cols()->n; i++) { diag.v[i] = Constants::pone / diag.v[i]; } } // left multiplication by d of b (if M<-M*D : left = false) or a (if M<-D*M : left = true) FullMatrix* aOrB = (left ? a : b); for (int j = 0; j < k; j++) { for (int i = 0; i < aOrB->rows; i++) { aOrB->get(i, j) *= diag.v[i]; } } } template void RkMatrix::gemmRk(char transHA, char transHB, T alpha, const HMatrix* ha, const HMatrix* hb, T beta) { DECLARE_CONTEXT; // TODO: remove this limitation, if needed. myAssert(beta == Constants::pone); if (!(ha->isLeaf() || hb->isLeaf())) { RkMatrix* subRks[2 * 2]; for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { const ClusterData* subRows = (transHA == 'N' ? ha->get(i, j)->rows() : ha->get(j, i)->cols()); const ClusterData* subCols = (transHB == 'N' ? hb->get(i, j)->cols() : hb->get(j, i)->rows()); subRks[i + j * 2] = new RkMatrix(NULL, subRows, NULL, subCols, NoCompression); for (int k = 0; k < 2; k++) { // C_ij = A_ik * B_kj const HMatrix* a_ik = (transHA == 'N' ? ha->get(i, k) : ha->get(k, i)); const HMatrix* b_kj = (transHB == 'N' ? hb->get(k, j) : hb->get(j, k)); subRks[i + j * 2]->gemmRk(transHA, transHB, alpha, a_ik, b_kj, beta); } } } // Reconstruction of C by adding the parts T alpha[4] = {Constants::pone, Constants::pone, Constants::pone, Constants::pone}; RkMatrix* rk = formattedAddParts(alpha, (const RkMatrix**) subRks, 2 * 2); swap(*rk); for (int i = 0; i < 4; i++) { delete subRks[i]; } delete rk; } else { RkMatrix* rk = NULL; // One of the product matrix is a leaf if (ha->isRkMatrix() || hb->isRkMatrix()) { if ((ha->isRkMatrix() && ha->data.rk->k == 0) || (hb->isRkMatrix() && hb->data.rk->k == 0)) { return; } rk = HMatrix::multiplyRkMatrix(transHA, transHB, ha, hb); } else { myAssert(ha->isFullMatrix() || hb->isFullMatrix()); FullMatrix* fullMat = HMatrix::multiplyFullMatrix(transHA, transHB, ha, hb); rk = compressMatrix(fullMat, (transHA == 'N' ? ha->rows() : ha->cols()), (transHB == 'N' ? hb->cols() : hb->rows())); delete fullMat; } axpy(alpha, rk); delete rk; } } template void RkMatrix::copy(RkMatrix* o) { delete a; delete b; rows = o->rows; cols = o->cols; k = o->k; if (k != 0) { myAssert(o->a); myAssert(o->b); } a = (o->a ? o->a->copy() : NULL); b = (o->b ? o->b->copy() : NULL); } template void RkMatrix::checkNan() const { if (k == 0) { return; } a->checkNan(); b->checkNan(); } // Templates declaration template class RkMatrix; template class RkMatrix; template class RkMatrix; template class RkMatrix; hmat-oss-1.0.4/src/rk_matrix.hpp000066400000000000000000000237771245574051100165730ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _RK_MATRIX_HPP #define _RK_MATRIX_HPP /* Implementation of Rk-matrices */ #include #include #include "full_matrix.hpp" #include "compression.hpp" template class HMatrix; template class AssemblyFunction; class ClusterData; /** Control the approximation of Rk-matrices. In the case where k != 0, we do an approximation with a fixed rank k, otherwise the approximation is adaptive, it stops when the singular value is less than an error relative to the sum of the singular values in the SVD. */ class RkApproximationControl { public: int k; /// If != 0, fixed-rank approximation double assemblyEpsilon; /// Tolerance for the assembly double recompressionEpsilon; /// Tolerance for the recompressions CompressionMethod method; /** Initialization with impossible values by default */ RkApproximationControl() : k(0), assemblyEpsilon(-1.), recompressionEpsilon(-1.), method(Svd) {} /** Returns the number of singular values to keep. The stop criterion is (assuming that the singular value are in descending order): sigma [k] / SUM (sigma) class RkMatrix { public: const ClusterData *rows; const ClusterData *cols; // A B^t FullMatrix* a; FullMatrix* b; int k; /// Rank CompressionMethod method; /// Method used to compress this RkMatrix public: /// Control of the approximation. See \a RkApproximationControl for more /// details. static RkApproximationControl approx; // Type double precision associated to T typedef typename Types::dp dp_t; public: /** Construction of a RkMatrix . A Rk-matrix is a compressed representation of a matrix of size n*m by a matrix R = AB^t with A a n*k matrix and B is a m*k matrix. The represented matrix R corresponds to a set of indices _rows and _cols. \param _a matrix A \param _rows indices of the rows (of size n) \param _b matrix B (not B^t) \param _cols indices of the columns (of size k) */ RkMatrix(FullMatrix* _a, const ClusterData* _rows, FullMatrix* _b, const ClusterData* _cols, CompressionMethod _method); ~RkMatrix(); /** Swaps members of two RkMatrix instances. Since rows and cols are constant, they cannot be swaped and the other instance must have the same members. \param other other RkMatrix instance. */ void swap(RkMatrix& other); /** Gives a pointer to a RkMatrix representing a subset of indices. The pointer is supposed to be read-only (for efficiency reasons). \param subRows subset of rows \param subCols subset of columns \return pointer to a new matrix with subRows and subCols. */ const RkMatrix* subset(const ClusterData* subRows, const ClusterData* subCols) const; /** Returns the compression ratio (stored_elements, total_elements). */ std::pair compressionRatio(); /** Returns a pointer to a new matrix M = AB^t (uncompressed) */ FullMatrix* eval() const; /** Recompress an RkMatrix in place. @warning The previous rk->a and rk->b are no longer valid after this function. */ void truncate(); /** this <- this + alpha * mat \param alpha \param mat */ void axpy(T alpha, const FullMatrix* mat); /** this <- this + alpha * mat \param alpha \param mat */ void axpy(T alpha, const RkMatrix* mat); /** Formatted addition of two Rk-matrices. The two matrices must be on the same sets of indices in the case otherwise use RkMatrix::formattedAddParts. The formatted addition of R and S is defined by: \code truncate (R + S) \endcode with the addition defined by the juxtaposition of the matrices A and B of each RkMatrix component of the product. \param o The matrix sum \return truncate(*this + m) A new matrix. */ RkMatrix *formattedAdd(const FullMatrix* o) const; /** Addition of a matrix and formatted a RkMatrix. The two matrices must be on the same sets of indices in the case otherwise use RkMatrix::formattedAddParts. The formatted addition of R and S is defined by: \code truncate(R + S) \endcode with the addition defined by the juxtaposition of the matrices A and B of RkMatrix each component of the product. \param o The matrix sum \return truncate(*this + m) A new matrix. */ RkMatrix *formattedAdd(const RkMatrix* o) const; /** Adds a list of RkMatrix to a RkMatrix. In this function, RkMatrix may include some only indices, that is to say be subsets of evidence of this. \param units The list RkMatrix adding. \param n Number of matrices to add \return truncate(*this + parts[0] + parts[1] + ... + parts[n-1]) */ RkMatrix* formattedAddParts(T* alpha, const RkMatrix** parts, int n) const; /** Adds a list of MatrixXd (solid matrices) to RkMatrix. In this function, MatrixXd may cover a portion of index only, that is to say be subsets of indices this. The MatrixXd are converted RkMatrix before the addition, which is with RkMatrix :: formattedAddParts. \param units The list MatrixXd adding. \param rowsList The list of indices rows \param colsList The list of column indices \param n Number of matrices to add \return truncate (*this + parts[0] + parts[1] + ... + parts[n-1]) */ RkMatrix* formattedAddParts(T* alpha, const FullMatrix** parts, const ClusterData** rowsList, const ClusterData** colsList, int n) const; void gemmRk(char transA, char transB, T alpha, const HMatrix* a, const HMatrix* b, T beta); /** Multiplication by a scalar. \param alpha the scalar */ void scale(T alpha); /** Copy RkMatrix into this. */ void copy(RkMatrix* o); /** Compute y <- alpha * op(A) * y + beta * y. The arguments are similar to BLAS GEMV. */ void gemv(char trans, T alpha, const FullMatrix* x, T beta, FullMatrix* y) const; /** Right multiplication of RkMatrix by a matrix. \param transR 'N' or 'T' depending on whether R is transposed or not \param Beam 'N' or 'T' depending on whether M is transposed or not \return R * M */ static RkMatrix* multiplyRkFull(char transR, char transM, const RkMatrix* rk, const FullMatrix* m, const ClusterData* mCols); /** Left multiplication of RkMatrix by a matrix. \param transR 'N' or 'T' depending on whether R is transposed or not \param Beam 'N' or 'T' depending on whether M is transposed or not \return R * M */ static RkMatrix* multiplyFullRk(char transM, char transR, const FullMatrix* m, const RkMatrix* rk, const ClusterData* mRows); /* These functions are added to manage the particular case of the product by H-matrix, which is treated by decomposing the product into the succession of products by a vector, the result being a RkMatrix. */ /** Right multiplying of RkMatrix by HMatrix. The product is made by multiplying the vector by vector. \param R rk \param h H \param transRk 'N' or 'T' depending on whether or not R is transposed \param transh 'N' or 'T' depending on whether or not H is transposed \return R * H */ static RkMatrix* multiplyRkH(char transRk, char transH, const RkMatrix* rk, const HMatrix* h); /** Left multiplying a RkMatrix by HMatrix. The product is made by multiplying vector by vector. \param h H \param R rk \param transR 'N' or 'T' depending on whether or not R is transposed \param transh 'N' or 'T' depending on whether or not H is transposed \return H * R */ static RkMatrix* multiplyHRk(char transH, char transR, const HMatrix* h, const RkMatrix* rk); /** Multiplying a RkMatrix by a RkMatrix The product is made by multiplying vector by vector. \param a \param b \return A * B */ static RkMatrix* multiplyRkRk(char transA, char transB, const RkMatrix* a, const RkMatrix* b); /*! \brief in situ multiplication of the matrix by the diagonal of the matrix given as argument \param d D matrix which we just considered the diagonal \param inverse D or D^{-1} \param left multiplication (left = true) or right (left = false) of this by D */ void multiplyWithDiagOrDiagInv(const HMatrix * d, bool inverse, bool left = false); /*! \brief Triggers an assertion if there are NaNs in the RkMatrix. */ void checkNan() const; }; #endif hmat-oss-1.0.4/src/system_types.h000066400000000000000000000036261245574051100167720ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ #ifndef _SYSTEM_TYPES_H #define _SYSTEM_TYPES_H #include "config.h" #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_TIME_H # include #endif /* See what has to be done for WIN32... */ #if (defined (_WIN64) || defined (_WIN32)) && \ !(defined (__MINGW64__) || defined(__MINGW32__)) # include # include # ifndef HAVE_MODE_T # define HAVE_MODE_T typedef int mode_t; # endif /* Windows defines the equivalent SSIZE_T in the platform SDK */ /* as the signed equivalent of size_t which is defined as long */ /* on WIN32 and long long/__int64 on WIN64 */ # ifdef _WIN64 typedef __int64 off64_t; typedef __int64 ssize_t; # else /* _WIN64 */ # ifdef __ICL /* Intel compiler and LARGE FILES on WIN32 */ typedef __int64 off64_t; # else typedef off_t off64_t; # endif typedef long ssize_t; # endif /* _WIN64 */ /* Intel compiler */ # ifdef __ICL # define lseek _lseeki64 # endif #elif defined(HAVE_UNISTD_H) #include #endif #endif /* _SYSTEM_TYPES_H */ hmat-oss-1.0.4/src/tree.hpp000066400000000000000000000074251245574051100155220ustar00rootroot00000000000000/* HMat-OSS (HMatrix library, open source software) Copyright (C) 2014-2015 Airbus Group SAS This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. http://github.com/jeromerobert/hmat-oss */ /*! \file \ingroup HMatrix \brief Templated Tree class used by ClusterTree and HMatrix. */ #ifndef _TREE_HPP #define _TREE_HPP #include #include #include // size_t /*! \brief Templated tree class. This class represents a tree of arity N, holding an instance of NodeData in its nodes. */ template class Tree { public: /// depth of the current node in the tree int depth; protected: /// NULL for a leaf, pointeur on an array of N sons otherwie. Tree** children; public: /// Pointer to the father, NULL if this node is the root Tree* father; public: Tree(Tree* _father, int _depth = 0) : depth(_depth), children(NULL), father(_father) {} virtual ~Tree() { if (!children) { return; } for (int i = 0; i < N; i++) { if (children[i]) { delete children[i]; children[i] = NULL; } } delete[] children; } /*! \brief Insert a child in the children array. If a child is already present, it is removed but not deleted. \param index index in the children array \param child pointeur to the child */ void insertChild(int index, Tree *child) { if (!children) { children = new Tree*[N]; for (int i = 0; i < N; i++) { children[i] = NULL; } } child->father = this; children[index] = child; child->depth = depth + 1; } /*! \brief Remove a children, an delete it if necessary. */ void removeChild(int index) { delete children[index]; children[index] = NULL; } /*! \brief Return the number of nodes in the tree. */ int nodesCount() const { int result = 1; if (!isLeaf()) { for (int i = 0; i < N; i++) { if (getChild(i)) { result += getChild(i)->nodesCount(); } } } return result; } /*! \brief Return the child of index, or NULL. \warning Will segfault if used on a leaf. */ inline Tree *getChild(int index) const { return children[index]; } /*! \brief Return true if the node is a leaf. */ inline bool isLeaf() const { return !children; } /*! \brief Return a list of nodes. */ virtual std::list*> listNodes() const { std::list*> result; result.push_back(this); if (!isLeaf()) { for (int i = 0; i < N; i++) { Tree* child = getChild(i); if (child) { std::list*> childNodes = child->listNodes(); result.splice(result.end(), childNodes, childNodes.begin(), childNodes.end()); } } } return result; } /*! \brief Return a list of leaves. */ void listAllLeaves(std::vector*>& leaves) const { if (!isLeaf()) { for (int i = 0; i < N; i++) { Tree* child = getChild(i); if (child) { child->listAllLeaves(leaves); } } } else { leaves.push_back(const_cast*>(this)); } } }; #endif // _TREE_HPP